From 235fa2b5a8cf2b48d41bca1b80e4c95ec730efde Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Thu, 30 Nov 2023 00:41:03 -0500 Subject: [PATCH 1/7] support Apple's new Accelerate in macOS 13 --- include/blas/fortran.h | 721 +++++++++++++++++++++++------------------ 1 file changed, 414 insertions(+), 307 deletions(-) diff --git a/include/blas/fortran.h b/include/blas/fortran.h index 89973302..decdcc76 100644 --- a/include/blas/fortran.h +++ b/include/blas/fortran.h @@ -8,13 +8,23 @@ #include "blas/defines.h" #include "blas/mangling.h" + +// Accelerate uses std::complex +#define BLAS_COMPLEX_CPP #include "blas/config.h" -// It seems all current Fortran compilers put strlen at end. -// Some historical compilers put strlen after the str argument -// or make the str argument into a struct. -#ifndef BLAS_FORTRAN_STRLEN_END -#define BLAS_FORTRAN_STRLEN_END +#ifdef ACCELERATE_NEW_LAPACK + #include <Accelerate/Accelerate.h> +#else + // It seems all current Fortran compilers put strlen at end. + // Some historical compilers put strlen after the str argument + // or make the str argument into a struct. + // New Apple Accelerate (macOS >= 13.3) does not include strlen. 
+ #ifndef BLAS_FORTRAN_STRLEN_END + #define BLAS_FORTRAN_STRLEN_END + #endif + + #define BLAS_PROTO #endif #ifdef __cplusplus @@ -24,283 +34,315 @@ extern "C" { // ============================================================================= // Level 1 BLAS - Fortran prototypes -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_saxpy BLAS_FORTRAN_NAME( saxpy, SAXPY ) +#define BLAS_daxpy BLAS_FORTRAN_NAME( daxpy, DAXPY ) +#define BLAS_caxpy BLAS_FORTRAN_NAME( caxpy, CAXPY ) +#define BLAS_zaxpy BLAS_FORTRAN_NAME( zaxpy, ZAXPY ) + +#ifdef BLAS_PROTO void BLAS_saxpy( blas_int const *n, float const *alpha, float const *x, blas_int const *incx, float *y, blas_int const *incy ); -#define BLAS_daxpy BLAS_FORTRAN_NAME( daxpy, DAXPY ) void BLAS_daxpy( blas_int const *n, double const *alpha, double const *x, blas_int const *incx, double *y, blas_int const *incy ); -#define BLAS_caxpy BLAS_FORTRAN_NAME( caxpy, CAXPY ) void BLAS_caxpy( blas_int const *n, blas_complex_float const *alpha, blas_complex_float const *x, blas_int const *incx, blas_complex_float *y, blas_int const *incy ); -#define BLAS_zaxpy BLAS_FORTRAN_NAME( zaxpy, ZAXPY ) void BLAS_zaxpy( blas_int const *n, blas_complex_double const *alpha, blas_complex_double const *x, blas_int const *incx, blas_complex_double *y, blas_int const *incy ); +#endif // BLAS_PROTO -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_sscal BLAS_FORTRAN_NAME( sscal, SSCAL ) +#define BLAS_dscal BLAS_FORTRAN_NAME( dscal, DSCAL ) +#define BLAS_cscal BLAS_FORTRAN_NAME( cscal, CSCAL ) +#define BLAS_zscal BLAS_FORTRAN_NAME( zscal, ZSCAL ) + +#ifdef BLAS_PROTO void BLAS_sscal( blas_int const *n, float const *alpha, float *x, blas_int const *incx ); -#define BLAS_dscal BLAS_FORTRAN_NAME( dscal, 
DSCAL ) void BLAS_dscal( blas_int const *n, double const *alpha, double *x, blas_int const *incx ); -#define BLAS_cscal BLAS_FORTRAN_NAME( cscal, CSCAL ) void BLAS_cscal( blas_int const *n, blas_complex_float const *alpha, blas_complex_float *x, blas_int const *incx ); -#define BLAS_zscal BLAS_FORTRAN_NAME( zscal, ZSCAL ) void BLAS_zscal( blas_int const *n, blas_complex_double const *alpha, blas_complex_double *x, blas_int const *incx ); +#endif // BLAS_PROTO -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_scopy BLAS_FORTRAN_NAME( scopy, SCOPY ) +#define BLAS_dcopy BLAS_FORTRAN_NAME( dcopy, DCOPY ) +#define BLAS_ccopy BLAS_FORTRAN_NAME( ccopy, CCOPY ) +#define BLAS_zcopy BLAS_FORTRAN_NAME( zcopy, ZCOPY ) + +#ifdef BLAS_PROTO void BLAS_scopy( blas_int const *n, float const *x, blas_int const *incx, float *y, blas_int const *incy ); -#define BLAS_dcopy BLAS_FORTRAN_NAME( dcopy, DCOPY ) void BLAS_dcopy( blas_int const *n, double const *x, blas_int const *incx, double *y, blas_int const *incy ); -#define BLAS_ccopy BLAS_FORTRAN_NAME( ccopy, CCOPY ) void BLAS_ccopy( blas_int const *n, blas_complex_float const *x, blas_int const *incx, blas_complex_float *y, blas_int const *incy ); -#define BLAS_zcopy BLAS_FORTRAN_NAME( zcopy, ZCOPY ) void BLAS_zcopy( blas_int const *n, blas_complex_double const *x, blas_int const *incx, blas_complex_double *y, blas_int const *incy ); +#endif // BLAS_PROTO -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_sswap BLAS_FORTRAN_NAME( sswap, SSWAP ) +#define BLAS_dswap BLAS_FORTRAN_NAME( dswap, DSWAP ) +#define BLAS_cswap BLAS_FORTRAN_NAME( cswap, CSWAP ) +#define BLAS_zswap BLAS_FORTRAN_NAME( zswap, ZSWAP ) + +#ifdef BLAS_PROTO void BLAS_sswap( blas_int const *n, float *x, blas_int const 
*incx, float *y, blas_int const *incy ); -#define BLAS_dswap BLAS_FORTRAN_NAME( dswap, DSWAP ) void BLAS_dswap( blas_int const *n, double *x, blas_int const *incx, double *y, blas_int const *incy ); -#define BLAS_cswap BLAS_FORTRAN_NAME( cswap, CSWAP ) void BLAS_cswap( blas_int const *n, blas_complex_float *x, blas_int const *incx, blas_complex_float *y, blas_int const *incy ); -#define BLAS_zswap BLAS_FORTRAN_NAME( zswap, ZSWAP ) void BLAS_zswap( blas_int const *n, blas_complex_double *x, blas_int const *incx, blas_complex_double *y, blas_int const *incy ); +#endif // BLAS_PROTO -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_sdot BLAS_FORTRAN_NAME( sdot, SDOT ) +#define BLAS_ddot BLAS_FORTRAN_NAME( ddot, DDOT ) + +#ifdef BLAS_PROTO blas_float_return BLAS_sdot( blas_int const *n, float const *x, blas_int const *incx, float const *y, blas_int const *incy ); -#define BLAS_ddot BLAS_FORTRAN_NAME( ddot, DDOT ) double BLAS_ddot( blas_int const *n, double const *x, blas_int const *incx, double const *y, blas_int const *incy ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_cdotc BLAS_FORTRAN_NAME( cdotc, CDOTC ) +#define BLAS_zdotc BLAS_FORTRAN_NAME( zdotc, ZDOTC ) +#define BLAS_cdotu BLAS_FORTRAN_NAME( cdotu, CDOTU ) +#define BLAS_zdotu BLAS_FORTRAN_NAME( zdotu, ZDOTU ) -// ----------------------------------------------------------------------------- +#ifdef BLAS_PROTO // For Fortran functions returning complex values, // define BLAS_COMPLEX_RETURN_ARGUMENT if result is a hidden first argument (Intel icc), // else the default is to return complex values (GNU gcc). 
#ifdef BLAS_COMPLEX_RETURN_ARGUMENT -#define BLAS_cdotc BLAS_FORTRAN_NAME( cdotc, CDOTC ) -void BLAS_cdotc( - blas_complex_float *result, - blas_int const *n, - blas_complex_float const *x, blas_int const *incx, - blas_complex_float const *y, blas_int const *incy ); - -#define BLAS_zdotc BLAS_FORTRAN_NAME( zdotc, ZDOTC ) -void BLAS_zdotc( - blas_complex_double *result, - blas_int const *n, - blas_complex_double const *x, blas_int const *incx, - blas_complex_double const *y, blas_int const *incy ); - -#define BLAS_cdotu BLAS_FORTRAN_NAME( cdotu, CDOTU ) -void BLAS_cdotu( - blas_complex_float *result, - blas_int const *n, - blas_complex_float const *x, blas_int const *incx, - blas_complex_float const *y, blas_int const *incy ); - -#define BLAS_zdotu BLAS_FORTRAN_NAME( zdotu, ZDOTU ) -void BLAS_zdotu( - blas_complex_double *result, - blas_int const *n, - blas_complex_double const *x, blas_int const *incx, - blas_complex_double const *y, blas_int const *incy ); + void BLAS_cdotc( + blas_complex_float *result, + blas_int const *n, + blas_complex_float const *x, blas_int const *incx, + blas_complex_float const *y, blas_int const *incy ); + + void BLAS_zdotc( + blas_complex_double *result, + blas_int const *n, + blas_complex_double const *x, blas_int const *incx, + blas_complex_double const *y, blas_int const *incy ); + + void BLAS_cdotu( + blas_complex_float *result, + blas_int const *n, + blas_complex_float const *x, blas_int const *incx, + blas_complex_float const *y, blas_int const *incy ); + + void BLAS_zdotu( + blas_complex_double *result, + blas_int const *n, + blas_complex_double const *x, blas_int const *incx, + blas_complex_double const *y, blas_int const *incy ); -// -------------------- #else // ! 
defined(BLAS_COMPLEX_RETURN_ARGUMENT) -#define BLAS_cdotc BLAS_FORTRAN_NAME( cdotc, CDOTC ) -blas_complex_float BLAS_cdotc( - blas_int const *n, - blas_complex_float const *x, blas_int const *incx, - blas_complex_float const *y, blas_int const *incy ); + blas_complex_float BLAS_cdotc( + blas_int const *n, + blas_complex_float const *x, blas_int const *incx, + blas_complex_float const *y, blas_int const *incy ); -#define BLAS_zdotc BLAS_FORTRAN_NAME( zdotc, ZDOTC ) -blas_complex_double BLAS_zdotc( - blas_int const *n, - blas_complex_double const *x, blas_int const *incx, - blas_complex_double const *y, blas_int const *incy ); + blas_complex_double BLAS_zdotc( + blas_int const *n, + blas_complex_double const *x, blas_int const *incx, + blas_complex_double const *y, blas_int const *incy ); -#define BLAS_cdotu BLAS_FORTRAN_NAME( cdotu, CDOTU ) -blas_complex_float BLAS_cdotu( - blas_int const *n, - blas_complex_float const *x, blas_int const *incx, - blas_complex_float const *y, blas_int const *incy ); + blas_complex_float BLAS_cdotu( + blas_int const *n, + blas_complex_float const *x, blas_int const *incx, + blas_complex_float const *y, blas_int const *incy ); -#define BLAS_zdotu BLAS_FORTRAN_NAME( zdotu, ZDOTU ) -blas_complex_double BLAS_zdotu( - blas_int const *n, - blas_complex_double const *x, blas_int const *incx, - blas_complex_double const *y, blas_int const *incy ); + blas_complex_double BLAS_zdotu( + blas_int const *n, + blas_complex_double const *x, blas_int const *incx, + blas_complex_double const *y, blas_int const *incy ); #endif // ! 
defined(BLAS_COMPLEX_RETURN) +#endif // BLAS_PROTO -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_snrm2 BLAS_FORTRAN_NAME( snrm2, SNRM2 ) +#define BLAS_dnrm2 BLAS_FORTRAN_NAME( dnrm2, DNRM2 ) +#define BLAS_scnrm2 BLAS_FORTRAN_NAME( scnrm2, SCNRM2 ) +#define BLAS_dznrm2 BLAS_FORTRAN_NAME( dznrm2, DZNRM2 ) + +#ifdef BLAS_PROTO blas_float_return BLAS_snrm2( blas_int const *n, float const *x, blas_int const *incx ); -#define BLAS_dnrm2 BLAS_FORTRAN_NAME( dnrm2, DNRM2 ) double BLAS_dnrm2( blas_int const *n, double const *x, blas_int const *incx ); -#define BLAS_scnrm2 BLAS_FORTRAN_NAME( scnrm2, SCNRM2 ) blas_float_return BLAS_scnrm2( blas_int const *n, blas_complex_float const *x, blas_int const *incx ); -#define BLAS_dznrm2 BLAS_FORTRAN_NAME( dznrm2, DZNRM2 ) double BLAS_dznrm2( blas_int const *n, blas_complex_double const *x, blas_int const *incx ); +#endif // BLAS_PROTO -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_sasum BLAS_FORTRAN_NAME( sasum, SASUM ) +#define BLAS_dasum BLAS_FORTRAN_NAME( dasum, DASUM ) +#define BLAS_scasum BLAS_FORTRAN_NAME( scasum, SCASUM ) +#define BLAS_dzasum BLAS_FORTRAN_NAME( dzasum, DZASUM ) + +#ifdef BLAS_PROTO blas_float_return BLAS_sasum( blas_int const *n, float const *x, blas_int const *incx ); -#define BLAS_dasum BLAS_FORTRAN_NAME( dasum, DASUM ) double BLAS_dasum( blas_int const *n, double const *x, blas_int const *incx ); -#define BLAS_scasum BLAS_FORTRAN_NAME( scasum, SCASUM ) blas_float_return BLAS_scasum( blas_int const *n, blas_complex_float const *x, blas_int const *incx ); -#define BLAS_dzasum BLAS_FORTRAN_NAME( dzasum, DZASUM ) double BLAS_dzasum( blas_int const *n, blas_complex_double const *x, blas_int const *incx ); +#endif // BLAS_PROTO -// 
----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_isamax BLAS_FORTRAN_NAME( isamax, ISAMAX ) +#define BLAS_idamax BLAS_FORTRAN_NAME( idamax, IDAMAX ) +#define BLAS_icamax BLAS_FORTRAN_NAME( icamax, ICAMAX ) +#define BLAS_izamax BLAS_FORTRAN_NAME( izamax, IZAMAX ) + +#ifdef BLAS_PROTO blas_int BLAS_isamax( blas_int const *n, float const *x, blas_int const *incx ); -#define BLAS_idamax BLAS_FORTRAN_NAME( idamax, IDAMAX ) blas_int BLAS_idamax( blas_int const *n, double const *x, blas_int const *incx ); -#define BLAS_icamax BLAS_FORTRAN_NAME( icamax, ICAMAX ) blas_int BLAS_icamax( blas_int const *n, blas_complex_float const *x, blas_int const *incx ); -#define BLAS_izamax BLAS_FORTRAN_NAME( izamax, IZAMAX ) blas_int BLAS_izamax( blas_int const *n, blas_complex_double const *x, blas_int const *incx ); +#endif // BLAS_PROTO -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // c is real // oddly, b is const for crotg, zrotg #define BLAS_srotg BLAS_FORTRAN_NAME( srotg, SROTG ) +#define BLAS_drotg BLAS_FORTRAN_NAME( drotg, DROTG ) +#define BLAS_crotg BLAS_FORTRAN_NAME( crotg, CROTG ) +#define BLAS_zrotg BLAS_FORTRAN_NAME( zrotg, ZROTG ) + +#ifdef BLAS_PROTO void BLAS_srotg( float *a, float *b, float *c, float *s ); -#define BLAS_drotg BLAS_FORTRAN_NAME( drotg, DROTG ) void BLAS_drotg( double *a, double *b, double *c, double *s ); -#define BLAS_crotg BLAS_FORTRAN_NAME( crotg, CROTG ) void BLAS_crotg( blas_complex_float *a, blas_complex_float const *b, float *c, blas_complex_float *s ); -#define BLAS_zrotg BLAS_FORTRAN_NAME( zrotg, ZROTG ) void BLAS_zrotg( blas_complex_double *a, blas_complex_double const *b, double *c, blas_complex_double *s ); +#endif // BLAS_PROTO -// 
----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // c is real #define BLAS_srot BLAS_FORTRAN_NAME( srot, SROT ) +#define BLAS_drot BLAS_FORTRAN_NAME( drot, DROT ) +#define BLAS_csrot BLAS_FORTRAN_NAME( csrot, CSROT ) +#define BLAS_zdrot BLAS_FORTRAN_NAME( zdrot, ZDROT ) +#define BLAS_crot BLAS_FORTRAN_NAME( crot, CROT ) +#define BLAS_zrot BLAS_FORTRAN_NAME( zrot, ZROT ) + +#ifdef BLAS_PROTO void BLAS_srot( blas_int const *n, float *x, blas_int const *incx, @@ -308,7 +350,6 @@ void BLAS_srot( float const *c, float const *s ); -#define BLAS_drot BLAS_FORTRAN_NAME( drot, DROT ) void BLAS_drot( blas_int const *n, double *x, blas_int const *incx, @@ -316,7 +357,6 @@ void BLAS_drot( double const *c, double const *s ); -#define BLAS_csrot BLAS_FORTRAN_NAME( csrot, CSROT ) void BLAS_csrot( blas_int const *n, blas_complex_float *x, blas_int const *incx, @@ -324,7 +364,6 @@ void BLAS_csrot( float const *c, float const *s ); -#define BLAS_zdrot BLAS_FORTRAN_NAME( zdrot, ZDROT ) void BLAS_zdrot( blas_int const *n, blas_complex_double *x, blas_int const *incx, @@ -332,7 +371,6 @@ void BLAS_zdrot( double const *c, double const *s ); -#define BLAS_crot BLAS_FORTRAN_NAME( crot, CROT ) void BLAS_crot( blas_int const *n, blas_complex_float *x, blas_int const *incx, @@ -340,16 +378,19 @@ void BLAS_crot( float const *c, blas_complex_float const *s ); -#define BLAS_zrot BLAS_FORTRAN_NAME( zrot, ZROT ) void BLAS_zrot( blas_int const *n, blas_complex_double *x, blas_int const *incx, blas_complex_double *y, blas_int const *incy, double const *c, blas_complex_double const *s ); +#endif // BLAS_PROTO -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_srotmg BLAS_FORTRAN_NAME( srotmg, SROTMG ) +#define BLAS_drotmg BLAS_FORTRAN_NAME( drotmg, DROTMG ) + +#ifdef 
BLAS_PROTO void BLAS_srotmg( float *d1, float *d2, @@ -357,34 +398,55 @@ void BLAS_srotmg( float const *y1, float *param ); -#define BLAS_drotmg BLAS_FORTRAN_NAME( drotmg, DROTMG ) void BLAS_drotmg( double *d1, double *d2, double *x1, double const *y1, double *param ); +#endif // BLAS_PROTO -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_srotm BLAS_FORTRAN_NAME( srotm, SROTM ) +#define BLAS_drotm BLAS_FORTRAN_NAME( drotm, DROTM ) + +#ifdef BLAS_PROTO void BLAS_srotm( blas_int const *n, float *x, blas_int const *incx, float *y, blas_int const *incy, float const *param ); -#define BLAS_drotm BLAS_FORTRAN_NAME( drotm, DROTM ) void BLAS_drotm( blas_int const *n, double *x, blas_int const *incx, double *y, blas_int const *incy, double const *param ); +#endif // BLAS_PROTO // ============================================================================= // Level 2 BLAS - Fortran prototypes -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_sgemv_base BLAS_FORTRAN_NAME( sgemv, SGEMV ) +#define BLAS_dgemv_base BLAS_FORTRAN_NAME( dgemv, DGEMV ) +#define BLAS_cgemv_base BLAS_FORTRAN_NAME( cgemv, CGEMV ) +#define BLAS_zgemv_base BLAS_FORTRAN_NAME( zgemv, ZGEMV ) + +#ifdef BLAS_FORTRAN_STRLEN_END + // Pass 1 for string lengths. + #define BLAS_sgemv( ... ) BLAS_sgemv_base( __VA_ARGS__, 1 ) + #define BLAS_dgemv( ... ) BLAS_dgemv_base( __VA_ARGS__, 1 ) + #define BLAS_cgemv( ... ) BLAS_cgemv_base( __VA_ARGS__, 1 ) + #define BLAS_zgemv( ... ) BLAS_zgemv_base( __VA_ARGS__, 1 ) +#else + #define BLAS_sgemv( ... ) BLAS_sgemv_base( __VA_ARGS__ ) + #define BLAS_dgemv( ... ) BLAS_dgemv_base( __VA_ARGS__ ) + #define BLAS_cgemv( ... ) BLAS_cgemv_base( __VA_ARGS__ ) + #define BLAS_zgemv( ... 
) BLAS_zgemv_base( __VA_ARGS__ ) +#endif + +#ifdef BLAS_PROTO void BLAS_sgemv_base( char const *trans, blas_int const *m, blas_int const *n, @@ -398,7 +460,6 @@ void BLAS_sgemv_base( #endif ); -#define BLAS_dgemv_base BLAS_FORTRAN_NAME( dgemv, DGEMV ) void BLAS_dgemv_base( char const *trans, blas_int const *m, blas_int const *n, @@ -412,7 +473,6 @@ void BLAS_dgemv_base( #endif ); -#define BLAS_cgemv_base BLAS_FORTRAN_NAME( cgemv, CGEMV ) void BLAS_cgemv_base( char const *trans, blas_int const *m, blas_int const *n, @@ -426,7 +486,6 @@ void BLAS_cgemv_base( #endif ); -#define BLAS_zgemv_base BLAS_FORTRAN_NAME( zgemv, ZGEMV ) void BLAS_zgemv_base( char const *trans, blas_int const *m, blas_int const *n, @@ -439,22 +498,13 @@ void BLAS_zgemv_base( , size_t trans_len #endif ); +#endif // BLAS_PROTO -#ifdef BLAS_FORTRAN_STRLEN_END - // Pass 1 for string lengths. - #define BLAS_sgemv( ... ) BLAS_sgemv_base( __VA_ARGS__, 1 ) - #define BLAS_dgemv( ... ) BLAS_dgemv_base( __VA_ARGS__, 1 ) - #define BLAS_cgemv( ... ) BLAS_cgemv_base( __VA_ARGS__, 1 ) - #define BLAS_zgemv( ... ) BLAS_zgemv_base( __VA_ARGS__, 1 ) -#else - #define BLAS_sgemv( ... ) BLAS_sgemv_base( __VA_ARGS__ ) - #define BLAS_dgemv( ... ) BLAS_dgemv_base( __VA_ARGS__ ) - #define BLAS_cgemv( ... ) BLAS_cgemv_base( __VA_ARGS__ ) - #define BLAS_zgemv( ... 
) BLAS_zgemv_base( __VA_ARGS__ ) -#endif - -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_sger BLAS_FORTRAN_NAME( sger, SGER ) +#define BLAS_dger BLAS_FORTRAN_NAME( dger, DGER ) + +#ifdef BLAS_PROTO void BLAS_sger( blas_int const *m, blas_int const *n, float const *alpha, @@ -462,16 +512,19 @@ void BLAS_sger( float const *y, blas_int const *incy, float *A, blas_int const *lda ); -#define BLAS_dger BLAS_FORTRAN_NAME( dger, DGER ) void BLAS_dger( blas_int const *m, blas_int const *n, double const *alpha, double const *x, blas_int const *incx, double const *y, blas_int const *incy, double *A, blas_int const *lda ); +#endif // BLAS_PROTO -// ----------------------------------------------------------------------------- +//---------------------------------------- #define BLAS_cgerc BLAS_FORTRAN_NAME( cgerc, CGERC ) +#define BLAS_zgerc BLAS_FORTRAN_NAME( zgerc, ZGERC ) + +#ifdef BLAS_PROTO void BLAS_cgerc( blas_int const *m, blas_int const *n, blas_complex_float const *alpha, @@ -479,16 +532,19 @@ void BLAS_cgerc( blas_complex_float const *y, blas_int const *incy, blas_complex_float *A, blas_int const *lda ); -#define BLAS_zgerc BLAS_FORTRAN_NAME( zgerc, ZGERC ) void BLAS_zgerc( blas_int const *m, blas_int const *n, blas_complex_double const *alpha, blas_complex_double const *x, blas_int const *incx, blas_complex_double const *y, blas_int const *incy, blas_complex_double *A, blas_int const *lda ); +#endif // BLAS_PROTO -// ----------------------------------------------------------------------------- +//---------------------------------------- #define BLAS_cgeru BLAS_FORTRAN_NAME( cgeru, CGERU ) +#define BLAS_zgeru BLAS_FORTRAN_NAME( zgeru, ZGERU ) + +#ifdef BLAS_PROTO void BLAS_cgeru( blas_int const *m, blas_int const *n, blas_complex_float const *alpha, @@ -496,16 +552,34 @@ void BLAS_cgeru( blas_complex_float const *y, blas_int const *incy, 
blas_complex_float *A, blas_int const *lda ); -#define BLAS_zgeru BLAS_FORTRAN_NAME( zgeru, ZGERU ) void BLAS_zgeru( blas_int const *m, blas_int const *n, blas_complex_double const *alpha, blas_complex_double const *x, blas_int const *incx, blas_complex_double const *y, blas_int const *incy, blas_complex_double *A, blas_int const *lda ); +#endif // BLAS_PROTO -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #define BLAS_ssymv_base BLAS_FORTRAN_NAME( ssymv, SSYMV ) +#define BLAS_dsymv_base BLAS_FORTRAN_NAME( dsymv, DSYMV ) +// #define BLAS_csymv_base BLAS_FORTRAN_NAME( csymv, CSYMV ) +// #define BLAS_zsymv_base BLAS_FORTRAN_NAME( zsymv, ZSYMV ) + +#ifdef BLAS_FORTRAN_STRLEN_END + // Pass 1 for string lengths. + #define BLAS_ssymv( ... ) BLAS_ssymv_base( __VA_ARGS__, 1 ) + #define BLAS_dsymv( ... ) BLAS_dsymv_base( __VA_ARGS__, 1 ) + //#define BLAS_csymv( ... ) BLAS_csymv_base( __VA_ARGS__, 1 ) + //#define BLAS_zsymv( ... ) BLAS_zsymv_base( __VA_ARGS__, 1 ) +#else + #define BLAS_ssymv( ... ) BLAS_ssymv_base( __VA_ARGS__ ) + #define BLAS_dsymv( ... ) BLAS_dsymv_base( __VA_ARGS__ ) + //#define BLAS_csymv( ... ) BLAS_csymv_base( __VA_ARGS__ ) + //#define BLAS_zsymv( ... ) BLAS_zsymv_base( __VA_ARGS__ ) +#endif + +#ifdef BLAS_PROTO void BLAS_ssymv_base( char const *uplo, blas_int const *n, @@ -519,7 +593,6 @@ void BLAS_ssymv_base( #endif ); -#define BLAS_dsymv_base BLAS_FORTRAN_NAME( dsymv, DSYMV ) void BLAS_dsymv_base( char const *uplo, blas_int const *n, @@ -534,7 +607,6 @@ void BLAS_dsymv_base( ); // [cz]symv moved to LAPACK++ since they are provided by LAPACK. 
-// #define BLAS_csymv_base BLAS_FORTRAN_NAME( csymv, CSYMV ) // void BLAS_csymv_base( // char const *uplo, // blas_int const *n, @@ -548,7 +620,6 @@ void BLAS_dsymv_base( // #endif // ); // -// #define BLAS_zsymv_base BLAS_FORTRAN_NAME( zsymv, ZSYMV ) // void BLAS_zsymv_base( // char const *uplo, // blas_int const *n, @@ -561,22 +632,22 @@ void BLAS_dsymv_base( // , size_t uplo_len // #endif // ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_chemv_base BLAS_FORTRAN_NAME( chemv, CHEMV ) +#define BLAS_zhemv_base BLAS_FORTRAN_NAME( zhemv, ZHEMV ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_ssymv( ... ) BLAS_ssymv_base( __VA_ARGS__, 1 ) - #define BLAS_dsymv( ... ) BLAS_dsymv_base( __VA_ARGS__, 1 ) - //#define BLAS_csymv( ... ) BLAS_csymv_base( __VA_ARGS__, 1 ) - //#define BLAS_zsymv( ... ) BLAS_zsymv_base( __VA_ARGS__, 1 ) + #define BLAS_chemv( ... ) BLAS_chemv_base( __VA_ARGS__, 1 ) + #define BLAS_zhemv( ... ) BLAS_zhemv_base( __VA_ARGS__, 1 ) #else - #define BLAS_ssymv( ... ) BLAS_ssymv_base( __VA_ARGS__ ) - #define BLAS_dsymv( ... ) BLAS_dsymv_base( __VA_ARGS__ ) - //#define BLAS_csymv( ... ) BLAS_csymv_base( __VA_ARGS__ ) - //#define BLAS_zsymv( ... ) BLAS_zsymv_base( __VA_ARGS__ ) + #define BLAS_chemv( ... ) BLAS_chemv_base( __VA_ARGS__ ) + #define BLAS_zhemv( ... 
) BLAS_zhemv_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- -#define BLAS_chemv_base BLAS_FORTRAN_NAME( chemv, CHEMV ) +#ifdef BLAS_PROTO void BLAS_chemv_base( char const *uplo, blas_int const *n, @@ -590,7 +661,6 @@ void BLAS_chemv_base( #endif ); -#define BLAS_zhemv_base BLAS_FORTRAN_NAME( zhemv, ZHEMV ) void BLAS_zhemv_base( char const *uplo, blas_int const *n, @@ -603,18 +673,28 @@ void BLAS_zhemv_base( , size_t uplo_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_ssyr_base BLAS_FORTRAN_NAME( ssyr, SSYR ) +#define BLAS_dsyr_base BLAS_FORTRAN_NAME( dsyr, DSYR ) +//#define BLAS_zsyr_base BLAS_FORTRAN_NAME( zsyr, ZSYR ) +//#define BLAS_csyr_base BLAS_FORTRAN_NAME( csyr, CSYR ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_chemv( ... ) BLAS_chemv_base( __VA_ARGS__, 1 ) - #define BLAS_zhemv( ... ) BLAS_zhemv_base( __VA_ARGS__, 1 ) + #define BLAS_ssyr( ... ) BLAS_ssyr_base( __VA_ARGS__, 1 ) + #define BLAS_dsyr( ... ) BLAS_dsyr_base( __VA_ARGS__, 1 ) + //#define BLAS_csyr( ... ) BLAS_csyr_base( __VA_ARGS__, 1 ) + //#define BLAS_zsyr( ... ) BLAS_zsyr_base( __VA_ARGS__, 1 ) #else - #define BLAS_chemv( ... ) BLAS_chemv_base( __VA_ARGS__ ) - #define BLAS_zhemv( ... ) BLAS_zhemv_base( __VA_ARGS__ ) + #define BLAS_ssyr( ... ) BLAS_ssyr_base( __VA_ARGS__ ) + #define BLAS_dsyr( ... ) BLAS_dsyr_base( __VA_ARGS__ ) + //#define BLAS_csyr( ... ) BLAS_csyr_base( __VA_ARGS__ ) + //#define BLAS_zsyr( ... 
) BLAS_zsyr_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- -#define BLAS_ssyr_base BLAS_FORTRAN_NAME( ssyr, SSYR ) +#ifdef BLAS_PROTO void BLAS_ssyr_base( char const *uplo, blas_int const *n, @@ -626,7 +706,6 @@ void BLAS_ssyr_base( #endif ); -#define BLAS_dsyr_base BLAS_FORTRAN_NAME( dsyr, DSYR ) void BLAS_dsyr_base( char const *uplo, blas_int const *n, @@ -639,7 +718,6 @@ void BLAS_dsyr_base( ); // conflicts with current prototype in lapacke.h -//#define BLAS_csyr_base BLAS_FORTRAN_NAME( csyr, CSYR ) //void BLAS_FORTRAN_NAME( csyr, CSYR )( // char const *uplo, // blas_int const *n, @@ -651,7 +729,6 @@ void BLAS_dsyr_base( // #endif // ); // -//#define BLAS_zsyr_base BLAS_FORTRAN_NAME( zsyr, ZSYR ) //void BLAS_zsyr_base( // char const *uplo, // blas_int const *n, @@ -662,23 +739,23 @@ void BLAS_dsyr_base( // , size_t uplo_len // #endif // ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_cher_base BLAS_FORTRAN_NAME( cher, CHER ) +#define BLAS_zher_base BLAS_FORTRAN_NAME( zher, ZHER ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_ssyr( ... ) BLAS_ssyr_base( __VA_ARGS__, 1 ) - #define BLAS_dsyr( ... ) BLAS_dsyr_base( __VA_ARGS__, 1 ) - //#define BLAS_csyr( ... ) BLAS_csyr_base( __VA_ARGS__, 1 ) - //#define BLAS_zsyr( ... ) BLAS_zsyr_base( __VA_ARGS__, 1 ) + #define BLAS_cher( ... ) BLAS_cher_base( __VA_ARGS__, 1 ) + #define BLAS_zher( ... ) BLAS_zher_base( __VA_ARGS__, 1 ) #else - #define BLAS_ssyr( ... ) BLAS_ssyr_base( __VA_ARGS__ ) - #define BLAS_dsyr( ... ) BLAS_dsyr_base( __VA_ARGS__ ) - //#define BLAS_csyr( ... ) BLAS_csyr_base( __VA_ARGS__ ) - //#define BLAS_zsyr( ... ) BLAS_zsyr_base( __VA_ARGS__ ) + #define BLAS_cher( ... ) BLAS_cher_base( __VA_ARGS__ ) + #define BLAS_zher( ... 
) BLAS_zher_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- +#ifdef BLAS_PROTO // alpha is real -#define BLAS_cher_base BLAS_FORTRAN_NAME( cher, CHER ) void BLAS_cher_base( char const *uplo, blas_int const *n, @@ -690,7 +767,6 @@ void BLAS_cher_base( #endif ); -#define BLAS_zher_base BLAS_FORTRAN_NAME( zher, ZHER ) void BLAS_zher_base( char const *uplo, blas_int const *n, @@ -701,19 +777,23 @@ void BLAS_zher_base( , size_t uplo_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_ssyr2_base BLAS_FORTRAN_NAME( ssyr2, SSYR2 ) +#define BLAS_dsyr2_base BLAS_FORTRAN_NAME( dsyr2, DSYR2 ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_cher( ... ) BLAS_cher_base( __VA_ARGS__, 1 ) - #define BLAS_zher( ... ) BLAS_zher_base( __VA_ARGS__, 1 ) + #define BLAS_ssyr2( ... ) BLAS_ssyr2_base( __VA_ARGS__, 1 ) + #define BLAS_dsyr2( ... ) BLAS_dsyr2_base( __VA_ARGS__, 1 ) #else - #define BLAS_cher( ... ) BLAS_cher_base( __VA_ARGS__ ) - #define BLAS_zher( ... ) BLAS_zher_base( __VA_ARGS__ ) + #define BLAS_ssyr2( ... ) BLAS_ssyr2_base( __VA_ARGS__ ) + #define BLAS_dsyr2( ... ) BLAS_dsyr2_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- +#ifdef BLAS_PROTO // [cz]syr2 not available in standard BLAS or LAPACK; use [cz]syr2k with k=1. 
-#define BLAS_ssyr2_base BLAS_FORTRAN_NAME( ssyr2, SSYR2 ) void BLAS_ssyr2_base( char const *uplo, blas_int const *n, @@ -726,7 +806,6 @@ void BLAS_ssyr2_base( #endif ); -#define BLAS_dsyr2_base BLAS_FORTRAN_NAME( dsyr2, DSYR2 ) void BLAS_dsyr2_base( char const *uplo, blas_int const *n, @@ -738,18 +817,22 @@ void BLAS_dsyr2_base( , size_t uplo_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_cher2_base BLAS_FORTRAN_NAME( cher2, CHER2 ) +#define BLAS_zher2_base BLAS_FORTRAN_NAME( zher2, ZHER2 ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_ssyr2( ... ) BLAS_ssyr2_base( __VA_ARGS__, 1 ) - #define BLAS_dsyr2( ... ) BLAS_dsyr2_base( __VA_ARGS__, 1 ) + #define BLAS_cher2( ... ) BLAS_cher2_base( __VA_ARGS__, 1 ) + #define BLAS_zher2( ... ) BLAS_zher2_base( __VA_ARGS__, 1 ) #else - #define BLAS_ssyr2( ... ) BLAS_ssyr2_base( __VA_ARGS__ ) - #define BLAS_dsyr2( ... ) BLAS_dsyr2_base( __VA_ARGS__ ) + #define BLAS_cher2( ... ) BLAS_cher2_base( __VA_ARGS__ ) + #define BLAS_zher2( ... 
) BLAS_zher2_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- -#define BLAS_cher2_base BLAS_FORTRAN_NAME( cher2, CHER2 ) +#ifdef BLAS_PROTO void BLAS_cher2_base( char const *uplo, blas_int const *n, @@ -762,7 +845,6 @@ void BLAS_cher2_base( #endif ); -#define BLAS_zher2_base BLAS_FORTRAN_NAME( zher2, ZHER2 ) void BLAS_zher2_base( char const *uplo, blas_int const *n, @@ -774,18 +856,28 @@ void BLAS_zher2_base( , size_t uplo_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_strmv_base BLAS_FORTRAN_NAME( strmv, STRMV ) +#define BLAS_dtrmv_base BLAS_FORTRAN_NAME( dtrmv, DTRMV ) +#define BLAS_ctrmv_base BLAS_FORTRAN_NAME( ctrmv, CTRMV ) +#define BLAS_ztrmv_base BLAS_FORTRAN_NAME( ztrmv, ZTRMV ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_cher2( ... ) BLAS_cher2_base( __VA_ARGS__, 1 ) - #define BLAS_zher2( ... ) BLAS_zher2_base( __VA_ARGS__, 1 ) + #define BLAS_strmv( ... ) BLAS_strmv_base( __VA_ARGS__, 1, 1, 1 ) + #define BLAS_dtrmv( ... ) BLAS_dtrmv_base( __VA_ARGS__, 1, 1, 1 ) + #define BLAS_ctrmv( ... ) BLAS_ctrmv_base( __VA_ARGS__, 1, 1, 1 ) + #define BLAS_ztrmv( ... ) BLAS_ztrmv_base( __VA_ARGS__, 1, 1, 1 ) #else - #define BLAS_cher2( ... ) BLAS_cher2_base( __VA_ARGS__ ) - #define BLAS_zher2( ... ) BLAS_zher2_base( __VA_ARGS__ ) + #define BLAS_strmv( ... ) BLAS_strmv_base( __VA_ARGS__ ) + #define BLAS_dtrmv( ... ) BLAS_dtrmv_base( __VA_ARGS__ ) + #define BLAS_ctrmv( ... ) BLAS_ctrmv_base( __VA_ARGS__ ) + #define BLAS_ztrmv( ... 
) BLAS_ztrmv_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- -#define BLAS_strmv_base BLAS_FORTRAN_NAME( strmv, STRMV ) +#ifdef BLAS_PROTO void BLAS_strmv_base( char const *uplo, char const *trans, char const *diag, blas_int const *n, @@ -796,7 +888,6 @@ void BLAS_strmv_base( #endif ); -#define BLAS_dtrmv_base BLAS_FORTRAN_NAME( dtrmv, DTRMV ) void BLAS_dtrmv_base( char const *uplo, char const *trans, char const *diag, blas_int const *n, @@ -807,7 +898,6 @@ void BLAS_dtrmv_base( #endif ); -#define BLAS_ctrmv_base BLAS_FORTRAN_NAME( ctrmv, CTRMV ) void BLAS_ctrmv_base( char const *uplo, char const *trans, char const *diag, blas_int const *n, @@ -818,7 +908,6 @@ void BLAS_ctrmv_base( #endif ); -#define BLAS_ztrmv_base BLAS_FORTRAN_NAME( ztrmv, ZTRMV ) void BLAS_ztrmv_base( char const *uplo, char const *trans, char const *diag, blas_int const *n, @@ -828,22 +917,28 @@ void BLAS_ztrmv_base( , size_t uplo_len, size_t trans_len, size_t diag_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_strsv_base BLAS_FORTRAN_NAME( strsv, STRSV ) +#define BLAS_dtrsv_base BLAS_FORTRAN_NAME( dtrsv, DTRSV ) +#define BLAS_ctrsv_base BLAS_FORTRAN_NAME( ctrsv, CTRSV ) +#define BLAS_ztrsv_base BLAS_FORTRAN_NAME( ztrsv, ZTRSV ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_strmv( ... ) BLAS_strmv_base( __VA_ARGS__, 1, 1, 1 ) - #define BLAS_dtrmv( ... ) BLAS_dtrmv_base( __VA_ARGS__, 1, 1, 1 ) - #define BLAS_ctrmv( ... ) BLAS_ctrmv_base( __VA_ARGS__, 1, 1, 1 ) - #define BLAS_ztrmv( ... ) BLAS_ztrmv_base( __VA_ARGS__, 1, 1, 1 ) + #define BLAS_strsv( ... ) BLAS_strsv_base( __VA_ARGS__, 1, 1, 1 ) + #define BLAS_dtrsv( ... ) BLAS_dtrsv_base( __VA_ARGS__, 1, 1, 1 ) + #define BLAS_ctrsv( ... ) BLAS_ctrsv_base( __VA_ARGS__, 1, 1, 1 ) + #define BLAS_ztrsv( ... 
) BLAS_ztrsv_base( __VA_ARGS__, 1, 1, 1 ) #else - #define BLAS_strmv( ... ) BLAS_strmv_base( __VA_ARGS__ ) - #define BLAS_dtrmv( ... ) BLAS_dtrmv_base( __VA_ARGS__ ) - #define BLAS_ctrmv( ... ) BLAS_ctrmv_base( __VA_ARGS__ ) - #define BLAS_ztrmv( ... ) BLAS_ztrmv_base( __VA_ARGS__ ) + #define BLAS_strsv( ... ) BLAS_strsv_base( __VA_ARGS__ ) + #define BLAS_dtrsv( ... ) BLAS_dtrsv_base( __VA_ARGS__ ) + #define BLAS_ctrsv( ... ) BLAS_ctrsv_base( __VA_ARGS__ ) + #define BLAS_ztrsv( ... ) BLAS_ztrsv_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- -#define BLAS_strsv_base BLAS_FORTRAN_NAME( strsv, STRSV ) +#ifdef BLAS_PROTO void BLAS_strsv_base( char const *uplo, char const *trans, char const *diag, blas_int const *n, @@ -854,7 +949,6 @@ void BLAS_strsv_base( #endif ); -#define BLAS_dtrsv_base BLAS_FORTRAN_NAME( dtrsv, DTRSV ) void BLAS_dtrsv_base( char const *uplo, char const *trans, char const *diag, blas_int const *n, @@ -865,7 +959,6 @@ void BLAS_dtrsv_base( #endif ); -#define BLAS_ctrsv_base BLAS_FORTRAN_NAME( ctrsv, CTRSV ) void BLAS_ctrsv_base( char const *uplo, char const *trans, char const *diag, blas_int const *n, @@ -876,7 +969,6 @@ void BLAS_ctrsv_base( #endif ); -#define BLAS_ztrsv_base BLAS_FORTRAN_NAME( ztrsv, ZTRSV ) void BLAS_ztrsv_base( char const *uplo, char const *trans, char const *diag, blas_int const *n, @@ -886,25 +978,31 @@ void BLAS_ztrsv_base( , size_t uplo_len, size_t trans_len, size_t diag_len #endif ); +#endif // BLAS_PROTO + +// ============================================================================= +// Level 3 BLAS - Fortran prototypes + +//------------------------------------------------------------------------------ +#define BLAS_sgemm_base BLAS_FORTRAN_NAME( sgemm, SGEMM ) +#define BLAS_dgemm_base BLAS_FORTRAN_NAME( dgemm, DGEMM ) +#define BLAS_cgemm_base BLAS_FORTRAN_NAME( cgemm, CGEMM ) +#define BLAS_zgemm_base BLAS_FORTRAN_NAME( zgemm, ZGEMM ) #ifdef 
BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_strsv( ... ) BLAS_strsv_base( __VA_ARGS__, 1, 1, 1 ) - #define BLAS_dtrsv( ... ) BLAS_dtrsv_base( __VA_ARGS__, 1, 1, 1 ) - #define BLAS_ctrsv( ... ) BLAS_ctrsv_base( __VA_ARGS__, 1, 1, 1 ) - #define BLAS_ztrsv( ... ) BLAS_ztrsv_base( __VA_ARGS__, 1, 1, 1 ) + #define BLAS_sgemm( ... ) BLAS_sgemm_base( __VA_ARGS__, 1, 1 ) + #define BLAS_dgemm( ... ) BLAS_dgemm_base( __VA_ARGS__, 1, 1 ) + #define BLAS_cgemm( ... ) BLAS_cgemm_base( __VA_ARGS__, 1, 1 ) + #define BLAS_zgemm( ... ) BLAS_zgemm_base( __VA_ARGS__, 1, 1 ) #else - #define BLAS_strsv( ... ) BLAS_strsv_base( __VA_ARGS__ ) - #define BLAS_dtrsv( ... ) BLAS_dtrsv_base( __VA_ARGS__ ) - #define BLAS_ctrsv( ... ) BLAS_ctrsv_base( __VA_ARGS__ ) - #define BLAS_ztrsv( ... ) BLAS_ztrsv_base( __VA_ARGS__ ) + #define BLAS_sgemm( ... ) BLAS_sgemm_base( __VA_ARGS__ ) + #define BLAS_dgemm( ... ) BLAS_dgemm_base( __VA_ARGS__ ) + #define BLAS_cgemm( ... ) BLAS_cgemm_base( __VA_ARGS__ ) + #define BLAS_zgemm( ... 
) BLAS_zgemm_base( __VA_ARGS__ ) #endif -// ============================================================================= -// Level 3 BLAS - Fortran prototypes - -// ----------------------------------------------------------------------------- -#define BLAS_sgemm_base BLAS_FORTRAN_NAME( sgemm, SGEMM ) +#ifdef BLAS_PROTO void BLAS_sgemm_base( char const *transA, char const *transB, blas_int const *m, blas_int const *n, blas_int const *k, @@ -918,7 +1016,6 @@ void BLAS_sgemm_base( #endif ); -#define BLAS_dgemm_base BLAS_FORTRAN_NAME( dgemm, DGEMM ) void BLAS_dgemm_base( char const *transA, char const *transB, blas_int const *m, blas_int const *n, blas_int const *k, @@ -932,7 +1029,6 @@ void BLAS_dgemm_base( #endif ); -#define BLAS_cgemm_base BLAS_FORTRAN_NAME( cgemm, CGEMM ) void BLAS_cgemm_base( char const *transA, char const *transB, blas_int const *m, blas_int const *n, blas_int const *k, @@ -946,7 +1042,6 @@ void BLAS_cgemm_base( #endif ); -#define BLAS_zgemm_base BLAS_FORTRAN_NAME( zgemm, ZGEMM ) void BLAS_zgemm_base( char const *transA, char const *transB, blas_int const *m, blas_int const *n, blas_int const *k, @@ -959,22 +1054,28 @@ void BLAS_zgemm_base( , size_t transA_len, size_t transB_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_ssymm_base BLAS_FORTRAN_NAME( ssymm, SSYMM ) +#define BLAS_dsymm_base BLAS_FORTRAN_NAME( dsymm, DSYMM ) +#define BLAS_csymm_base BLAS_FORTRAN_NAME( csymm, CSYMM ) +#define BLAS_zsymm_base BLAS_FORTRAN_NAME( zsymm, ZSYMM ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_sgemm( ... ) BLAS_sgemm_base( __VA_ARGS__, 1, 1 ) - #define BLAS_dgemm( ... ) BLAS_dgemm_base( __VA_ARGS__, 1, 1 ) - #define BLAS_cgemm( ... ) BLAS_cgemm_base( __VA_ARGS__, 1, 1 ) - #define BLAS_zgemm( ... ) BLAS_zgemm_base( __VA_ARGS__, 1, 1 ) + #define BLAS_ssymm( ... ) BLAS_ssymm_base( __VA_ARGS__, 1, 1 ) + #define BLAS_dsymm( ... 
) BLAS_dsymm_base( __VA_ARGS__, 1, 1 ) + #define BLAS_csymm( ... ) BLAS_csymm_base( __VA_ARGS__, 1, 1 ) + #define BLAS_zsymm( ... ) BLAS_zsymm_base( __VA_ARGS__, 1, 1 ) #else - #define BLAS_sgemm( ... ) BLAS_sgemm_base( __VA_ARGS__ ) - #define BLAS_dgemm( ... ) BLAS_dgemm_base( __VA_ARGS__ ) - #define BLAS_cgemm( ... ) BLAS_cgemm_base( __VA_ARGS__ ) - #define BLAS_zgemm( ... ) BLAS_zgemm_base( __VA_ARGS__ ) + #define BLAS_ssymm( ... ) BLAS_ssymm_base( __VA_ARGS__ ) + #define BLAS_dsymm( ... ) BLAS_dsymm_base( __VA_ARGS__ ) + #define BLAS_csymm( ... ) BLAS_csymm_base( __VA_ARGS__ ) + #define BLAS_zsymm( ... ) BLAS_zsymm_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- -#define BLAS_ssymm_base BLAS_FORTRAN_NAME( ssymm, SSYMM ) +#ifdef BLAS_PROTO void BLAS_ssymm_base( char const *side, char const *uplo, blas_int const *m, blas_int const *n, @@ -988,7 +1089,6 @@ void BLAS_ssymm_base( #endif ); -#define BLAS_dsymm_base BLAS_FORTRAN_NAME( dsymm, DSYMM ) void BLAS_dsymm_base( char const *side, char const *uplo, blas_int const *m, blas_int const *n, @@ -1002,7 +1102,6 @@ void BLAS_dsymm_base( #endif ); -#define BLAS_csymm_base BLAS_FORTRAN_NAME( csymm, CSYMM ) void BLAS_csymm_base( char const *side, char const *uplo, blas_int const *m, blas_int const *n, @@ -1016,7 +1115,6 @@ void BLAS_csymm_base( #endif ); -#define BLAS_zsymm_base BLAS_FORTRAN_NAME( zsymm, ZSYMM ) void BLAS_zsymm_base( char const *side, char const *uplo, blas_int const *m, blas_int const *n, @@ -1029,22 +1127,22 @@ void BLAS_zsymm_base( , size_t side_len, size_t uplo_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_chemm_base BLAS_FORTRAN_NAME( chemm, CHEMM ) +#define BLAS_zhemm_base BLAS_FORTRAN_NAME( zhemm, ZHEMM ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_ssymm( ... 
) BLAS_ssymm_base( __VA_ARGS__, 1, 1 ) - #define BLAS_dsymm( ... ) BLAS_dsymm_base( __VA_ARGS__, 1, 1 ) - #define BLAS_csymm( ... ) BLAS_csymm_base( __VA_ARGS__, 1, 1 ) - #define BLAS_zsymm( ... ) BLAS_zsymm_base( __VA_ARGS__, 1, 1 ) + #define BLAS_chemm( ... ) BLAS_chemm_base( __VA_ARGS__, 1, 1 ) + #define BLAS_zhemm( ... ) BLAS_zhemm_base( __VA_ARGS__, 1, 1 ) #else - #define BLAS_ssymm( ... ) BLAS_ssymm_base( __VA_ARGS__ ) - #define BLAS_dsymm( ... ) BLAS_dsymm_base( __VA_ARGS__ ) - #define BLAS_csymm( ... ) BLAS_csymm_base( __VA_ARGS__ ) - #define BLAS_zsymm( ... ) BLAS_zsymm_base( __VA_ARGS__ ) + #define BLAS_chemm( ... ) BLAS_chemm_base( __VA_ARGS__ ) + #define BLAS_zhemm( ... ) BLAS_zhemm_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- -#define BLAS_chemm_base BLAS_FORTRAN_NAME( chemm, CHEMM ) +#ifdef BLAS_PROTO void BLAS_chemm_base( char const *side, char const *uplo, blas_int const *m, blas_int const *n, @@ -1058,7 +1156,6 @@ void BLAS_chemm_base( #endif ); -#define BLAS_zhemm_base BLAS_FORTRAN_NAME( zhemm, ZHEMM ) void BLAS_zhemm_base( char const *side, char const *uplo, blas_int const *m, blas_int const *n, @@ -1071,18 +1168,28 @@ void BLAS_zhemm_base( , size_t side_len, size_t uplo_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_ssyrk_base BLAS_FORTRAN_NAME( ssyrk, SSYRK ) +#define BLAS_dsyrk_base BLAS_FORTRAN_NAME( dsyrk, DSYRK ) +#define BLAS_csyrk_base BLAS_FORTRAN_NAME( csyrk, CSYRK ) +#define BLAS_zsyrk_base BLAS_FORTRAN_NAME( zsyrk, ZSYRK ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_chemm( ... ) BLAS_chemm_base( __VA_ARGS__, 1, 1 ) - #define BLAS_zhemm( ... ) BLAS_zhemm_base( __VA_ARGS__, 1, 1 ) + #define BLAS_ssyrk( ... ) BLAS_ssyrk_base( __VA_ARGS__, 1, 1 ) + #define BLAS_dsyrk( ... ) BLAS_dsyrk_base( __VA_ARGS__, 1, 1 ) + #define BLAS_csyrk( ... 
) BLAS_csyrk_base( __VA_ARGS__, 1, 1 ) + #define BLAS_zsyrk( ... ) BLAS_zsyrk_base( __VA_ARGS__, 1, 1 ) #else - #define BLAS_chemm( ... ) BLAS_chemm_base( __VA_ARGS__ ) - #define BLAS_zhemm( ... ) BLAS_zhemm_base( __VA_ARGS__ ) + #define BLAS_ssyrk( ... ) BLAS_ssyrk_base( __VA_ARGS__ ) + #define BLAS_dsyrk( ... ) BLAS_dsyrk_base( __VA_ARGS__ ) + #define BLAS_csyrk( ... ) BLAS_csyrk_base( __VA_ARGS__ ) + #define BLAS_zsyrk( ... ) BLAS_zsyrk_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- -#define BLAS_ssyrk_base BLAS_FORTRAN_NAME( ssyrk, SSYRK ) +#ifdef BLAS_PROTO void BLAS_ssyrk_base( char const *uplo, char const *transA, blas_int const *n, blas_int const *k, @@ -1095,7 +1202,6 @@ void BLAS_ssyrk_base( #endif ); -#define BLAS_dsyrk_base BLAS_FORTRAN_NAME( dsyrk, DSYRK ) void BLAS_dsyrk_base( char const *uplo, char const *transA, blas_int const *n, blas_int const *k, @@ -1108,7 +1214,6 @@ void BLAS_dsyrk_base( #endif ); -#define BLAS_csyrk_base BLAS_FORTRAN_NAME( csyrk, CSYRK ) void BLAS_csyrk_base( char const *uplo, char const *transA, blas_int const *n, blas_int const *k, @@ -1121,7 +1226,6 @@ void BLAS_csyrk_base( #endif ); -#define BLAS_zsyrk_base BLAS_FORTRAN_NAME( zsyrk, ZSYRK ) void BLAS_zsyrk_base( char const *uplo, char const *transA, blas_int const *n, blas_int const *k, @@ -1133,23 +1237,23 @@ void BLAS_zsyrk_base( , size_t uplo_len, size_t transA_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_cherk_base BLAS_FORTRAN_NAME( cherk, CHERK ) +#define BLAS_zherk_base BLAS_FORTRAN_NAME( zherk, ZHERK ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_ssyrk( ... ) BLAS_ssyrk_base( __VA_ARGS__, 1, 1 ) - #define BLAS_dsyrk( ... ) BLAS_dsyrk_base( __VA_ARGS__, 1, 1 ) - #define BLAS_csyrk( ... ) BLAS_csyrk_base( __VA_ARGS__, 1, 1 ) - #define BLAS_zsyrk( ... 
) BLAS_zsyrk_base( __VA_ARGS__, 1, 1 ) + #define BLAS_cherk( ... ) BLAS_cherk_base( __VA_ARGS__, 1, 1 ) + #define BLAS_zherk( ... ) BLAS_zherk_base( __VA_ARGS__, 1, 1 ) #else - #define BLAS_ssyrk( ... ) BLAS_ssyrk_base( __VA_ARGS__ ) - #define BLAS_dsyrk( ... ) BLAS_dsyrk_base( __VA_ARGS__ ) - #define BLAS_csyrk( ... ) BLAS_csyrk_base( __VA_ARGS__ ) - #define BLAS_zsyrk( ... ) BLAS_zsyrk_base( __VA_ARGS__ ) + #define BLAS_cherk( ... ) BLAS_cherk_base( __VA_ARGS__ ) + #define BLAS_zherk( ... ) BLAS_zherk_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- +#ifdef BLAS_PROTO // alpha and beta are real -#define BLAS_cherk_base BLAS_FORTRAN_NAME( cherk, CHERK ) void BLAS_cherk_base( char const *uplo, char const *transA, blas_int const *n, blas_int const *k, @@ -1162,7 +1266,6 @@ void BLAS_cherk_base( #endif ); -#define BLAS_zherk_base BLAS_FORTRAN_NAME( zherk, ZHERK ) void BLAS_zherk_base( char const *uplo, char const *transA, blas_int const *n, blas_int const *k, @@ -1174,18 +1277,28 @@ void BLAS_zherk_base( , size_t uplo_len, size_t transA_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_ssyr2k_base BLAS_FORTRAN_NAME( ssyr2k, SSYR2K ) +#define BLAS_dsyr2k_base BLAS_FORTRAN_NAME( dsyr2k, DSYR2K ) +#define BLAS_csyr2k_base BLAS_FORTRAN_NAME( csyr2k, CSYR2K ) +#define BLAS_zsyr2k_base BLAS_FORTRAN_NAME( zsyr2k, ZSYR2K ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_cherk( ... ) BLAS_cherk_base( __VA_ARGS__, 1, 1 ) - #define BLAS_zherk( ... ) BLAS_zherk_base( __VA_ARGS__, 1, 1 ) + #define BLAS_ssyr2k( ... ) BLAS_ssyr2k_base( __VA_ARGS__, 1, 1 ) + #define BLAS_dsyr2k( ... ) BLAS_dsyr2k_base( __VA_ARGS__, 1, 1 ) + #define BLAS_csyr2k( ... ) BLAS_csyr2k_base( __VA_ARGS__, 1, 1 ) + #define BLAS_zsyr2k( ... ) BLAS_zsyr2k_base( __VA_ARGS__, 1, 1 ) #else - #define BLAS_cherk( ... 
) BLAS_cherk_base( __VA_ARGS__ ) - #define BLAS_zherk( ... ) BLAS_zherk_base( __VA_ARGS__ ) + #define BLAS_ssyr2k( ... ) BLAS_ssyr2k_base( __VA_ARGS__ ) + #define BLAS_dsyr2k( ... ) BLAS_dsyr2k_base( __VA_ARGS__ ) + #define BLAS_csyr2k( ... ) BLAS_csyr2k_base( __VA_ARGS__ ) + #define BLAS_zsyr2k( ... ) BLAS_zsyr2k_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- -#define BLAS_ssyr2k_base BLAS_FORTRAN_NAME( ssyr2k, SSYR2K ) +#ifdef BLAS_PROTO void BLAS_ssyr2k_base( char const *uplo, char const *transA, blas_int const *n, blas_int const *k, @@ -1199,7 +1312,6 @@ void BLAS_ssyr2k_base( #endif ); -#define BLAS_dsyr2k_base BLAS_FORTRAN_NAME( dsyr2k, DSYR2K ) void BLAS_dsyr2k_base( char const *uplo, char const *transA, blas_int const *n, blas_int const *k, @@ -1213,7 +1325,6 @@ void BLAS_dsyr2k_base( #endif ); -#define BLAS_csyr2k_base BLAS_FORTRAN_NAME( csyr2k, CSYR2K ) void BLAS_csyr2k_base( char const *uplo, char const *transA, blas_int const *n, blas_int const *k, @@ -1227,7 +1338,6 @@ void BLAS_csyr2k_base( #endif ); -#define BLAS_zsyr2k_base BLAS_FORTRAN_NAME( zsyr2k, ZSYR2K ) void BLAS_zsyr2k_base( char const *uplo, char const *transA, blas_int const *n, blas_int const *k, @@ -1240,23 +1350,23 @@ void BLAS_zsyr2k_base( , size_t uplo_len, size_t transA_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_cher2k_base BLAS_FORTRAN_NAME( cher2k, CHER2K ) +#define BLAS_zher2k_base BLAS_FORTRAN_NAME( zher2k, ZHER2K ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_ssyr2k( ... ) BLAS_ssyr2k_base( __VA_ARGS__, 1, 1 ) - #define BLAS_dsyr2k( ... ) BLAS_dsyr2k_base( __VA_ARGS__, 1, 1 ) - #define BLAS_csyr2k( ... ) BLAS_csyr2k_base( __VA_ARGS__, 1, 1 ) - #define BLAS_zsyr2k( ... ) BLAS_zsyr2k_base( __VA_ARGS__, 1, 1 ) + #define BLAS_cher2k( ... ) BLAS_cher2k_base( __VA_ARGS__, 1, 1 ) + #define BLAS_zher2k( ... 
) BLAS_zher2k_base( __VA_ARGS__, 1, 1 ) #else - #define BLAS_ssyr2k( ... ) BLAS_ssyr2k_base( __VA_ARGS__ ) - #define BLAS_dsyr2k( ... ) BLAS_dsyr2k_base( __VA_ARGS__ ) - #define BLAS_csyr2k( ... ) BLAS_csyr2k_base( __VA_ARGS__ ) - #define BLAS_zsyr2k( ... ) BLAS_zsyr2k_base( __VA_ARGS__ ) + #define BLAS_cher2k( ... ) BLAS_cher2k_base( __VA_ARGS__ ) + #define BLAS_zher2k( ... ) BLAS_zher2k_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- +#ifdef BLAS_PROTO // beta is real -#define BLAS_cher2k_base BLAS_FORTRAN_NAME( cher2k, CHER2K ) void BLAS_cher2k_base( char const *uplo, char const *transA, blas_int const *n, blas_int const *k, @@ -1270,7 +1380,6 @@ void BLAS_cher2k_base( #endif ); -#define BLAS_zher2k_base BLAS_FORTRAN_NAME( zher2k, ZHER2K ) void BLAS_zher2k_base( char const *uplo, char const *transA, blas_int const *n, blas_int const *k, @@ -1283,18 +1392,28 @@ void BLAS_zher2k_base( , size_t uplo_len, size_t transA_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_strmm_base BLAS_FORTRAN_NAME( strmm, STRMM ) +#define BLAS_dtrmm_base BLAS_FORTRAN_NAME( dtrmm, DTRMM ) +#define BLAS_ctrmm_base BLAS_FORTRAN_NAME( ctrmm, CTRMM ) +#define BLAS_ztrmm_base BLAS_FORTRAN_NAME( ztrmm, ZTRMM ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_cher2k( ... ) BLAS_cher2k_base( __VA_ARGS__, 1, 1 ) - #define BLAS_zher2k( ... ) BLAS_zher2k_base( __VA_ARGS__, 1, 1 ) + #define BLAS_strmm( ... ) BLAS_strmm_base( __VA_ARGS__, 1, 1, 1, 1 ) + #define BLAS_dtrmm( ... ) BLAS_dtrmm_base( __VA_ARGS__, 1, 1, 1, 1 ) + #define BLAS_ctrmm( ... ) BLAS_ctrmm_base( __VA_ARGS__, 1, 1, 1, 1 ) + #define BLAS_ztrmm( ... ) BLAS_ztrmm_base( __VA_ARGS__, 1, 1, 1, 1 ) #else - #define BLAS_cher2k( ... ) BLAS_cher2k_base( __VA_ARGS__ ) - #define BLAS_zher2k( ... ) BLAS_zher2k_base( __VA_ARGS__ ) + #define BLAS_strmm( ... 
) BLAS_strmm_base( __VA_ARGS__ ) + #define BLAS_dtrmm( ... ) BLAS_dtrmm_base( __VA_ARGS__ ) + #define BLAS_ctrmm( ... ) BLAS_ctrmm_base( __VA_ARGS__ ) + #define BLAS_ztrmm( ... ) BLAS_ztrmm_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- -#define BLAS_strmm_base BLAS_FORTRAN_NAME( strmm, STRMM ) +#ifdef BLAS_PROTO void BLAS_strmm_base( char const *side, char const *uplo, char const *trans, char const *diag, blas_int const *m, blas_int const *n, @@ -1306,7 +1425,6 @@ void BLAS_strmm_base( #endif ); -#define BLAS_dtrmm_base BLAS_FORTRAN_NAME( dtrmm, DTRMM ) void BLAS_dtrmm_base( char const *side, char const *uplo, char const *trans, char const *diag, blas_int const *m, blas_int const *n, @@ -1318,7 +1436,6 @@ void BLAS_dtrmm_base( #endif ); -#define BLAS_ctrmm_base BLAS_FORTRAN_NAME( ctrmm, CTRMM ) void BLAS_ctrmm_base( char const *side, char const *uplo, char const *trans, char const *diag, blas_int const *m, blas_int const *n, @@ -1330,7 +1447,6 @@ void BLAS_ctrmm_base( #endif ); -#define BLAS_ztrmm_base BLAS_FORTRAN_NAME( ztrmm, ZTRMM ) void BLAS_ztrmm_base( char const *side, char const *uplo, char const *trans, char const *diag, blas_int const *m, blas_int const *n, @@ -1341,22 +1457,28 @@ void BLAS_ztrmm_base( , size_t side_len, size_t uplo_len, size_t trans_len, size_t diag_len #endif ); +#endif // BLAS_PROTO + +//------------------------------------------------------------------------------ +#define BLAS_strsm_base BLAS_FORTRAN_NAME( strsm, STRSM ) +#define BLAS_dtrsm_base BLAS_FORTRAN_NAME( dtrsm, DTRSM ) +#define BLAS_ctrsm_base BLAS_FORTRAN_NAME( ctrsm, CTRSM ) +#define BLAS_ztrsm_base BLAS_FORTRAN_NAME( ztrsm, ZTRSM ) #ifdef BLAS_FORTRAN_STRLEN_END // Pass 1 for string lengths. - #define BLAS_strmm( ... ) BLAS_strmm_base( __VA_ARGS__, 1, 1, 1, 1 ) - #define BLAS_dtrmm( ... ) BLAS_dtrmm_base( __VA_ARGS__, 1, 1, 1, 1 ) - #define BLAS_ctrmm( ... 
) BLAS_ctrmm_base( __VA_ARGS__, 1, 1, 1, 1 ) - #define BLAS_ztrmm( ... ) BLAS_ztrmm_base( __VA_ARGS__, 1, 1, 1, 1 ) + #define BLAS_strsm( ... ) BLAS_strsm_base( __VA_ARGS__, 1, 1, 1, 1 ) + #define BLAS_dtrsm( ... ) BLAS_dtrsm_base( __VA_ARGS__, 1, 1, 1, 1 ) + #define BLAS_ctrsm( ... ) BLAS_ctrsm_base( __VA_ARGS__, 1, 1, 1, 1 ) + #define BLAS_ztrsm( ... ) BLAS_ztrsm_base( __VA_ARGS__, 1, 1, 1, 1 ) #else - #define BLAS_strmm( ... ) BLAS_strmm_base( __VA_ARGS__ ) - #define BLAS_dtrmm( ... ) BLAS_dtrmm_base( __VA_ARGS__ ) - #define BLAS_ctrmm( ... ) BLAS_ctrmm_base( __VA_ARGS__ ) - #define BLAS_ztrmm( ... ) BLAS_ztrmm_base( __VA_ARGS__ ) + #define BLAS_strsm( ... ) BLAS_strsm_base( __VA_ARGS__ ) + #define BLAS_dtrsm( ... ) BLAS_dtrsm_base( __VA_ARGS__ ) + #define BLAS_ctrsm( ... ) BLAS_ctrsm_base( __VA_ARGS__ ) + #define BLAS_ztrsm( ... ) BLAS_ztrsm_base( __VA_ARGS__ ) #endif -// ----------------------------------------------------------------------------- -#define BLAS_strsm_base BLAS_FORTRAN_NAME( strsm, STRSM ) +#ifdef BLAS_PROTO void BLAS_strsm_base( char const *side, char const *uplo, char const *trans, char const *diag, blas_int const *m, blas_int const *n, @@ -1368,7 +1490,6 @@ void BLAS_strsm_base( #endif ); -#define BLAS_dtrsm_base BLAS_FORTRAN_NAME( dtrsm, DTRSM ) void BLAS_dtrsm_base( char const *side, char const *uplo, char const *trans, char const *diag, blas_int const *m, blas_int const *n, @@ -1380,7 +1501,6 @@ void BLAS_dtrsm_base( #endif ); -#define BLAS_ctrsm_base BLAS_FORTRAN_NAME( ctrsm, CTRSM ) void BLAS_ctrsm_base( char const *side, char const *uplo, char const *trans, char const *diag, blas_int const *m, blas_int const *n, @@ -1392,7 +1512,6 @@ void BLAS_ctrsm_base( #endif ); -#define BLAS_ztrsm_base BLAS_FORTRAN_NAME( ztrsm, ZTRSM ) void BLAS_ztrsm_base( char const *side, char const *uplo, char const *trans, char const *diag, blas_int const *m, blas_int const *n, @@ -1403,19 +1522,7 @@ void BLAS_ztrsm_base( , size_t side_len, size_t uplo_len, 
size_t trans_len, size_t diag_len #endif ); - -#ifdef BLAS_FORTRAN_STRLEN_END - // Pass 1 for string lengths. - #define BLAS_strsm( ... ) BLAS_strsm_base( __VA_ARGS__, 1, 1, 1, 1 ) - #define BLAS_dtrsm( ... ) BLAS_dtrsm_base( __VA_ARGS__, 1, 1, 1, 1 ) - #define BLAS_ctrsm( ... ) BLAS_ctrsm_base( __VA_ARGS__, 1, 1, 1, 1 ) - #define BLAS_ztrsm( ... ) BLAS_ztrsm_base( __VA_ARGS__, 1, 1, 1, 1 ) -#else - #define BLAS_strsm( ... ) BLAS_strsm_base( __VA_ARGS__ ) - #define BLAS_dtrsm( ... ) BLAS_dtrsm_base( __VA_ARGS__ ) - #define BLAS_ctrsm( ... ) BLAS_ctrsm_base( __VA_ARGS__ ) - #define BLAS_ztrsm( ... ) BLAS_ztrsm_base( __VA_ARGS__ ) -#endif +#endif // BLAS_PROTO #ifdef __cplusplus } // #endif From 8ed2b8049155fc1aec9f46cb5248a0e84a78d5e3 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Fri, 1 Dec 2023 00:36:32 -0500 Subject: [PATCH 2/7] config: search for Apple's new Accelerate --- config/blas.cc | 20 +++++++++++++------- config/lapack.py | 45 +++++++++++++++++++++++++++++++++------------ 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/config/blas.cc b/config/blas.cc index bf6e4b13..c423df16 100644 --- a/config/blas.cc +++ b/config/blas.cc @@ -11,14 +11,20 @@ //------------------------------------------------------------------------------ #define BLAS_ddot FORTRAN_NAME( ddot, DDOT ) -// result return directly -#ifdef __cplusplus -extern "C" +#ifdef ACCELERATE_NEW_LAPACK + #pragma message "Including Accelerate.h" + #include +#else + #pragma message "Defining BLAS_ddot" + // result return directly + #ifdef __cplusplus + extern "C" + #endif + double BLAS_ddot( + const blas_int* n, + const double* x, const blas_int* incx, + const double* y, const blas_int* incy ); #endif -double BLAS_ddot( - const blas_int* n, - const double* x, const blas_int* incx, - const double* y, const blas_int* incy ); //------------------------------------------------------------------------------ int main() diff --git a/config/lapack.py b/config/lapack.py index cfaeabb5..01b23ab2 
100644 --- a/config/lapack.py +++ b/config/lapack.py @@ -335,21 +335,23 @@ def blas(): #-------------------- Apple Accelerate if (test_all or test_accelerate): - # macOS puts cblas.h in weird places. - paths = [ - '/System/Library/Frameworks/Accelerate.framework/Frameworks/vecLib.framework/Headers', - '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Headers', - ] - inc = '' - for p in paths: - if (os.path.exists( p + '/cblas.h' )): - inc = '-I' + p + ' ' + define('HAVE_ACCELERATE_CBLAS_H') - break + choices.append( + ['macOS Accelerate (new)', + {'LIBS': '-framework Accelerate', + 'CXXFLAGS': define('HAVE_ACCELERATE') + + ' -DACCELERATE_NEW_LAPACK'}]) + + # macOS 13.3, g++ 12.2 requires extra flag to parse Apple's headers. + choices.append( + ['macOS Accelerate (new, -flax-vector-conversions)', + {'LIBS': '-framework Accelerate', + 'CXXFLAGS': define('HAVE_ACCELERATE') + + ' -DACCELERATE_NEW_LAPACK -flax-vector-conversions'}]) choices.append( - ['MacOS Accelerate', + ['macOS Accelerate', {'LIBS': '-framework Accelerate', - 'CXXFLAGS': inc + define('HAVE_ACCELERATE')}]) + 'CXXFLAGS': define('HAVE_ACCELERATE')}]) # end #-------------------- generic -lblas @@ -401,6 +403,25 @@ def cblas(): ['CBLAS (cblas_ddot) in -lcblas', {'LIBS': '-lcblas'}], ] + LIBS = config.environ['LIBS'] + if ('-framework Accelerate' in LIBS): + # macOS puts cblas.h in weird places; add -I for path. + # Insert as 2nd choice, so it won't be used if 1st choice above works. + # On macOS 13, cblas.h seems to be in the compiler's default search + # path, so this is no longer needed. 
+ paths = [ + '/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers', + '/System/Library/Frameworks/Accelerate.framework/Frameworks/vecLib.framework/Headers', + '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Headers', + ] + for p in paths: + if (os.path.exists( p + '/cblas.h' )): + inc = '-I' + p + ' ' + define('HAVE_ACCELERATE_CBLAS_H') + choices.insert( 1, ['CBLAS (cblas_ddot) in BLAS library, -I' + p, + {'CXXFLAGS': inc}] ) + break + # end + passed = [] for (label, env) in choices: (rc, out, err) = config.compile_run( 'config/cblas.cc', env, label ) From 28fd5e0e69994ee6a6bdf8dd8dc902ef6a10c9dc Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Thu, 19 Sep 2024 01:09:46 -0400 Subject: [PATCH 3/7] TMP config: detect old and new Accelerate --- INSTALL.md | 6 ++--- config/blas.cc | 21 ++++++++++++---- config/cblas.cc | 5 ++++ config/config.h | 34 ++++++++++++++++++++------ config/config.py | 33 ++++++++++++++++++------- config/lapack.py | 40 +++++++++++++++++++++---------- config/lapack_potrf.cc | 30 +++++++++++++---------- config/lapack_pstrf.cc | 36 ++++++++++++++++------------ config/return_complex.cc | 19 +++++++++------ config/return_complex_argument.cc | 21 +++++++++------- config/return_float.cc | 24 ++++++++++++++----- config/return_float_f2c.cc | 19 ++++++++++----- configure.py | 1 + include/blas/fortran.h | 7 +++++- 14 files changed, 205 insertions(+), 91 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index e5d75507..7e6cc59e 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -326,6 +326,6 @@ them. See also `blas_int` above. blas_return_float_f2c Whether sdot, etc., returns float (usual convention) or double - (f2c convention used in CLAPACK and macOS Accelerate). - no returns float. Default except for macOS Accelerate. 
- yes returns double. Default for macOS Accelerate. + (f2c convention used in CLAPACK and old macOS Accelerate before macOS 13.3). + no returns float. Default except for old macOS Accelerate. + yes returns double. Default for old macOS Accelerate. diff --git a/config/blas.cc b/config/blas.cc index c423df16..e3522a67 100644 --- a/config/blas.cc +++ b/config/blas.cc @@ -9,14 +9,15 @@ #include "config.h" //------------------------------------------------------------------------------ +#define BLAS_sdot FORTRAN_NAME( sdot, SDOT ) #define BLAS_ddot FORTRAN_NAME( ddot, DDOT ) -#ifdef ACCELERATE_NEW_LAPACK - #pragma message "Including Accelerate.h" +#ifdef BLAS_HAVE_ACCELERATE +//ACCELERATE_NEW_LAPACK + #pragma message "include Accelerate.h" #include #else - #pragma message "Defining BLAS_ddot" - // result return directly + // result returned directly #ifdef __cplusplus extern "C" #endif @@ -44,8 +45,18 @@ int main() double result = BLAS_ddot( n, x, &ione, y, &ione ); printf( "result = %.1f; should be 35.0\n", result ); - bool okay = (result == 35); + + #ifdef ACCELERATE_NEW_LAPACK + // To verify this is new Accelerate, check the return type of sdot. + float sx[] = { 1, 2, 3, 4, 5 }; + float sy[] = { 5, 4, 3, 2, 1 }; + auto r = BLAS_sdot( n, sx, &ione, sy, &ione ); + static_assert( std::is_same< float, decltype( r ) >::value, + "With new Accelerate, sdot returns float" + " -> this is old Accelerate" ); + #endif + printf( "%s\n", okay ? "ok" : "failed" ); return ! okay; } diff --git a/config/cblas.cc b/config/cblas.cc index 0fb4786b..9004d7bb 100644 --- a/config/cblas.cc +++ b/config/cblas.cc @@ -5,13 +5,16 @@ #include +//------------------------------------------------------------------------------ #if defined(BLAS_HAVE_MKL) + #pragma message "include mkl_cblas.h" #if defined(BLAS_ILP64) && ! defined(MKL_ILP64) #define MKL_ILP64 #endif #include #elif defined(BLAS_HAVE_ESSL) + #pragma message "include essl.h" #if defined(BLAS_ILP64) && ! 
defined(_ESV6464) #define _ESV6464 #endif @@ -22,8 +25,10 @@ // Unfortunately with Xcode 10.3 and GNU g++ 9.3, that doesn't compile. // If we can find cblas.h, use it, otherwise use Accelerate.h. #ifdef BLAS_HAVE_ACCELERATE_CBLAS_H + #pragma message "include cblas.h for Accelerate" #include #else + #pragma message "include Accelerate.h" #include #endif #else diff --git a/config/config.h b/config/config.h index 37b0b680..58c35f85 100644 --- a/config/config.h +++ b/config/config.h @@ -10,8 +10,10 @@ //------------------------------------------------------------------------------ #if defined(FORTRAN_UPPER) + #pragma message "Fortran upper" #define FORTRAN_NAME( lower, UPPER ) UPPER #elif defined(FORTRAN_LOWER) + #pragma message "Fortran lower" #define FORTRAN_NAME( lower, UPPER ) lower #else // default is ADD_ @@ -20,20 +22,38 @@ //------------------------------------------------------------------------------ #if defined(BLAS_ILP64) || defined(LAPACK_ILP64) - typedef int64_t blas_int; - typedef int64_t lapack_int; + // long is >= 32 bits, long long is >= 64 bits + // macOS Accelerate uses long, Intel MKL uses long long, + // prefer int64_t (which can be long or long long). + #ifdef BLAS_HAVE_ACCELERATE + #pragma message "Accelerate ilp64 (long)" + #define ACCELERATE_LAPACK_ILP64 + typedef long blas_int; + typedef long lapack_int; + #else + #pragma message "ilp64 (int64_t)" + typedef int64_t blas_int; + typedef int64_t lapack_int; + #endif #else typedef int blas_int; typedef int lapack_int; #endif //------------------------------------------------------------------------------ -#ifndef BLAS_FORTRAN_STRLEN_END -#define BLAS_FORTRAN_STRLEN_END -#endif +#ifdef BLAS_HAVE_ACCELERATE + // Neither old nor new macOS Accelerate API passes strlen. 
+ #pragma message "Accelerate undef strlen" + #undef BLAS_FORTRAN_STRLEN_END + #undef LAPACK_FORTRAN_STRLEN_END +#else + #ifndef BLAS_FORTRAN_STRLEN_END + #define BLAS_FORTRAN_STRLEN_END + #endif -#ifndef LAPACK_FORTRAN_STRLEN_END -#define LAPACK_FORTRAN_STRLEN_END + #ifndef LAPACK_FORTRAN_STRLEN_END + #define LAPACK_FORTRAN_STRLEN_END + #endif #endif #endif // CONFIG_H diff --git a/config/config.py b/config/config.py index 71d923d9..70a21683 100644 --- a/config/config.py +++ b/config/config.py @@ -191,25 +191,40 @@ def print_test( label ): ''' if (label): print( '-'*20 + '\n' + label, file=log ) - print( '%-72s' % label, end='' ) + if (len( label ) > 72): + print( label ) + label = '' + print( '%-72s ' % label, end='' ) sys.stdout.flush() # end +#------------------------------------------------------------------------------- +def print_value( label, value ): + ''' + If label is given, prints the value and newline. + If no label is given, does nothing. + @see print_test(), print_result(). + ''' + if (label): + print( value, file=log ) + print( value ) +# end + #------------------------------------------------------------------------------- def print_result( label, rc, extra='' ): ''' If label is given, prints either "yes" (if rc == 0) or "no" (otherwise). Extra is printed after yes or no. If no label is given, does nothing. - @see print_test(). + @see print_test(), print_value(). 
''' - if (label): - if (rc == 0): - print( font.blue( 'yes' ), extra, file=log ) - print( font.blue( ' yes' ), extra ) - else: - print( font.red( 'no' ), extra, file=log ) - print( font.red( ' no' ), extra ) + if (rc == 0): + value = font.blue( 'yes' ) + else: + value = font.red( 'no' ) + if (extra): + value += ' ' + extra + print_value( label, value ) # end # ------------------------------------------------------------------------------ diff --git a/config/lapack.py b/config/lapack.py index 01b23ab2..bb5c86d4 100644 --- a/config/lapack.py +++ b/config/lapack.py @@ -9,7 +9,8 @@ import re import config from config import print_header, print_subhead, print_msg, print_warn, \ - print_test, print_result, define, Error, get + print_test, print_value, print_result, define, Error, \ + font, get #------------------------------------------------------------------------------- def get_fortran_manglings(): @@ -335,23 +336,30 @@ def blas(): #-------------------- Apple Accelerate if (test_all or test_accelerate): + flags = define('HAVE_ACCELERATE') + new_lapack = ' -DACCELERATE_NEW_LAPACK' choices.append( ['macOS Accelerate (new)', {'LIBS': '-framework Accelerate', - 'CXXFLAGS': define('HAVE_ACCELERATE') - + ' -DACCELERATE_NEW_LAPACK'}]) + 'CXXFLAGS': flags + new_lapack }]) # macOS 13.3, g++ 12.2 requires extra flag to parse Apple's headers. 
+ version = ' -mmacosx-version-min=13.3' + extra = ' -flax-vector-conversions' choices.append( ['macOS Accelerate (new, -flax-vector-conversions)', + {'LIBS': '-framework Accelerate ', + 'CXXFLAGS': flags + new_lapack + version + extra }]) + + choices.append( + ['macOS Accelerate (old, pre 13.3)', {'LIBS': '-framework Accelerate', - 'CXXFLAGS': define('HAVE_ACCELERATE') - + ' -DACCELERATE_NEW_LAPACK -flax-vector-conversions'}]) + 'CXXFLAGS': flags }]) choices.append( - ['macOS Accelerate', + ['macOS Accelerate (old, pre 13.3, -flax-vector-conversions)', {'LIBS': '-framework Accelerate', - 'CXXFLAGS': define('HAVE_ACCELERATE')}]) + 'CXXFLAGS': flags + extra}]) # end #-------------------- generic -lblas @@ -546,13 +554,13 @@ def blas_float_return(): ''' (rc, out, err) = config.compile_run( 'config/return_float.cc', {}, - 'BLAS (sdot) returns float as float (standard)' ) + 'BLAS (sdot) returns float (standard)' ) if (rc == 0): return (rc, out, err) = config.compile_run( 'config/return_float_f2c.cc', {}, - 'BLAS (sdot) returns float as double (f2c convention)' ) + 'BLAS (sdot) returns double (f2c convention)' ) if (rc == 0): config.environ.append( 'CXXFLAGS', define('HAVE_F2C') ) else: @@ -589,11 +597,19 @@ def lapack_version(): (rc, out, err) = config.compile_run( 'config/lapack_version.cc' ) s = re.search( r'^LAPACK_VERSION=((\d+)\.(\d+)\.(\d+))', out ) if (rc == 0 and s): - v = '%d%02d%02d' % (int(s.group(2)), int(s.group(3)), int(s.group(4))) + major = int( s.group( 2 ) ) + minor = int( s.group( 3 ) ) + patch = int( s.group( 4 ) ) + # Sanity checks may catch ilp64 error. 
+ assert 3 <= major <= 4, "Expected LAPACK version 3 (current) or 4 (future), got version " + str( major ) + assert 0 <= minor <= 100, "Expected LAPACK minor in 0-100, got " + str( minor ) + assert 0 <= minor <= 100, "Expected LAPACK patch in 0-100, got " + str( patch ) + v = '%d%02d%02d' % (major, minor, patch) config.environ.append( 'CXXFLAGS', define('LAPACK_VERSION', v) ) - config.print_result( 'LAPACK', rc, '(' + s.group(1) + ')' ) + config.environ.append( 'LAPACK_VERSION', v ) + config.print_value( 'LAPACK', font.blue( s.group(1) ) ) else: - config.print_result( 'LAPACK', rc ) + config.print_value( 'LAPACK', font.red( 'unknown' ) ) # end #------------------------------------------------------------------------------- diff --git a/config/lapack_potrf.cc b/config/lapack_potrf.cc index a1996d77..38beaedc 100644 --- a/config/lapack_potrf.cc +++ b/config/lapack_potrf.cc @@ -8,26 +8,32 @@ #include "config.h" +//------------------------------------------------------------------------------ #define LAPACK_dpotrf_base FORTRAN_NAME( dpotrf, DPOTRF ) -#ifdef __cplusplus -extern "C" -#endif -void LAPACK_dpotrf_base( - const char* uplo, const lapack_int* n, - double* A, const lapack_int* lda, - lapack_int* info - #ifdef LAPACK_FORTRAN_STRLEN_END - , size_t uplo_len - #endif - ); - #ifdef LAPACK_FORTRAN_STRLEN_END #define LAPACK_dpotrf( ... ) LAPACK_dpotrf_base( __VA_ARGS__, 1 ) #else #define LAPACK_dpotrf( ... 
) LAPACK_dpotrf_base( __VA_ARGS__ ) #endif +#ifdef ACCELERATE_NEW_LAPACK + #pragma message "include Accelerate.h" + #include +#else + #ifdef __cplusplus + extern "C" + #endif + void LAPACK_dpotrf_base( + const char* uplo, const lapack_int* n, + double* A, const lapack_int* lda, + lapack_int* info + #ifdef LAPACK_FORTRAN_STRLEN_END + , size_t uplo_len + #endif + ); +#endif + //------------------------------------------------------------------------------ int main() { diff --git a/config/lapack_pstrf.cc b/config/lapack_pstrf.cc index fcec7541..70b176b6 100644 --- a/config/lapack_pstrf.cc +++ b/config/lapack_pstrf.cc @@ -8,29 +8,35 @@ #include "config.h" +//------------------------------------------------------------------------------ #define LAPACK_dpstrf_base FORTRAN_NAME( dpstrf, DPSTRF ) -#ifdef __cplusplus -extern "C" -#endif -void LAPACK_dpstrf_base( - const char* uplo, const lapack_int* n, - double* A, const lapack_int* lda, - lapack_int* ipiv, lapack_int* rank, - const double* tol, - double* work, - lapack_int* info - #ifdef LAPACK_FORTRAN_STRLEN_END - , size_t uplo_len - #endif - ); - #ifdef LAPACK_FORTRAN_STRLEN_END #define LAPACK_dpstrf( ... ) LAPACK_dpstrf_base( __VA_ARGS__, 1 ) #else #define LAPACK_dpstrf( ... 
) LAPACK_dpstrf_base( __VA_ARGS__ ) #endif +#ifdef ACCELERATE_NEW_LAPACK + #pragma message "include Accelerate.h" + #include +#else + #ifdef __cplusplus + extern "C" + #endif + void LAPACK_dpstrf_base( + const char* uplo, const lapack_int* n, + double* A, const lapack_int* lda, + lapack_int* ipiv, lapack_int* rank, + const double* tol, + double* work, + lapack_int* info + #ifdef LAPACK_FORTRAN_STRLEN_END + , size_t uplo_len + #endif + ); +#endif + //------------------------------------------------------------------------------ int main() { diff --git a/config/return_complex.cc b/config/return_complex.cc index b89fac6d..e8696db4 100644 --- a/config/return_complex.cc +++ b/config/return_complex.cc @@ -14,14 +14,19 @@ //------------------------------------------------------------------------------ #define BLAS_zdotc FORTRAN_NAME( zdotc, ZDOTC ) -// result return directly -#ifdef __cplusplus -extern "C" +#ifdef ACCELERATE_NEW_LAPACK + #pragma message "include Accelerate.h" + #include +#else + // result returned directly + #ifdef __cplusplus + extern "C" + #endif + double _Complex BLAS_zdotc( + const blas_int* n, + const std::complex* x, const blas_int* incx, + const std::complex* y, const blas_int* incy ); #endif -double _Complex BLAS_zdotc( - const blas_int* n, - const std::complex* x, const blas_int* incx, - const std::complex* y, const blas_int* incy ); //------------------------------------------------------------------------------ int main() diff --git a/config/return_complex_argument.cc b/config/return_complex_argument.cc index 1bcc6644..b00eb161 100644 --- a/config/return_complex_argument.cc +++ b/config/return_complex_argument.cc @@ -11,15 +11,20 @@ //------------------------------------------------------------------------------ #define BLAS_zdotc FORTRAN_NAME( zdotc, ZDOTC ) -// result returned as *hidden argument* -#ifdef __cplusplus -extern "C" +#ifdef ACCELERATE_NEW_LAPACK + #pragma message "include Accelerate.h" + #include +#else + // result returned as 
*hidden argument* + #ifdef __cplusplus + extern "C" + #endif + void BLAS_zdotc( + std::complex* result, + const blas_int* n, + const std::complex* x, const blas_int* incx, + const std::complex* y, const blas_int* incy ); #endif -void BLAS_zdotc( - std::complex* result, - const blas_int* n, - const std::complex* x, const blas_int* incx, - const std::complex* y, const blas_int* incy ); //------------------------------------------------------------------------------ int main() diff --git a/config/return_float.cc b/config/return_float.cc index 51cf5e35..08686e3f 100644 --- a/config/return_float.cc +++ b/config/return_float.cc @@ -4,19 +4,26 @@ // the terms of the BSD 3-Clause license. See the accompanying LICENSE file. #include +#include #include "config.h" //------------------------------------------------------------------------------ #define BLAS_sdot FORTRAN_NAME( sdot, SDOT ) -// returns *float* -#ifdef __cplusplus -extern "C" +#ifdef BLAS_HAVE_ACCELERATE +//ACCELERATE_NEW_LAPACK + #pragma message "include Accelerate.h" + #include +#else + // returns `float` as usual. + #ifdef __cplusplus + extern "C" + #endif + float BLAS_sdot( const blas_int* n, + const float* x, const blas_int* incx, + const float* y, const blas_int* incy ); #endif -float BLAS_sdot( const blas_int* n, - const float* x, const blas_int* incx, - const float* y, const blas_int* incy ); //------------------------------------------------------------------------------ int main() @@ -30,6 +37,11 @@ int main() i, y[ i ] ); } + auto r = BLAS_sdot( &n, x, &ione, y, &ione ); + if (! 
std::is_same::value) { + printf( "is_same failed\n" ); + } + float result = BLAS_sdot( &n, x, &ione, y, &ione ); printf( "result = %.1f; should be 35.0\n", result ); diff --git a/config/return_float_f2c.cc b/config/return_float_f2c.cc index 10706677..12cd805f 100644 --- a/config/return_float_f2c.cc +++ b/config/return_float_f2c.cc @@ -10,13 +10,20 @@ //------------------------------------------------------------------------------ #define BLAS_sdot FORTRAN_NAME( sdot, SDOT ) -// returns *double* -#ifdef __cplusplus -extern "C" +#ifdef ACCELERATE_NEW_LAPACK + // New Accelerate API (>= macOS 13.3) does not use f2c convention. + // Since new Accelerate requires using their prototypes in their header, + // it's not possible to test using a custom prototype as below. + #error "Accelerate's new API (>= macOS 13.3) does not use f2c convention." +#else + // returns `double` instead of `float`, per f2c convention. + #ifdef __cplusplus + extern "C" + #endif + double BLAS_sdot( const blas_int* n, + const float* x, const blas_int* incx, + const float* y, const blas_int* incy ); #endif -double BLAS_sdot( const blas_int* n, - const float* x, const blas_int* incx, - const float* y, const blas_int* incy ); //------------------------------------------------------------------------------ int main() diff --git a/configure.py b/configure.py index 00ba0900..48102031 100755 --- a/configure.py +++ b/configure.py @@ -64,6 +64,7 @@ def main(): config.lapack.blas_float_return() config.lapack.blas_complex_return() config.lapack.vendor_version() + config.lapack.lapack_version() # Must test mkl_version before cblas and lapacke, to define HAVE_MKL. try: diff --git a/include/blas/fortran.h b/include/blas/fortran.h index decdcc76..c2b19ee6 100644 --- a/include/blas/fortran.h +++ b/include/blas/fortran.h @@ -14,16 +14,21 @@ #include "blas/config.h" #ifdef ACCELERATE_NEW_LAPACK + // New macOS Accelerate (>= macOS 13.3) requires their prototypes + // with extra mangling, and does not include strlen. 
+ #if defined( BLAS_ILP64 ) && ! defined( ACCELERATE_ILP64 ) + #define ACCELERATE_ILP64 + #endif #include #else // It seems all current Fortran compilers put strlen at end. // Some historical compilers put strlen after the str argument // or make the str argument into a struct. - // New Apple Accelerate (macOS >= 13.3) does not include strlen. #ifndef BLAS_FORTRAN_STRLEN_END #define BLAS_FORTRAN_STRLEN_END #endif + // Set flag to define prototypes below (i.e., not Accelerate's prototypes). #define BLAS_PROTO #endif From 3f8126746f353b6886d947f072144390c92afea9 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Fri, 20 Sep 2024 08:30:29 -0400 Subject: [PATCH 4/7] TMP --- config/blas.cc | 27 ++++++++++++++------------- config/cblas.cc | 1 + config/lapack.py | 15 +++++++++------ config/lapack_potrf.cc | 1 + config/lapack_pstrf.cc | 1 + config/return_complex.cc | 1 + config/return_complex_argument.cc | 1 + config/return_float.cc | 4 ++-- include/blas/config.h | 7 ++++++- include/blas/fortran.h | 14 +++++++++----- 10 files changed, 45 insertions(+), 27 deletions(-) diff --git a/config/blas.cc b/config/blas.cc index e3522a67..db02e28f 100644 --- a/config/blas.cc +++ b/config/blas.cc @@ -12,9 +12,9 @@ #define BLAS_sdot FORTRAN_NAME( sdot, SDOT ) #define BLAS_ddot FORTRAN_NAME( ddot, DDOT ) -#ifdef BLAS_HAVE_ACCELERATE -//ACCELERATE_NEW_LAPACK +#ifdef ACCELERATE_NEW_LAPACK #pragma message "include Accelerate.h" + #include // workaround #include #else // result returned directly @@ -35,17 +35,6 @@ int main() // If blas_int is 64-bit, BLAS can interpret it as 32-bit or 64-bit // to see n = 5 and pass. 
blas_int n[] = { 5, 5 }, ione = 1; - double x[] = { 1, 2, 3, 4, 5 }; - double y[] = { 5, 4, 3, 2, 1 }; - for (int i = 0; i < n[0]; ++i) { - printf( "x[ %d ] = %.1f; y[ %d ] = %.1f\n", - i, x[ i ], - i, y[ i ] ); - } - - double result = BLAS_ddot( n, x, &ione, y, &ione ); - printf( "result = %.1f; should be 35.0\n", result ); - bool okay = (result == 35); #ifdef ACCELERATE_NEW_LAPACK // To verify this is new Accelerate, check the return type of sdot. @@ -57,6 +46,18 @@ int main() " -> this is old Accelerate" ); #endif + double x[] = { 1, 2, 3, 4, 5 }; + double y[] = { 5, 4, 3, 2, 1 }; + for (int i = 0; i < n[0]; ++i) { + printf( "x[ %d ] = %.1f; y[ %d ] = %.1f\n", + i, x[ i ], + i, y[ i ] ); + } + + double result = BLAS_ddot( n, x, &ione, y, &ione ); + printf( "result = %.1f; should be 35.0\n", result ); + + bool okay = (result == 35); printf( "%s\n", okay ? "ok" : "failed" ); return ! okay; } diff --git a/config/cblas.cc b/config/cblas.cc index 9004d7bb..2460c38c 100644 --- a/config/cblas.cc +++ b/config/cblas.cc @@ -29,6 +29,7 @@ #include #else #pragma message "include Accelerate.h" + #include // workaround #include #endif #else diff --git a/config/lapack.py b/config/lapack.py index bb5c86d4..b805b428 100644 --- a/config/lapack.py +++ b/config/lapack.py @@ -336,11 +336,12 @@ def blas(): #-------------------- Apple Accelerate if (test_all or test_accelerate): + libs = '-framework Accelerate' flags = define('HAVE_ACCELERATE') new_lapack = ' -DACCELERATE_NEW_LAPACK' choices.append( ['macOS Accelerate (new)', - {'LIBS': '-framework Accelerate', + {'LIBS': libs, 'CXXFLAGS': flags + new_lapack }]) # macOS 13.3, g++ 12.2 requires extra flag to parse Apple's headers. 
@@ -348,17 +349,17 @@ def blas(): extra = ' -flax-vector-conversions' choices.append( ['macOS Accelerate (new, -flax-vector-conversions)', - {'LIBS': '-framework Accelerate ', + {'LIBS': libs + version, 'CXXFLAGS': flags + new_lapack + version + extra }]) choices.append( ['macOS Accelerate (old, pre 13.3)', - {'LIBS': '-framework Accelerate', + {'LIBS': libs, 'CXXFLAGS': flags }]) choices.append( ['macOS Accelerate (old, pre 13.3, -flax-vector-conversions)', - {'LIBS': '-framework Accelerate', + {'LIBS': libs, 'CXXFLAGS': flags + extra}]) # end @@ -411,8 +412,10 @@ def cblas(): ['CBLAS (cblas_ddot) in -lcblas', {'LIBS': '-lcblas'}], ] - LIBS = config.environ['LIBS'] - if ('-framework Accelerate' in LIBS): + CXXFLAGS = config.environ['CXXFLAGS'] + LIBS = config.environ['LIBS'] + if ('-framework Accelerate' in LIBS + and 'ACCELERATE_NEW_LAPACK' not in CXXFLAGS): # macOS puts cblas.h in weird places; add -I for path. # Insert as 2nd choice, so it won't be used if 1st choice above works. # On macOS 13, cblas.h seems to be in the compiler's default search diff --git a/config/lapack_potrf.cc b/config/lapack_potrf.cc index 38beaedc..5732bde5 100644 --- a/config/lapack_potrf.cc +++ b/config/lapack_potrf.cc @@ -19,6 +19,7 @@ #ifdef ACCELERATE_NEW_LAPACK #pragma message "include Accelerate.h" + #include // workaround #include #else #ifdef __cplusplus diff --git a/config/lapack_pstrf.cc b/config/lapack_pstrf.cc index 70b176b6..a760b744 100644 --- a/config/lapack_pstrf.cc +++ b/config/lapack_pstrf.cc @@ -19,6 +19,7 @@ #ifdef ACCELERATE_NEW_LAPACK #pragma message "include Accelerate.h" + #include // workaround #include #else #ifdef __cplusplus diff --git a/config/return_complex.cc b/config/return_complex.cc index e8696db4..d4a6ee40 100644 --- a/config/return_complex.cc +++ b/config/return_complex.cc @@ -16,6 +16,7 @@ #ifdef ACCELERATE_NEW_LAPACK #pragma message "include Accelerate.h" + #include // workaround #include #else // result returned directly diff --git 
a/config/return_complex_argument.cc b/config/return_complex_argument.cc index b00eb161..f6c871bb 100644 --- a/config/return_complex_argument.cc +++ b/config/return_complex_argument.cc @@ -13,6 +13,7 @@ #ifdef ACCELERATE_NEW_LAPACK #pragma message "include Accelerate.h" + #include // workaround #include #else // result returned as *hidden argument* diff --git a/config/return_float.cc b/config/return_float.cc index 08686e3f..94d4ee57 100644 --- a/config/return_float.cc +++ b/config/return_float.cc @@ -11,9 +11,9 @@ //------------------------------------------------------------------------------ #define BLAS_sdot FORTRAN_NAME( sdot, SDOT ) -#ifdef BLAS_HAVE_ACCELERATE -//ACCELERATE_NEW_LAPACK +#ifdef ACCELERATE_NEW_LAPACK #pragma message "include Accelerate.h" + #include // workaround #include #else // returns `float` as usual. diff --git a/include/blas/config.h b/include/blas/config.h index bbae1a2b..d419a5c1 100644 --- a/include/blas/config.h +++ b/include/blas/config.h @@ -11,7 +11,12 @@ #include "blas/defines.h" #ifndef blas_int - #if defined(BLAS_ILP64) + #if defined( BLAS_ILP64 ) && defined( ACCELERATE_NEW_LAPACK ) + #ifndef ACCELERATE_LAPACK_ILP64 + #define ACCELERATE_LAPACK_ILP64 + #endif + typedef long blas_int; + #elif defined( BLAS_ILP64 ) typedef int64_t blas_int; #else typedef int blas_int; diff --git a/include/blas/fortran.h b/include/blas/fortran.h index c2b19ee6..b7334dee 100644 --- a/include/blas/fortran.h +++ b/include/blas/fortran.h @@ -9,16 +9,20 @@ #include "blas/defines.h" #include "blas/mangling.h" -// Accelerate uses std::complex -#define BLAS_COMPLEX_CPP +#ifdef ACCELERATE_NEW_LAPACK + // Accelerate uses std::complex; see config.h + #define BLAS_COMPLEX_CPP +#endif + #include "blas/config.h" #ifdef ACCELERATE_NEW_LAPACK // New macOS Accelerate (>= macOS 13.3) requires their prototypes // with extra mangling, and does not include strlen. - #if defined( BLAS_ILP64 ) && ! 
defined( ACCELERATE_ILP64 ) - #define ACCELERATE_ILP64 - #endif + // macOS 13.3, GNU g++ 12.2 has bug including Accelerate.h: + // stdlib.h:142:44: error: expected ')' before '__compar' + // Including stdlib.h first appeases the compiler. + #include // workaround #include #else // It seems all current Fortran compilers put strlen at end. From 5108f75cae6ee0446f5a2e8561e9320e8ca066b3 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Sun, 22 Sep 2024 00:54:24 -0400 Subject: [PATCH 5/7] test: check blas_int; include in run_tests --- test/get_type_name.hh | 38 ++++++++++++++++++++++++++++++++++++++ test/run_tests.py | 2 ++ test/test_util.cc | 11 +++++++++++ 3 files changed, 51 insertions(+) create mode 100644 test/get_type_name.hh diff --git a/test/get_type_name.hh b/test/get_type_name.hh new file mode 100644 index 00000000..01b5049f --- /dev/null +++ b/test/get_type_name.hh @@ -0,0 +1,38 @@ +// ----------------------------------------------------------------------------- +// see https://stackoverflow.com/questions/81870/is-it-possible-to-print-a-variables-type-in-standard-c +#include +#include +#include +#include +#include + +// for demangling on non-Microsoft platforms +#ifndef _MSC_VER + #include +#endif + +template +std::string get_type_name() +{ + using T_noref = typename std::remove_reference::type; + + std::unique_ptr< char, void(*)(void*) > own( + #ifndef _MSC_VER + abi::__cxa_demangle( typeid( T_noref ).name(), nullptr, nullptr, nullptr ), + #else + nullptr, + #endif + std::free + ); + + std::string r = own != nullptr ? 
own.get() : typeid( T_noref ).name(); + if (std::is_const::value) + r += " const"; + if (std::is_volatile::value) + r += " volatile"; + if (std::is_lvalue_reference::value) + r += "&"; + else if (std::is_rvalue_reference::value) + r += "&&"; + return r; +} diff --git a/test/run_tests.py b/test/run_tests.py index 6aa97d37..22b11278 100755 --- a/test/run_tests.py +++ b/test/run_tests.py @@ -383,6 +383,8 @@ def filter_csv( values, csv ): if (opts.aux): cmds += [ + [ 'util', '' ], + [ 'memcpy', dtype + n ], [ 'copy_vector', dtype + n + incx_pos + incy_pos ], [ 'set_vector', dtype + n + incx_pos + incy_pos ], diff --git a/test/test_util.cc b/test/test_util.cc index 2a765bb4..8664bb23 100644 --- a/test/test_util.cc +++ b/test/test_util.cc @@ -5,11 +5,21 @@ #include "test.hh" #include "../src/device_internal.hh" +#include "get_type_name.hh" +#include "blas/config.h" #include using testsweeper::get_wtime; +// ----------------------------------------------------------------------------- +void test_types() +{ + printf( "%s\n", __func__ ); + printf( "\tblas_int is %s\n", get_type_name().c_str() ); + printf( "\tsizeof( blas_int ) = %lld\n", llong( sizeof( blas_int ) ) ); +} + // ----------------------------------------------------------------------------- void test_enums() { @@ -884,6 +894,7 @@ void test_util( Params& params, bool run ) if (first) { first = false; + test_types(); test_enums(); test_exceptions(); test_abs1(); From d9b770b7e9e694a5fcd4bb9a1023e130fe83f973 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Sun, 22 Sep 2024 00:56:42 -0400 Subject: [PATCH 6/7] TMP config --- config/config.py | 1 + config/lapack.py | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/config/config.py b/config/config.py index 70a21683..60ed3bf7 100644 --- a/config/config.py +++ b/config/config.py @@ -191,6 +191,7 @@ def print_test( label ): ''' if (label): print( '-'*20 + '\n' + label, file=log ) + # For long lines, print result on next line. 
if (len( label ) > 72): print( label ) label = '' diff --git a/config/lapack.py b/config/lapack.py index b805b428..17e667e8 100644 --- a/config/lapack.py +++ b/config/lapack.py @@ -339,18 +339,21 @@ def blas(): libs = '-framework Accelerate' flags = define('HAVE_ACCELERATE') new_lapack = ' -DACCELERATE_NEW_LAPACK' + # macOS 13.3, g++ 12.2 requires extra flag to parse Apple's headers. + extra = ' -flax-vector-conversions' + # todo: -mmacos-version-min starting with Xcode 16 + # todo: check `${CXX} --help -v`, as CMake does? + macos = ' -mmacosx-version-min=13.3' + choices.append( ['macOS Accelerate (new)', {'LIBS': libs, 'CXXFLAGS': flags + new_lapack }]) - # macOS 13.3, g++ 12.2 requires extra flag to parse Apple's headers. - version = ' -mmacosx-version-min=13.3' - extra = ' -flax-vector-conversions' choices.append( ['macOS Accelerate (new, -flax-vector-conversions)', - {'LIBS': libs + version, - 'CXXFLAGS': flags + new_lapack + version + extra }]) + {'LIBS': libs + macos, + 'CXXFLAGS': flags + new_lapack + macos + extra }]) choices.append( ['macOS Accelerate (old, pre 13.3)', @@ -360,7 +363,7 @@ def blas(): choices.append( ['macOS Accelerate (old, pre 13.3, -flax-vector-conversions)', {'LIBS': libs, - 'CXXFLAGS': flags + extra}]) + 'CXXFLAGS': flags + extra }]) # end #-------------------- generic -lblas @@ -598,13 +601,13 @@ def lapack_version(): ''' config.print_test( 'LAPACK version' ) (rc, out, err) = config.compile_run( 'config/lapack_version.cc' ) - s = re.search( r'^LAPACK_VERSION=((\d+)\.(\d+)\.(\d+))', out ) + s = re.search( r'^LAPACK_VERSION=((-?\d+)\.(-?\d+)\.(-?\d+))', out ) if (rc == 0 and s): major = int( s.group( 2 ) ) minor = int( s.group( 3 ) ) patch = int( s.group( 4 ) ) # Sanity checks may catch ilp64 error. 
- assert 3 <= major <= 4, "Expected LAPACK version 3 (current) or 4 (future), got version " + str( major ) + assert 3 <= major <= 4, "Expected LAPACK version 3 (current) or 4 (future), got version " + str( major ) + "; possibly 32/64-bit mismatch" assert 0 <= minor <= 100, "Expected LAPACK minor in 0-100, got " + str( minor ) assert 0 <= minor <= 100, "Expected LAPACK patch in 0-100, got " + str( patch ) v = '%d%02d%02d' % (major, minor, patch) From 61259aad8e2166e94bfb50617893105484a1794a Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Mon, 23 Sep 2024 16:11:20 -0400 Subject: [PATCH 7/7] cmake: support new Accelerate --- cmake/BLASConfig.cmake | 5 +-- cmake/BLASFinder.cmake | 70 +++++++++++++++++++++++++++++++++++------- 2 files changed, 62 insertions(+), 13 deletions(-) diff --git a/cmake/BLASConfig.cmake b/cmake/BLASConfig.cmake index 41be1eed..fc2c2897 100644 --- a/cmake/BLASConfig.cmake +++ b/cmake/BLASConfig.cmake @@ -31,9 +31,10 @@ set( found false ) if (NOT found) if ("${BLAS_LIBRARIES}" MATCHES "-framework Accelerate|Accelerate.framework") message( "${blue} Accelerate framework${plain}" ) - list( APPEND blaspp_defs_ "-DBLAS_HAVE_ACCELERATE" ) + #list( APPEND blaspp_defs_ "-DBLAS_HAVE_ACCELERATE" ) set( found true ) - if (NOT DEFINED blas_return_float_f2c) + if (NOT DEFINED blas_return_float_f2c + AND NOT blaspp_defs_ MATCHES "ACCELERATE_NEW_LAPACK") set( blas_return_float_f2c true ) endif() endif() diff --git a/cmake/BLASFinder.cmake b/cmake/BLASFinder.cmake index 29871c73..2cc88a6b 100644 --- a/cmake/BLASFinder.cmake +++ b/cmake/BLASFinder.cmake @@ -68,12 +68,14 @@ function( debug_print_list msg ) message( "---------- lists: ${msg}" ) message( "blas_name_list = ${blas_name_list}" ) message( "blas_libs_list = ${blas_libs_list}" ) + message( "blas_defs_list = ${blas_defs_list}" ) message( "\nrow; ${red}blas_name;${plain} blas_libs" ) set( i 0 ) foreach (name IN LISTS blas_name_list) list( GET blas_libs_list ${i} libs ) - message( "${i}; 
${red}${name};${plain} ${libs}" ) + list( GET blas_defs_list ${i} defs ) + message( "${i}; ${red}${name};${plain} libs = ${libs} defs = ${defs}" ) math( EXPR i "${i} + 1" ) endforeach() message( "" ) @@ -119,12 +121,6 @@ else() ) endif() -#---------------------------------------- integer sizes to test -set( int_size_list - " " # int (LP64) - "-DBLAS_ILP64" # int64_t (ILP64) -) - #------------------------------------------------------------------------------- # Parse options: BLAS_LIBRARIES, blas, blas_int, blas_threaded, blas_fortran. @@ -223,11 +219,20 @@ if (CMAKE_CROSSCOMPILING AND test_int AND test_int64) " `blas_int=int64` (ilp64 convention).${plain}" ) endif() +set( int_size_list "" ) +if (test_int) + list( APPEND int_size_list " " ) # int (lp64) +endif() +if (test_int64) + list( APPEND int_size_list "-DBLAS_ILP64" ) # int64_t (ILP64) +endif() + message( DEBUG " blas_int = '${blas_int}' blas_int_ = '${blas_int_}' test_int = '${test_int}' -test_int64 = '${test_int64}'") +test_int64 = '${test_int64}' +int_size_list = '${int_size_list}'") #---------------------------------------- blas_threaded string( TOLOWER "${blas_threaded}" blas_threaded_ ) @@ -256,6 +261,7 @@ test_sequential = '${test_sequential}'") set( blas_name_list "" ) set( blas_libs_list "" ) +set( blas_defs_list "" ) #---------------------------------------- BLAS_LIBRARIES if (test_blas_libraries) @@ -266,6 +272,7 @@ if (test_blas_libraries) list( APPEND blas_name_list "\$BLAS_LIBRARIES" ) list( APPEND blas_libs_list "${BLAS_LIBRARIES_ESC}" ) + list( APPEND blas_defs_list " " ) debug_print_list( "BLAS_LIBRARIES" ) endif() @@ -273,6 +280,7 @@ endif() if (test_all OR test_default) list( APPEND blas_name_list "default (no library)" ) list( APPEND blas_libs_list " " ) # Use space so APPEND works later. 
+ list( APPEND blas_defs_list " " ) debug_print_list( "default" ) endif() @@ -285,11 +293,13 @@ if (test_all OR test_mkl) if (test_int) list( APPEND blas_name_list "Intel MKL lp64, GNU threads (gomp), gfortran") list( APPEND blas_libs_list "-lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core" ) + list( APPEND blas_defs_list " " ) endif() if (test_int64) list( APPEND blas_name_list "Intel MKL ilp64, GNU threads (gomp), gfortran") list( APPEND blas_libs_list "-lmkl_gf_ilp64 -lmkl_gnu_thread -lmkl_core" ) + list( APPEND blas_defs_list " " ) endif() elseif (test_ifort AND intelllvm_compiler) @@ -297,9 +307,11 @@ if (test_all OR test_mkl) if (test_int) list( APPEND blas_name_list "Intel MKL lp64, Intel threads (iomp5), ifort") list( APPEND blas_libs_list "-lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core" ) + list( APPEND blas_defs_list " " ) elseif (test_int64) list( APPEND blas_name_list "Intel MKL ilp64, Intel threads (iomp5), ifort") list( APPEND blas_libs_list "-lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core" ) + list( APPEND blas_defs_list " " ) endif() elseif (test_ifort AND intel_compiler) @@ -307,11 +319,13 @@ if (test_all OR test_mkl) if (test_int) list( APPEND blas_name_list "Intel MKL lp64, Intel threads (iomp5), ifort") list( APPEND blas_libs_list "-lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core" ) + list( APPEND blas_defs_list " " ) endif() if (test_int64) list( APPEND blas_name_list "Intel MKL ilp64, Intel threads (iomp5), ifort") list( APPEND blas_libs_list "-lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core" ) + list( APPEND blas_defs_list " " ) endif() else() @@ -327,11 +341,13 @@ if (test_all OR test_mkl) if (test_int) list( APPEND blas_name_list "Intel MKL lp64, sequential, ifort" ) list( APPEND blas_libs_list "-lmkl_intel_lp64 -lmkl_sequential -lmkl_core" ) + list( APPEND blas_defs_list " " ) endif() if (test_int64) list( APPEND blas_name_list "Intel MKL ilp64, sequential, ifort" ) list( APPEND blas_libs_list "-lmkl_intel_ilp64 -lmkl_sequential -lmkl_core" ) + list( 
APPEND blas_defs_list " " ) endif() endif() # ifort @@ -340,11 +356,13 @@ if (test_all OR test_mkl) if (test_int) list( APPEND blas_name_list "Intel MKL lp64, sequential, gfortran" ) list( APPEND blas_libs_list "-lmkl_gf_lp64 -lmkl_sequential -lmkl_core" ) + list( APPEND blas_defs_list " " ) endif() if (test_int64) list( APPEND blas_name_list "Intel MKL ilp64, sequential, gfortran" ) list( APPEND blas_libs_list "-lmkl_gf_ilp64 -lmkl_sequential -lmkl_core" ) + list( APPEND blas_defs_list " " ) endif() endif() # gfortran @@ -354,11 +372,13 @@ if (test_all OR test_mkl) if (test_int) list( APPEND blas_name_list "Intel MKL lp64, sequential, ifort" ) list( APPEND blas_libs_list "-lmkl_intel_lp64 -lmkl_sequential -lmkl_core" ) + list( APPEND blas_defs_list " " ) endif() if (test_int64) list( APPEND blas_name_list "Intel MKL ilp64, sequential, ifort" ) list( APPEND blas_libs_list "-lmkl_intel_ilp64 -lmkl_sequential -lmkl_core" ) + list( APPEND blas_defs_list " " ) endif() endif() # ifort && not intel endif() # sequential @@ -387,11 +407,13 @@ if (test_all OR test_essl) if (test_int) list( APPEND blas_name_list "IBM ESSL int (lp64), multi-threaded, with OpenMP" ) list( APPEND blas_libs_list "-lesslsmp" ) + list( APPEND blas_defs_list " " ) endif() if (test_int64) list( APPEND blas_name_list "IBM ESSL int64 (ilp64), multi-threaded, with OpenMP" ) list( APPEND blas_libs_list "-lesslsmp6464" ) + list( APPEND blas_defs_list " " ) endif() endif() endif() # threaded @@ -400,11 +422,13 @@ if (test_all OR test_essl) if (test_int) list( APPEND blas_name_list "IBM ESSL int (lp64), sequential" ) list( APPEND blas_libs_list "-lessl" ) + list( APPEND blas_defs_list " " ) endif() if (test_int64) list( APPEND blas_name_list "IBM ESSL int64 (ilp64), sequential" ) list( APPEND blas_libs_list "-lessl6464" ) + list( APPEND blas_defs_list " " ) endif() endif() # sequential debug_print_list( "essl" ) @@ -415,13 +439,20 @@ if (test_all OR test_openblas) # todo: OPENBLAS_?(ROOT|DIR) list( APPEND 
blas_name_list "OpenBLAS" ) list( APPEND blas_libs_list "-lopenblas" ) + list( APPEND blas_defs_list " " ) debug_print_list( "openblas" ) endif() #---------------------------------------- Apple Accelerate if (test_all OR test_accelerate) - list( APPEND blas_name_list "Apple Accelerate" ) + list( APPEND blas_name_list "Apple Accelerate (new)" ) + list( APPEND blas_libs_list "-framework Accelerate" ) + list( APPEND blas_defs_list "-DBLAS_HAVE_ACCELERATE -DACCELERATE_NEW_LAPACK" ) + + list( APPEND blas_name_list "Apple Accelerate (old, pre 13.3)" ) list( APPEND blas_libs_list "-framework Accelerate" ) + list( APPEND blas_defs_list "-DBLAS_HAVE_ACCELERATE" ) + debug_print_list( "accelerate" ) endif() @@ -429,6 +460,7 @@ endif() if (test_all OR test_generic) list( APPEND blas_name_list "generic" ) list( APPEND blas_libs_list "-lblas" ) + list( APPEND blas_defs_list " " ) debug_print_list( "generic" ) endif() @@ -439,11 +471,13 @@ if (test_all OR test_acml) if (test_threaded) list( APPEND blas_name_list "AMD ACML threaded" ) list( APPEND blas_libs_list "-lacml_mp" ) + list( APPEND blas_defs_list " " ) endif() if (test_sequential) list( APPEND blas_name_list "AMD ACML sequential" ) list( APPEND blas_libs_list "-lacml" ) + list( APPEND blas_defs_list " " ) endif() debug_print_list( "acml" ) endif() @@ -458,6 +492,7 @@ set( i 0 ) foreach (blas_name IN LISTS blas_name_list) message( TRACE "i: ${i}" ) list( GET blas_libs_list ${i} blas_libs ) + list( GET blas_defs_list ${i} blas_defs ) math( EXPR i "${i}+1" ) if (i GREATER 1) @@ -465,7 +500,11 @@ foreach (blas_name IN LISTS blas_name_list) endif() message( "${blas_name}" ) message( " libs: ${blas_libs}" ) + if (blas_defs MATCHES "[^ ]") # non-empty + message( " defs: ${blas_defs}" ) + endif() + # Split space-separated libs into CMake list. # Strip to deal with default lib being space, " ". # Undo escaping \; semi-colons and split on spaces to make list. # But keep '-framework Accelerate' together as one item. 
@@ -488,7 +527,7 @@ foreach (blas_name IN LISTS blas_name_list) LINK_LIBRARIES ${blas_libs} ${openmp_lib} # not "..." quoted; screws up OpenMP COMPILE_DEFINITIONS - "${mangling} ${int_size}" + "${mangling} ${int_size} ${blas_defs}" OUTPUT_VARIABLE link_output ) @@ -509,7 +548,7 @@ foreach (blas_name IN LISTS blas_name_list) LINK_LIBRARIES ${blas_libs} ${openmp_lib} # not "..." quoted; screws up OpenMP COMPILE_DEFINITIONS - "${mangling} ${int_size}" + "${mangling} ${int_size} ${blas_defs}" COMPILE_OUTPUT_VARIABLE compile_output RUN_OUTPUT_VARIABLE @@ -533,6 +572,12 @@ foreach (blas_name IN LISTS blas_name_list) # If it runs and prints ok, we're done, so break all 3 loops. message( "${label} ${blue} yes${plain}" ) + # Split space-separated defs into CMake list. + message( DEBUG " blas_defs: '${blas_defs}'" ) + string( STRIP "${blas_defs}" blas_defs ) + string( REGEX REPLACE "([^ ])( +|\\\;)" "\\1;" blas_defs "${blas_defs}" ) + message( DEBUG " blas_defs: '${blas_defs}' (split)" ) + set( BLAS_FOUND true CACHE INTERNAL "" ) set( BLAS_LIBRARIES "${blas_libs}" CACHE STRING "" FORCE ) if (mangling MATCHES "[^ ]") # non-empty @@ -541,6 +586,9 @@ foreach (blas_name IN LISTS blas_name_list) if (int_size MATCHES "[^ ]") # non-empty list( APPEND blaspp_defs_ "${int_size}" ) endif() + if (blas_defs MATCHES "[^ ]") # non-empty + list( APPEND blaspp_defs_ "${blas_defs}" ) + endif() break() else() message( "${label} ${red} no (didn't run: int mismatch, etc.)${plain}" )