Spaces:
Build error
Build error
| // ================================================================================================= | |
| // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This | |
| // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- | |
| // width of 100 characters per line. | |
| // | |
| // Author(s): | |
| // Cedric Nugteren <www.cedricnugteren.nl> | |
| // | |
| // This file contains the Netlib CBLAS interface to the CLBlast BLAS routines, performing all buffer | |
| // copies automatically and running on the default OpenCL platform and device. For full control over | |
| // performance, it is advised to use the regular clblast.h or clblast_c.h headers instead. | |
| // | |
| // ================================================================================================= | |
| // Exports library functions under Windows when building a DLL. See also: | |
| // https://msdn.microsoft.com/en-us/library/a90k134d.aspx | |
| // The C interface | |
| extern "C" { | |
| // ================================================================================================= | |
| // Matrix layout and transpose types | |
| typedef enum CLBlastLayout_ { CLBlastLayoutRowMajor = 101, | |
| CLBlastLayoutColMajor = 102 } CLBlastLayout; | |
| typedef enum CLBlastTranspose_ { CLBlastTransposeNo = 111, CLBlastTransposeYes = 112, | |
| CLBlastTransposeConjugate = 113 } CLBlastTranspose; | |
| typedef enum CLBlastTriangle_ { CLBlastTriangleUpper = 121, | |
| CLBlastTriangleLower = 122 } CLBlastTriangle; | |
| typedef enum CLBlastDiagonal_ { CLBlastDiagonalNonUnit = 131, | |
| CLBlastDiagonalUnit = 132 } CLBlastDiagonal; | |
| typedef enum CLBlastSide_ { CLBlastSideLeft = 141, CLBlastSideRight = 142 } CLBlastSide; | |
| typedef enum CLBlastKernelMode_ { CLBlastKernelModeCrossCorrelation = 141, CLBlastKernelModeConvolution = 152 } CLBlastKernelMode; | |
| // For full compatibility with CBLAS | |
| typedef CLBlastLayout CBLAS_ORDER; | |
| typedef CLBlastTranspose CBLAS_TRANSPOSE; | |
| typedef CLBlastTriangle CBLAS_UPLO; | |
| typedef CLBlastDiagonal CBLAS_DIAG; | |
| typedef CLBlastSide CBLAS_SIDE; | |
| // ================================================================================================= | |
| // BLAS level-1 (vector-vector) routines | |
| // ================================================================================================= | |
| // Generate givens plane rotation: SROTG/DROTG | |
| void PUBLIC_API cblas_srotg(float* sa, | |
| float* sb, | |
| float* sc, | |
| float* ss); | |
| void PUBLIC_API cblas_drotg(double* sa, | |
| double* sb, | |
| double* sc, | |
| double* ss); | |
| // Generate modified givens plane rotation: SROTMG/DROTMG | |
| void PUBLIC_API cblas_srotmg(float* sd1, | |
| float* sd2, | |
| float* sx1, | |
| const float sy1, | |
| float* sparam); | |
| void PUBLIC_API cblas_drotmg(double* sd1, | |
| double* sd2, | |
| double* sx1, | |
| const double sy1, | |
| double* sparam); | |
| // Apply givens plane rotation: SROT/DROT | |
| void PUBLIC_API cblas_srot(const int n, | |
| float* x, const int x_inc, | |
| float* y, const int y_inc, | |
| const float cos, | |
| const float sin); | |
| void PUBLIC_API cblas_drot(const int n, | |
| double* x, const int x_inc, | |
| double* y, const int y_inc, | |
| const double cos, | |
| const double sin); | |
| // Apply modified givens plane rotation: SROTM/DROTM | |
| void PUBLIC_API cblas_srotm(const int n, | |
| float* x, const int x_inc, | |
| float* y, const int y_inc, | |
| float* sparam); | |
| void PUBLIC_API cblas_drotm(const int n, | |
| double* x, const int x_inc, | |
| double* y, const int y_inc, | |
| double* sparam); | |
| // Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP/HSWAP | |
| void PUBLIC_API cblas_sswap(const int n, | |
| float* x, const int x_inc, | |
| float* y, const int y_inc); | |
| void PUBLIC_API cblas_dswap(const int n, | |
| double* x, const int x_inc, | |
| double* y, const int y_inc); | |
| void PUBLIC_API cblas_cswap(const int n, | |
| void* x, const int x_inc, | |
| void* y, const int y_inc); | |
| void PUBLIC_API cblas_zswap(const int n, | |
| void* x, const int x_inc, | |
| void* y, const int y_inc); | |
| // Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL/HSCAL | |
| void PUBLIC_API cblas_sscal(const int n, | |
| const float alpha, | |
| float* x, const int x_inc); | |
| void PUBLIC_API cblas_dscal(const int n, | |
| const double alpha, | |
| double* x, const int x_inc); | |
| void PUBLIC_API cblas_cscal(const int n, | |
| const void* alpha, | |
| void* x, const int x_inc); | |
| void PUBLIC_API cblas_zscal(const int n, | |
| const void* alpha, | |
| void* x, const int x_inc); | |
| // Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY/HCOPY | |
| void PUBLIC_API cblas_scopy(const int n, | |
| const float* x, const int x_inc, | |
| float* y, const int y_inc); | |
| void PUBLIC_API cblas_dcopy(const int n, | |
| const double* x, const int x_inc, | |
| double* y, const int y_inc); | |
| void PUBLIC_API cblas_ccopy(const int n, | |
| const void* x, const int x_inc, | |
| void* y, const int y_inc); | |
| void PUBLIC_API cblas_zcopy(const int n, | |
| const void* x, const int x_inc, | |
| void* y, const int y_inc); | |
| // Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY/HAXPY | |
| void PUBLIC_API cblas_saxpy(const int n, | |
| const float alpha, | |
| const float* x, const int x_inc, | |
| float* y, const int y_inc); | |
| void PUBLIC_API cblas_daxpy(const int n, | |
| const double alpha, | |
| const double* x, const int x_inc, | |
| double* y, const int y_inc); | |
| void PUBLIC_API cblas_caxpy(const int n, | |
| const void* alpha, | |
| const void* x, const int x_inc, | |
| void* y, const int y_inc); | |
| void PUBLIC_API cblas_zaxpy(const int n, | |
| const void* alpha, | |
| const void* x, const int x_inc, | |
| void* y, const int y_inc); | |
| // Dot product of two vectors: SDOT/DDOT/HDOT | |
| float PUBLIC_API cblas_sdot(const int n, | |
| const float* x, const int x_inc, | |
| const float* y, const int y_inc); | |
| double PUBLIC_API cblas_ddot(const int n, | |
| const double* x, const int x_inc, | |
| const double* y, const int y_inc); | |
| // Dot product of two complex vectors: CDOTU/ZDOTU | |
| void PUBLIC_API cblas_cdotu_sub(const int n, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| void* dot); | |
| void PUBLIC_API cblas_zdotu_sub(const int n, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| void* dot); | |
| // Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC | |
| void PUBLIC_API cblas_cdotc_sub(const int n, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| void* dot); | |
| void PUBLIC_API cblas_zdotc_sub(const int n, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| void* dot); | |
| // Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2/HNRM2 | |
| float PUBLIC_API cblas_snrm2(const int n, | |
| const float* x, const int x_inc); | |
| double PUBLIC_API cblas_dnrm2(const int n, | |
| const double* x, const int x_inc); | |
| float PUBLIC_API cblas_scnrm2(const int n, | |
| const void* x, const int x_inc); | |
| double PUBLIC_API cblas_dznrm2(const int n, | |
| const void* x, const int x_inc); | |
| // Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM/HASUM | |
| float PUBLIC_API cblas_sasum(const int n, | |
| const float* x, const int x_inc); | |
| double PUBLIC_API cblas_dasum(const int n, | |
| const double* x, const int x_inc); | |
| float PUBLIC_API cblas_scasum(const int n, | |
| const void* x, const int x_inc); | |
| double PUBLIC_API cblas_dzasum(const int n, | |
| const void* x, const int x_inc); | |
| // Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM/HSUM | |
| float PUBLIC_API cblas_ssum(const int n, | |
| const float* x, const int x_inc); | |
| double PUBLIC_API cblas_dsum(const int n, | |
| const double* x, const int x_inc); | |
| float PUBLIC_API cblas_scsum(const int n, | |
| const void* x, const int x_inc); | |
| double PUBLIC_API cblas_dzsum(const int n, | |
| const void* x, const int x_inc); | |
| // Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX | |
| int PUBLIC_API cblas_isamax(const int n, | |
| const float* x, const int x_inc); | |
| int PUBLIC_API cblas_idamax(const int n, | |
| const double* x, const int x_inc); | |
| int PUBLIC_API cblas_icamax(const int n, | |
| const void* x, const int x_inc); | |
| int PUBLIC_API cblas_izamax(const int n, | |
| const void* x, const int x_inc); | |
| // Index of absolute minimum value in a vector (non-BLAS function): iSAMIN/iDAMIN/iCAMIN/iZAMIN/iHAMIN | |
| int PUBLIC_API cblas_isamin(const int n, | |
| const float* x, const int x_inc); | |
| int PUBLIC_API cblas_idamin(const int n, | |
| const double* x, const int x_inc); | |
| int PUBLIC_API cblas_icamin(const int n, | |
| const void* x, const int x_inc); | |
| int PUBLIC_API cblas_izamin(const int n, | |
| const void* x, const int x_inc); | |
| // Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX | |
| int PUBLIC_API cblas_ismax(const int n, | |
| const float* x, const int x_inc); | |
| int PUBLIC_API cblas_idmax(const int n, | |
| const double* x, const int x_inc); | |
| int PUBLIC_API cblas_icmax(const int n, | |
| const void* x, const int x_inc); | |
| int PUBLIC_API cblas_izmax(const int n, | |
| const void* x, const int x_inc); | |
| // Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN/iHMIN | |
| int PUBLIC_API cblas_ismin(const int n, | |
| const float* x, const int x_inc); | |
| int PUBLIC_API cblas_idmin(const int n, | |
| const double* x, const int x_inc); | |
| int PUBLIC_API cblas_icmin(const int n, | |
| const void* x, const int x_inc); | |
| int PUBLIC_API cblas_izmin(const int n, | |
| const void* x, const int x_inc); | |
| // ================================================================================================= | |
| // BLAS level-2 (matrix-vector) routines | |
| // ================================================================================================= | |
| // General matrix-vector multiplication: SGEMV/DGEMV/CGEMV/ZGEMV/HGEMV | |
| void PUBLIC_API cblas_sgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, | |
| const int m, const int n, | |
| const float alpha, | |
| const float* a, const int a_ld, | |
| const float* x, const int x_inc, | |
| const float beta, | |
| float* y, const int y_inc); | |
| void PUBLIC_API cblas_dgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, | |
| const int m, const int n, | |
| const double alpha, | |
| const double* a, const int a_ld, | |
| const double* x, const int x_inc, | |
| const double beta, | |
| double* y, const int y_inc); | |
| void PUBLIC_API cblas_cgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* x, const int x_inc, | |
| const void* beta, | |
| void* y, const int y_inc); | |
| void PUBLIC_API cblas_zgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* x, const int x_inc, | |
| const void* beta, | |
| void* y, const int y_inc); | |
| // General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV/HGBMV | |
| void PUBLIC_API cblas_sgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, | |
| const int m, const int n, const int kl, const int ku, | |
| const float alpha, | |
| const float* a, const int a_ld, | |
| const float* x, const int x_inc, | |
| const float beta, | |
| float* y, const int y_inc); | |
| void PUBLIC_API cblas_dgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, | |
| const int m, const int n, const int kl, const int ku, | |
| const double alpha, | |
| const double* a, const int a_ld, | |
| const double* x, const int x_inc, | |
| const double beta, | |
| double* y, const int y_inc); | |
| void PUBLIC_API cblas_cgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, | |
| const int m, const int n, const int kl, const int ku, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* x, const int x_inc, | |
| const void* beta, | |
| void* y, const int y_inc); | |
| void PUBLIC_API cblas_zgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, | |
| const int m, const int n, const int kl, const int ku, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* x, const int x_inc, | |
| const void* beta, | |
| void* y, const int y_inc); | |
| // Hermitian matrix-vector multiplication: CHEMV/ZHEMV | |
| void PUBLIC_API cblas_chemv(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* x, const int x_inc, | |
| const void* beta, | |
| void* y, const int y_inc); | |
| void PUBLIC_API cblas_zhemv(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* x, const int x_inc, | |
| const void* beta, | |
| void* y, const int y_inc); | |
| // Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV | |
| void PUBLIC_API cblas_chbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, const int k, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* x, const int x_inc, | |
| const void* beta, | |
| void* y, const int y_inc); | |
| void PUBLIC_API cblas_zhbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, const int k, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* x, const int x_inc, | |
| const void* beta, | |
| void* y, const int y_inc); | |
| // Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV | |
| void PUBLIC_API cblas_chpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const void* alpha, | |
| const void* ap, | |
| const void* x, const int x_inc, | |
| const void* beta, | |
| void* y, const int y_inc); | |
| void PUBLIC_API cblas_zhpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const void* alpha, | |
| const void* ap, | |
| const void* x, const int x_inc, | |
| const void* beta, | |
| void* y, const int y_inc); | |
| // Symmetric matrix-vector multiplication: SSYMV/DSYMV/HSYMV | |
| void PUBLIC_API cblas_ssymv(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const float alpha, | |
| const float* a, const int a_ld, | |
| const float* x, const int x_inc, | |
| const float beta, | |
| float* y, const int y_inc); | |
| void PUBLIC_API cblas_dsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const double alpha, | |
| const double* a, const int a_ld, | |
| const double* x, const int x_inc, | |
| const double beta, | |
| double* y, const int y_inc); | |
| // Symmetric banded matrix-vector multiplication: SSBMV/DSBMV/HSBMV | |
| void PUBLIC_API cblas_ssbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, const int k, | |
| const float alpha, | |
| const float* a, const int a_ld, | |
| const float* x, const int x_inc, | |
| const float beta, | |
| float* y, const int y_inc); | |
| void PUBLIC_API cblas_dsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, const int k, | |
| const double alpha, | |
| const double* a, const int a_ld, | |
| const double* x, const int x_inc, | |
| const double beta, | |
| double* y, const int y_inc); | |
| // Symmetric packed matrix-vector multiplication: SSPMV/DSPMV/HSPMV | |
| void PUBLIC_API cblas_sspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const float alpha, | |
| const float* ap, | |
| const float* x, const int x_inc, | |
| const float beta, | |
| float* y, const int y_inc); | |
| void PUBLIC_API cblas_dspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const double alpha, | |
| const double* ap, | |
| const double* x, const int x_inc, | |
| const double beta, | |
| double* y, const int y_inc); | |
| // Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV/HTRMV | |
| void PUBLIC_API cblas_strmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const float* a, const int a_ld, | |
| float* x, const int x_inc); | |
| void PUBLIC_API cblas_dtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const double* a, const int a_ld, | |
| double* x, const int x_inc); | |
| void PUBLIC_API cblas_ctrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const void* a, const int a_ld, | |
| void* x, const int x_inc); | |
| void PUBLIC_API cblas_ztrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const void* a, const int a_ld, | |
| void* x, const int x_inc); | |
| // Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV/HTBMV | |
| void PUBLIC_API cblas_stbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, const int k, | |
| const float* a, const int a_ld, | |
| float* x, const int x_inc); | |
| void PUBLIC_API cblas_dtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, const int k, | |
| const double* a, const int a_ld, | |
| double* x, const int x_inc); | |
| void PUBLIC_API cblas_ctbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, const int k, | |
| const void* a, const int a_ld, | |
| void* x, const int x_inc); | |
| void PUBLIC_API cblas_ztbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, const int k, | |
| const void* a, const int a_ld, | |
| void* x, const int x_inc); | |
| // Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV/HTPMV | |
| void PUBLIC_API cblas_stpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const float* ap, | |
| float* x, const int x_inc); | |
| void PUBLIC_API cblas_dtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const double* ap, | |
| double* x, const int x_inc); | |
| void PUBLIC_API cblas_ctpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const void* ap, | |
| void* x, const int x_inc); | |
| void PUBLIC_API cblas_ztpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const void* ap, | |
| void* x, const int x_inc); | |
| // Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV | |
| void PUBLIC_API cblas_strsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const float* a, const int a_ld, | |
| float* x, const int x_inc); | |
| void PUBLIC_API cblas_dtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const double* a, const int a_ld, | |
| double* x, const int x_inc); | |
| void PUBLIC_API cblas_ctrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const void* a, const int a_ld, | |
| void* x, const int x_inc); | |
| void PUBLIC_API cblas_ztrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const void* a, const int a_ld, | |
| void* x, const int x_inc); | |
| // Solves a banded triangular system of equations: STBSV/DTBSV/CTBSV/ZTBSV | |
| void PUBLIC_API cblas_stbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, const int k, | |
| const float* a, const int a_ld, | |
| float* x, const int x_inc); | |
| void PUBLIC_API cblas_dtbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, const int k, | |
| const double* a, const int a_ld, | |
| double* x, const int x_inc); | |
| void PUBLIC_API cblas_ctbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, const int k, | |
| const void* a, const int a_ld, | |
| void* x, const int x_inc); | |
| void PUBLIC_API cblas_ztbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, const int k, | |
| const void* a, const int a_ld, | |
| void* x, const int x_inc); | |
| // Solves a packed triangular system of equations: STPSV/DTPSV/CTPSV/ZTPSV | |
| void PUBLIC_API cblas_stpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const float* ap, | |
| float* x, const int x_inc); | |
| void PUBLIC_API cblas_dtpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const double* ap, | |
| double* x, const int x_inc); | |
| void PUBLIC_API cblas_ctpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const void* ap, | |
| void* x, const int x_inc); | |
| void PUBLIC_API cblas_ztpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int n, | |
| const void* ap, | |
| void* x, const int x_inc); | |
| // General rank-1 matrix update: SGER/DGER/HGER | |
| void PUBLIC_API cblas_sger(const CLBlastLayout layout, | |
| const int m, const int n, | |
| const float alpha, | |
| const float* x, const int x_inc, | |
| const float* y, const int y_inc, | |
| float* a, const int a_ld); | |
| void PUBLIC_API cblas_dger(const CLBlastLayout layout, | |
| const int m, const int n, | |
| const double alpha, | |
| const double* x, const int x_inc, | |
| const double* y, const int y_inc, | |
| double* a, const int a_ld); | |
| // General rank-1 complex matrix update: CGERU/ZGERU | |
| void PUBLIC_API cblas_cgeru(const CLBlastLayout layout, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| void* a, const int a_ld); | |
| void PUBLIC_API cblas_zgeru(const CLBlastLayout layout, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| void* a, const int a_ld); | |
| // General rank-1 complex conjugated matrix update: CGERC/ZGERC | |
| void PUBLIC_API cblas_cgerc(const CLBlastLayout layout, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| void* a, const int a_ld); | |
| void PUBLIC_API cblas_zgerc(const CLBlastLayout layout, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| void* a, const int a_ld); | |
| // Hermitian rank-1 matrix update: CHER/ZHER | |
| void PUBLIC_API cblas_cher(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const float alpha, | |
| const void* x, const int x_inc, | |
| void* a, const int a_ld); | |
| void PUBLIC_API cblas_zher(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const double alpha, | |
| const void* x, const int x_inc, | |
| void* a, const int a_ld); | |
| // Hermitian packed rank-1 matrix update: CHPR/ZHPR | |
| void PUBLIC_API cblas_chpr(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const float alpha, | |
| const void* x, const int x_inc, | |
| void* ap); | |
| void PUBLIC_API cblas_zhpr(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const double alpha, | |
| const void* x, const int x_inc, | |
| void* ap); | |
| // Hermitian rank-2 matrix update: CHER2/ZHER2 | |
| void PUBLIC_API cblas_cher2(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const void* alpha, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| void* a, const int a_ld); | |
| void PUBLIC_API cblas_zher2(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const void* alpha, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| void* a, const int a_ld); | |
| // Hermitian packed rank-2 matrix update: CHPR2/ZHPR2 | |
| void PUBLIC_API cblas_chpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const void* alpha, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| void* ap); | |
| void PUBLIC_API cblas_zhpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const void* alpha, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| void* ap); | |
| // Symmetric rank-1 matrix update: SSYR/DSYR/HSYR | |
| void PUBLIC_API cblas_ssyr(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const float alpha, | |
| const float* x, const int x_inc, | |
| float* a, const int a_ld); | |
| void PUBLIC_API cblas_dsyr(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const double alpha, | |
| const double* x, const int x_inc, | |
| double* a, const int a_ld); | |
| // Symmetric packed rank-1 matrix update: SSPR/DSPR/HSPR | |
| void PUBLIC_API cblas_sspr(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const float alpha, | |
| const float* x, const int x_inc, | |
| float* ap); | |
| void PUBLIC_API cblas_dspr(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const double alpha, | |
| const double* x, const int x_inc, | |
| double* ap); | |
| // Symmetric rank-2 matrix update: SSYR2/DSYR2/HSYR2 | |
| void PUBLIC_API cblas_ssyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const float alpha, | |
| const float* x, const int x_inc, | |
| const float* y, const int y_inc, | |
| float* a, const int a_ld); | |
| void PUBLIC_API cblas_dsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const double alpha, | |
| const double* x, const int x_inc, | |
| const double* y, const int y_inc, | |
| double* a, const int a_ld); | |
| // Symmetric packed rank-2 matrix update: SSPR2/DSPR2/HSPR2 | |
| void PUBLIC_API cblas_sspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const float alpha, | |
| const float* x, const int x_inc, | |
| const float* y, const int y_inc, | |
| float* ap); | |
| void PUBLIC_API cblas_dspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, | |
| const int n, | |
| const double alpha, | |
| const double* x, const int x_inc, | |
| const double* y, const int y_inc, | |
| double* ap); | |
| // ================================================================================================= | |
| // BLAS level-3 (matrix-matrix) routines | |
| // ================================================================================================= | |
| // General matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM | |
| void PUBLIC_API cblas_sgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, | |
| const int m, const int n, const int k, | |
| const float alpha, | |
| const float* a, const int a_ld, | |
| const float* b, const int b_ld, | |
| const float beta, | |
| float* c, const int c_ld); | |
| void PUBLIC_API cblas_dgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, | |
| const int m, const int n, const int k, | |
| const double alpha, | |
| const double* a, const int a_ld, | |
| const double* b, const int b_ld, | |
| const double beta, | |
| double* c, const int c_ld); | |
| void PUBLIC_API cblas_cgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, | |
| const int m, const int n, const int k, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* b, const int b_ld, | |
| const void* beta, | |
| void* c, const int c_ld); | |
| void PUBLIC_API cblas_zgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, | |
| const int m, const int n, const int k, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* b, const int b_ld, | |
| const void* beta, | |
| void* c, const int c_ld); | |
| // Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM/HSYMM | |
| void PUBLIC_API cblas_ssymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, | |
| const int m, const int n, | |
| const float alpha, | |
| const float* a, const int a_ld, | |
| const float* b, const int b_ld, | |
| const float beta, | |
| float* c, const int c_ld); | |
| void PUBLIC_API cblas_dsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, | |
| const int m, const int n, | |
| const double alpha, | |
| const double* a, const int a_ld, | |
| const double* b, const int b_ld, | |
| const double beta, | |
| double* c, const int c_ld); | |
| void PUBLIC_API cblas_csymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* b, const int b_ld, | |
| const void* beta, | |
| void* c, const int c_ld); | |
| void PUBLIC_API cblas_zsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* b, const int b_ld, | |
| const void* beta, | |
| void* c, const int c_ld); | |
| // Hermitian matrix-matrix multiplication: CHEMM/ZHEMM | |
| void PUBLIC_API cblas_chemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* b, const int b_ld, | |
| const void* beta, | |
| void* c, const int c_ld); | |
| void PUBLIC_API cblas_zhemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* b, const int b_ld, | |
| const void* beta, | |
| void* c, const int c_ld); | |
| // Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK/HSYRK | |
| void PUBLIC_API cblas_ssyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, | |
| const int n, const int k, | |
| const float alpha, | |
| const float* a, const int a_ld, | |
| const float beta, | |
| float* c, const int c_ld); | |
| void PUBLIC_API cblas_dsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, | |
| const int n, const int k, | |
| const double alpha, | |
| const double* a, const int a_ld, | |
| const double beta, | |
| double* c, const int c_ld); | |
| void PUBLIC_API cblas_csyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, | |
| const int n, const int k, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* beta, | |
| void* c, const int c_ld); | |
| void PUBLIC_API cblas_zsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, | |
| const int n, const int k, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* beta, | |
| void* c, const int c_ld); | |
| // Rank-K update of a hermitian matrix: CHERK/ZHERK | |
| void PUBLIC_API cblas_cherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, | |
| const int n, const int k, | |
| const float alpha, | |
| const void* a, const int a_ld, | |
| const float beta, | |
| void* c, const int c_ld); | |
| void PUBLIC_API cblas_zherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, | |
| const int n, const int k, | |
| const double alpha, | |
| const void* a, const int a_ld, | |
| const double beta, | |
| void* c, const int c_ld); | |
| // Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K/HSYR2K | |
| void PUBLIC_API cblas_ssyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, | |
| const int n, const int k, | |
| const float alpha, | |
| const float* a, const int a_ld, | |
| const float* b, const int b_ld, | |
| const float beta, | |
| float* c, const int c_ld); | |
| void PUBLIC_API cblas_dsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, | |
| const int n, const int k, | |
| const double alpha, | |
| const double* a, const int a_ld, | |
| const double* b, const int b_ld, | |
| const double beta, | |
| double* c, const int c_ld); | |
| void PUBLIC_API cblas_csyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, | |
| const int n, const int k, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* b, const int b_ld, | |
| const void* beta, | |
| void* c, const int c_ld); | |
| void PUBLIC_API cblas_zsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, | |
| const int n, const int k, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* b, const int b_ld, | |
| const void* beta, | |
| void* c, const int c_ld); | |
| // Rank-2K update of a hermitian matrix: CHER2K/ZHER2K | |
| void PUBLIC_API cblas_cher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, | |
| const int n, const int k, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* b, const int b_ld, | |
| const float beta, | |
| void* c, const int c_ld); | |
| void PUBLIC_API cblas_zher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, | |
| const int n, const int k, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| const void* b, const int b_ld, | |
| const double beta, | |
| void* c, const int c_ld); | |
| // Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM/HTRMM | |
| void PUBLIC_API cblas_strmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int m, const int n, | |
| const float alpha, | |
| const float* a, const int a_ld, | |
| float* b, const int b_ld); | |
| void PUBLIC_API cblas_dtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int m, const int n, | |
| const double alpha, | |
| const double* a, const int a_ld, | |
| double* b, const int b_ld); | |
| void PUBLIC_API cblas_ctrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| void* b, const int b_ld); | |
| void PUBLIC_API cblas_ztrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| void* b, const int b_ld); | |
| // Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM | |
| void PUBLIC_API cblas_strsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int m, const int n, | |
| const float alpha, | |
| const float* a, const int a_ld, | |
| float* b, const int b_ld); | |
| void PUBLIC_API cblas_dtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int m, const int n, | |
| const double alpha, | |
| const double* a, const int a_ld, | |
| double* b, const int b_ld); | |
| void PUBLIC_API cblas_ctrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| void* b, const int b_ld); | |
| void PUBLIC_API cblas_ztrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| void* b, const int b_ld); | |
| // ================================================================================================= | |
| // Extra non-BLAS routines (level-X) | |
| // ================================================================================================= | |
| // Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD | |
| void PUBLIC_API cblas_shad(const int n, | |
| const float alpha, | |
| const float* x, const int x_inc, | |
| const float* y, const int y_inc, | |
| const float beta, | |
| float* z, const int z_inc); | |
| void PUBLIC_API cblas_dhad(const int n, | |
| const double alpha, | |
| const double* x, const int x_inc, | |
| const double* y, const int y_inc, | |
| const double beta, | |
| double* z, const int z_inc); | |
| void PUBLIC_API cblas_chad(const int n, | |
| const void* alpha, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| const void* beta, | |
| void* z, const int z_inc); | |
| void PUBLIC_API cblas_zhad(const int n, | |
| const void* alpha, | |
| const void* x, const int x_inc, | |
| const void* y, const int y_inc, | |
| const void* beta, | |
| void* z, const int z_inc); | |
| // Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY | |
| void PUBLIC_API cblas_somatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, | |
| const int m, const int n, | |
| const float alpha, | |
| const float* a, const int a_ld, | |
| float* b, const int b_ld); | |
| void PUBLIC_API cblas_domatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, | |
| const int m, const int n, | |
| const double alpha, | |
| const double* a, const int a_ld, | |
| double* b, const int b_ld); | |
| void PUBLIC_API cblas_comatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| void* b, const int b_ld); | |
| void PUBLIC_API cblas_zomatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, | |
| const int m, const int n, | |
| const void* alpha, | |
| const void* a, const int a_ld, | |
| void* b, const int b_ld); | |
| // Im2col function (non-BLAS function): SIM2COL/DIM2COL/CIM2COL/ZIM2COL/HIM2COL | |
| void PUBLIC_API cblas_sim2col(const CLBlastKernelMode kernel_mode, | |
| const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, | |
| const float* im, | |
| float* col); | |
| void PUBLIC_API cblas_dim2col(const CLBlastKernelMode kernel_mode, | |
| const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, | |
| const double* im, | |
| double* col); | |
| void PUBLIC_API cblas_cim2col(const CLBlastKernelMode kernel_mode, | |
| const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, | |
| const void* im, | |
| void* col); | |
| void PUBLIC_API cblas_zim2col(const CLBlastKernelMode kernel_mode, | |
| const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, | |
| const void* im, | |
| void* col); | |
| // Col2im function (non-BLAS function): SCOL2IM/DCOL2IM/CCOL2IM/ZCOL2IM/HCOL2IM | |
| void PUBLIC_API cblas_scol2im(const CLBlastKernelMode kernel_mode, | |
| const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, | |
| const float* col, | |
| float* im); | |
| void PUBLIC_API cblas_dcol2im(const CLBlastKernelMode kernel_mode, | |
| const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, | |
| const double* col, | |
| double* im); | |
| void PUBLIC_API cblas_ccol2im(const CLBlastKernelMode kernel_mode, | |
| const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, | |
| const void* col, | |
| void* im); | |
| void PUBLIC_API cblas_zcol2im(const CLBlastKernelMode kernel_mode, | |
| const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, | |
| const void* col, | |
| void* im); | |
| // ================================================================================================= | |
| } // extern "C" | |
| // CLBLAST_CLBLAST_NETLIB_C_H_ | |