Matrix multiplication with MKL

2019-07-18 13:20发布

I have the CSR coordinates of a matrix.

/* alloc space for COO matrix */
int *coo_rows = (int*) malloc(K.n_rows * sizeof(int));
int *coo_cols = (int*) malloc(K.n_rows * sizeof(int));
float *coo_vals = (float*) malloc(K.n_rows * sizeof(float));

/*Load coo values*/

int *rowptrs = (int*) malloc((N_unique+1)*sizeof(int));
int *colinds = (int*) malloc(K.n_rows *sizeof(int));
double *vals = (double*) malloc(K.n_rows *sizeof(double));

/* take csr values */
int job[] = {
        2, // job(1)=2 (coo->csr with sorting)
        0, // job(2)=1 (one-based indexing for csr matrix)
        0, // job(3)=1 (one-based indexing for coo matrix)
        0, // empty
        n1, // job(5)=nnz (sets nnz for csr matrix)
        0 // job(6)=0 (all output arrays filled)
        };
int info;
mkl_scsrcoo(job, &n, vals, colinds, rowptrs, &n1, coo_vals, coo_rows, coo_cols, &info);
assert(info == 0 && "Converted COO->CSR");

Now I want to apply the mkl_dcsrmm function to compute C := alpha*A*B + beta*C with beta = 0;

/* function declaration */
void mkl_dcsrmm (char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, double *b, MKL_INT *ldb, double *beta, double *c, MKL_INT *ldc);

Since now I have.

int A_rows = ..., A_cols = ..., C_cols = ...
double alpha = 1.0;

mkl_dcsrmm ((char*)"N", &A_rows, &C_cols, &A_cols, &alpha, char *matdescra, vals, coo_cols, rowptrs, colinds , double *b, MKL_INT *ldb, double *beta, double *c, MKL_INT *ldc);

I found some difficulties on filling the inputs. Could you please help me to fill the rest of the inputs?

A specific input for which I want to go in more details is the matdescra. I borrowed the following code from cspblas_ccsr example

char matdescra[6];
matdescra[0] = 'g';
matdescra[1] = 'l';
matdescra[2] = 'n';
matdescra[3] = 'c';

but I have some questions about that. The matrix A I am working is not triangular and the initialization of this char array engage you to make such a declaration, how should I configure the parameters of the matdescra array?

标签: c intel-mkl
1条回答
啃猪蹄的小仙女
2楼-- · 2019-07-18 14:05

Here is what I use, and what works for me.

char matdescra[6] = {'g', 'l', 'n', 'c', 'x', 'x'};
/* https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-34C8DB79-0139-46E0-8B53-99F3BEE7B2D4.htm#TBL2-6
G: General. D: Diagonal
L/U Lower/Upper triangular (ignored with G)
N: non-unit diagonal (ignored with G)
C: zero-based indexing.
*/

Complete Example

Here is a complete example. I first create a random matrix by filling a dense matrix with a specified density of Non-Zero elements. Then I convert it to a sparse matrix in CSR-format. Finally, I do the multiplication using mkl_dcsrmm. As a possible check (check not done), I do the same multiplication using the cblas_dgemm function with the dense matrix.

#include "mkl.h"
#include "mkl_spblas.h"
#include <stddef.h>  // For NULL
#include <stdlib.h>  // for rand()
#include <assert.h>
#include <stdio.h>
#include <limits.h>


// Compute C = A * B; where A is sparse and B is dense.
int main() {
    MKL_INT m=10, n=5, k=11;
    const double sparsity = 0.9; ///< @param sparsity Values below which are set to zero (sampled from uniform(0,1)-distribution).
    double  *A_dense;
    double *B;
    double *C;
    double alpha = 1.0;
    double beta = 0.0;
    const int allignment = 64;

    // Seed the RNG to always be the same
    srand(42);

    // Allocate memory to matrices
    A_dense = (double *)mkl_malloc( m*k*sizeof( double ), allignment);
    B = (double *)mkl_malloc( k*n*sizeof( double ), allignment);
    C = (double *)mkl_malloc( m*n*sizeof( double ), allignment);
    if (A_dense == NULL || B == NULL || C == NULL) {
        printf("ERROR: Can't allocate memory for matrices. Aborting... \n\n");
        mkl_free(A_dense);
        mkl_free(B);
        mkl_free(C);
        return 1;
    }

    // Initializing matrix data
    int i;
    int nzmax = 0;
    for (i = 0; i < (m*k); i++) {
        double val = rand() / (double)RAND_MAX;
        if ( val < sparsity ) {
            A_dense[i] = 0.0;
        } else {
            A_dense[i] = val;
            nzmax++;
        }
    }
    for (i = 0; i < (k*n); i++) {
            B[i] = rand();
    }
    for (i = 0; i < (m*n); i++) {
            C[i]  = 0.0;
    }

    // Convert A to a sparse matrix in CSR format.

    // INFO: https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-AD67DD8D-4C22-4232-8D3F-AF97DC2ABBC8.htm#GUID-AD67DD8D-4C22-4232-8D3F-AF97DC2ABBC8
    MKL_INT job[6];
    job[0] = 0;  // convert TO CSR.
    job[1] = 0;  // Zero-based indexing for input.
    job[2] = 0;  // Zero-based indexing for output.
    job[3] = 2;  // adns is  a whole matrix A.
    job[4] = nzmax;  // Maximum number of non-zero elements allowed.
    job[5] = 3;  // all 3 arays are generated for output.

    /* JOB: conversion parameters
    * m: number of rows of A.
    * k: number of columns of A.
    * adns: (input/output). Array containing non-zero elements of the matrix A.
    * lda: specifies the leading dimension of adns. must be at least max(1, m).
    * acsr: (input/output) array containing non-zero elements of the matrix A. 
    * ja: array containing the column indices.
    * ia length m+1,  rowIndex.
    * OUTPUT:
    * info: 0 if successful. i if interrupted at i-th row because of lack of space.
    */
    int info = -1;
    printf("nzmax:\t %d\n", nzmax);

    double *A_sparse = mkl_malloc(nzmax * sizeof(double), allignment);
    if (A_sparse == NULL) {
        printf("ERROR: Could not allocate enough space to A_sparse.\n");
        return 1;
    }
    MKL_INT *A_sparse_cols = mkl_malloc(nzmax * sizeof(MKL_INT), allignment);
    if (A_sparse_cols == NULL) {
        printf("ERROR: Could not allocate enough space to A_sparse_cols.\n");
        return 1;
    }
    MKL_INT *A_sparse_rowInd = mkl_malloc((m+1) * sizeof(MKL_INT), allignment);
    if (A_sparse_rowInd == NULL) {
        printf("ERROR: Could not allocate enough space to A_sparse_rowInd.\n");
        return 1;
    }
    mkl_ddnscsr(job, &m, &k, A_dense, &k, A_sparse, A_sparse_cols, A_sparse_rowInd, &info);
    if(info != 0) {
        printf("WARNING: info=%d, expected 0.\n", info);
    }
    assert(info == 0);

    char transa = 'n';
    MKL_INT ldb = n, ldc=n;
    char matdescra[6] = {'g', 'l', 'n', 'c', 'x', 'x'};
    /* https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-34C8DB79-0139-46E0-8B53-99F3BEE7B2D4.htm#TBL2-6
    G: General. D: Diagonal
    L/U Lower/Upper triangular (ignored with G)
    N: non-unit diagonal (ignored with G)
    C: zero-based indexing.
    */


    mkl_dcsrmm(&transa, &m, &n, &m, &alpha, matdescra, A_sparse, A_sparse_cols, 
            A_sparse_rowInd, &(A_sparse_rowInd[1]), B, &ldb, &beta, C, &ldc);
    // The same computation in dense format
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 
                m, n, k, alpha, A_dense, k, B, n, beta, C, n);

    mkl_free(A_dense);
    mkl_free(A_sparse);
    mkl_free(A_sparse_cols);
    mkl_free(A_sparse_rowInd);
    mkl_free(B);
    mkl_free(C);
    return 0;
}
查看更多
登录 后发表回答