c++ - Should Eigen dense matrix * dense vector multiplication be 7 times slower than GSL? -

- January 15, 2014

The answer to this question of mine made me expect that (for matrices with 1/4 non-vanishing entries) in Eigen the product dense matrix * dense vector should outperform sparse matrix * dense vector.

Not only do I see the opposite, but both are also outperformed by GSL, by a factor of 7 and 4 respectively.

Am I using Eigen incorrectly? Am I timing carelessly? I am startled.

My compile options read:

ludi@ludi-m17xr4:~/desktop/tests$ g++ -o eigenfill.x eigenfill.cc -L/usr/local/lib -lgsl -lgslcblas && ./eigenfill.x

My code reads:

#include <iostream> #include <stdio.h> #include <stdlib.h>      #include <eigen/sparse> #include <eigen/dense> #include <gsl/gsl_matrix.h> #include <sys/time.h> #include <gsl/gsl_blas.h> #define helix 100 #define rows helix*helix #define cols rows #define filling rows/4 #define reps 10   using namespace eigen;  /*-- declarationes --*/ int fillsparsematrix(sparsematrix<double> & mat); int filldensematrices(matrixxd & mat, gsl_matrix *testmat); double vee(int i, int j); int set_vectors_randomly(gsl_vector * v2, vectorxd v1);  int main() { int rep;     struct timeval tval_before, tval_after, tval_result;  gsl_matrix *testmat     = gsl_matrix_calloc(rows, cols); gsl_vector *v2      =gsl_vector_calloc(cols); gsl_vector *prod    =gsl_vector_calloc(cols);  sparsematrix<double> mat(rows,cols);         // default column major matrixxd mat(rows,cols);         // default column major vectorxd v1(cols), vv1(cols);  fillsparsematrix(mat); filldensematrices(mat, testmat);     printf("\n/*--- --- --- ---*/\n"); for(rep=0;rep<reps;rep++) { set_vectors_randomly(v2, v1);      gettimeofday(&tval_before, null);        vv1 = mat*v1;     gettimeofday(&tval_after, null);     timersub(&tval_after, &tval_before, &tval_result);      printf("time 1 product, sparse eigen / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);     gettimeofday(&tval_before, null);        gsl_blas_dgemv( cblasnotrans,1.0, testmat, v2, 0.0, prod);     gettimeofday(&tval_after, null);     timersub(&tval_after, &tval_before, &tval_result);     printf("time 1 product, gsl / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);      gettimeofday(&tval_before, null);        vv1 = mat*v1;     gettimeofday(&tval_after, null);     timersub(&tval_after, &tval_before, &tval_result);     printf("time 1 product, dense eigen / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);     printf("/*--- --- --- ---*/\n\n");     //std::cout << mat 
<< std::endl; } gsl_matrix_free(testmat);    printf("--- --- --->done\n"); return(0); }  /*-- --*/ int fillsparsematrix(sparsematrix<double> &mat) { int i, j; eigen::vectorxd vres; mat.reserve(eigen::vectorxi::constant(cols,filling));  printf("filling sparse matrix ...");     for(i=0;i<rows;i++)     {         if(i%2500==0){printf("i= %i\n", i);}     for(j=0;j<cols;j++)         {         if (vee(i,j) != 0){mat.insert(i,j) = vee(i,j);    /*alternative: mat.coeffref(i,j) += v_ij;*/ }         }      }  return(0); } /*-- --*/  /*-- --*/ int filldensematrices(matrixxd &mat, gsl_matrix * testmat) { int i, j; eigen::vectorxd vres; double aux; printf("filling dense matrix ...");     for(i=0;i<rows;i++)     {         if(i%2500==0){printf("i= %i\n", i);}     for(j=0;j<cols;j++)         {         aux = vee(i,j);         if (aux != 0)         {         mat(i,j) = aux;             gsl_matrix_set(testmat, i, j, aux);         }         }      } return(0); } /*-- --*/  double vee(int i, int j) {     double result = 0.0;      if(i%4 == 0){result =1.0;}      return result; } /*-- --*/ int set_vectors_randomly(gsl_vector * v2, vectorxd v1){ printf("setting vectors rendomly anew ...\n"); (int j=0;j<cols;j++)  { double r=drand48(); v1(j) =r; gsl_vector_set(v2, j, r);  } return(0); } /*-- --*/

With Eigen, performance is abysmal when compiling without compiler optimizations. There are several ways to increase performance dramatically:

With optimizations turned on (-O2 or -O3 in g++), performance can be two to three orders of magnitude higher.
An additional (smaller) speedup can be attained by defining NDEBUG before including the Eigen library. This disables bounds checking, so make sure there are no issues before enabling this feature.
Eigen can take advantage of vectorization (SSE as of 3.2.6, AVX as of 3.3, PowerPC and ARM as well), leading to further speedups. Enable the relevant flags in your compiler.
Enabling OpenMP can lead to speedups as well. Again, enable the relevant flags in your compiler and Eigen will do the rest.

Search This Blog

WIKI

c++ - Should Eigen dense matrix * dense vector multiplication be 7 times slower than GSL? -

Comments

Post a Comment

Popular posts from this blog

jquery - ReferenceError: CKEDITOR is not defined -

javascript - Chart.js (Radar Chart) different scaleLineColor for each scaleLine -

apache - Error with PHP mail(): Multiple or malformed newlines found in additional_header -