c++ - Should Eigen dense matrix * dense vector multiplication be 7 times slower than GSL? -


The answer to this question of mine made me expect that (for matrices with 1/4 of non-vanishing entries) in Eigen the product dense matrix * dense vector should outperform sparse matrix * dense vector.

Not only do I see the opposite, but both are also outperformed by GSL, by a factor of 7 and 4 respectively.

Am I using Eigen incorrectly? Am I timing carelessly? I am startled.

My compile options read:

ludi@ludi-m17xr4:~/desktop/tests$ g++ -o eigenfill.x eigenfill.cc -L/usr/local/lib -lgsl -lgslcblas && ./eigenfill.x

my code reads:

#include <iostream> #include <stdio.h> #include <stdlib.h>      #include <eigen/sparse> #include <eigen/dense> #include <gsl/gsl_matrix.h> #include <sys/time.h> #include <gsl/gsl_blas.h> #define helix 100 #define rows helix*helix #define cols rows #define filling rows/4 #define reps 10   using namespace eigen;  /*-- declarationes --*/ int fillsparsematrix(sparsematrix<double> & mat); int filldensematrices(matrixxd & mat, gsl_matrix *testmat); double vee(int i, int j); int set_vectors_randomly(gsl_vector * v2, vectorxd v1);  int main() { int rep;     struct timeval tval_before, tval_after, tval_result;  gsl_matrix *testmat     = gsl_matrix_calloc(rows, cols); gsl_vector *v2      =gsl_vector_calloc(cols); gsl_vector *prod    =gsl_vector_calloc(cols);  sparsematrix<double> mat(rows,cols);         // default column major matrixxd mat(rows,cols);         // default column major vectorxd v1(cols), vv1(cols);  fillsparsematrix(mat); filldensematrices(mat, testmat);     printf("\n/*--- --- --- ---*/\n"); for(rep=0;rep<reps;rep++) { set_vectors_randomly(v2, v1);      gettimeofday(&tval_before, null);        vv1 = mat*v1;     gettimeofday(&tval_after, null);     timersub(&tval_after, &tval_before, &tval_result);      printf("time 1 product, sparse eigen / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);     gettimeofday(&tval_before, null);        gsl_blas_dgemv( cblasnotrans,1.0, testmat, v2, 0.0, prod);     gettimeofday(&tval_after, null);     timersub(&tval_after, &tval_before, &tval_result);     printf("time 1 product, gsl / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);      gettimeofday(&tval_before, null);        vv1 = mat*v1;     gettimeofday(&tval_after, null);     timersub(&tval_after, &tval_before, &tval_result);     printf("time 1 product, dense eigen / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);     printf("/*--- --- --- ---*/\n\n");     //std::cout << mat 
<< std::endl; } gsl_matrix_free(testmat);    printf("--- --- --->done\n"); return(0); }  /*-- --*/ int fillsparsematrix(sparsematrix<double> &mat) { int i, j; eigen::vectorxd vres; mat.reserve(eigen::vectorxi::constant(cols,filling));  printf("filling sparse matrix ...");     for(i=0;i<rows;i++)     {         if(i%2500==0){printf("i= %i\n", i);}     for(j=0;j<cols;j++)         {         if (vee(i,j) != 0){mat.insert(i,j) = vee(i,j);    /*alternative: mat.coeffref(i,j) += v_ij;*/ }         }      }  return(0); } /*-- --*/  /*-- --*/ int filldensematrices(matrixxd &mat, gsl_matrix * testmat) { int i, j; eigen::vectorxd vres; double aux; printf("filling dense matrix ...");     for(i=0;i<rows;i++)     {         if(i%2500==0){printf("i= %i\n", i);}     for(j=0;j<cols;j++)         {         aux = vee(i,j);         if (aux != 0)         {         mat(i,j) = aux;             gsl_matrix_set(testmat, i, j, aux);         }         }      } return(0); } /*-- --*/  double vee(int i, int j) {     double result = 0.0;      if(i%4 == 0){result =1.0;}      return result; } /*-- --*/ int set_vectors_randomly(gsl_vector * v2, vectorxd v1){ printf("setting vectors rendomly anew ...\n"); (int j=0;j<cols;j++)  { double r=drand48(); v1(j) =r; gsl_vector_set(v2, j, r);  } return(0); } /*-- --*/ 

With Eigen, performance is abysmal when compiling without compiler optimizations. There are several ways to increase performance dramatically:

  • With optimizations turned on (-O2 or -O3 in g++), performance can be two to three orders of magnitude higher.
  • An additional (smaller) speedup can be attained by defining NDEBUG before including the Eigen library. This disables bounds checking, so make sure there are no issues before enabling this feature.
  • Eigen can also take advantage of vectorization (SSE as of 3.2.6 and AVX as of 3.3, PowerPC and ARM as well), leading to further speedups. Just enable the relevant flags in your compiler.
  • Enabling OpenMP can lead to speedups as well. Again, enable the relevant flags in your compiler, and Eigen will do the rest.

Comments

Popular posts from this blog

javascript - Chart.js (Radar Chart) different scaleLineColor for each scaleLine -

apache - Error with PHP mail(): Multiple or malformed newlines found in additional_header -

android - Go back to previous fragment -