c++ - Should Eigen dense matrix * dense vector multiplication be 7 times slower than GSL? -
The answer to this question of mine made me expect that (for matrices with 1/4 of non-vanishing entries) in Eigen the product dense matrix * dense vector should outperform sparse matrix * dense vector.
Not only do I see the opposite, but both are also outperformed by GSL, by a factor of 7 and 4 respectively.
Am I using Eigen incorrectly? Am I timing carelessly? I am startled.
my compile options read:
ludi@ludi-m17xr4:~/desktop/tests$ g++ -o eigenfill.x eigenfill.cc -L/usr/local/lib -lgsl -lgslcblas && ./eigenfill.x
my code reads:
#include <iostream> #include <stdio.h> #include <stdlib.h> #include <eigen/sparse> #include <eigen/dense> #include <gsl/gsl_matrix.h> #include <sys/time.h> #include <gsl/gsl_blas.h> #define helix 100 #define rows helix*helix #define cols rows #define filling rows/4 #define reps 10 using namespace eigen; /*-- declarationes --*/ int fillsparsematrix(sparsematrix<double> & mat); int filldensematrices(matrixxd & mat, gsl_matrix *testmat); double vee(int i, int j); int set_vectors_randomly(gsl_vector * v2, vectorxd v1); int main() { int rep; struct timeval tval_before, tval_after, tval_result; gsl_matrix *testmat = gsl_matrix_calloc(rows, cols); gsl_vector *v2 =gsl_vector_calloc(cols); gsl_vector *prod =gsl_vector_calloc(cols); sparsematrix<double> mat(rows,cols); // default column major matrixxd mat(rows,cols); // default column major vectorxd v1(cols), vv1(cols); fillsparsematrix(mat); filldensematrices(mat, testmat); printf("\n/*--- --- --- ---*/\n"); for(rep=0;rep<reps;rep++) { set_vectors_randomly(v2, v1); gettimeofday(&tval_before, null); vv1 = mat*v1; gettimeofday(&tval_after, null); timersub(&tval_after, &tval_before, &tval_result); printf("time 1 product, sparse eigen / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec); gettimeofday(&tval_before, null); gsl_blas_dgemv( cblasnotrans,1.0, testmat, v2, 0.0, prod); gettimeofday(&tval_after, null); timersub(&tval_after, &tval_before, &tval_result); printf("time 1 product, gsl / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec); gettimeofday(&tval_before, null); vv1 = mat*v1; gettimeofday(&tval_after, null); timersub(&tval_after, &tval_before, &tval_result); printf("time 1 product, dense eigen / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec); printf("/*--- --- --- ---*/\n\n"); //std::cout << mat << std::endl; } gsl_matrix_free(testmat); printf("--- --- --->done\n"); return(0); } /*-- --*/ int 
fillsparsematrix(sparsematrix<double> &mat) { int i, j; eigen::vectorxd vres; mat.reserve(eigen::vectorxi::constant(cols,filling)); printf("filling sparse matrix ..."); for(i=0;i<rows;i++) { if(i%2500==0){printf("i= %i\n", i);} for(j=0;j<cols;j++) { if (vee(i,j) != 0){mat.insert(i,j) = vee(i,j); /*alternative: mat.coeffref(i,j) += v_ij;*/ } } } return(0); } /*-- --*/ /*-- --*/ int filldensematrices(matrixxd &mat, gsl_matrix * testmat) { int i, j; eigen::vectorxd vres; double aux; printf("filling dense matrix ..."); for(i=0;i<rows;i++) { if(i%2500==0){printf("i= %i\n", i);} for(j=0;j<cols;j++) { aux = vee(i,j); if (aux != 0) { mat(i,j) = aux; gsl_matrix_set(testmat, i, j, aux); } } } return(0); } /*-- --*/ double vee(int i, int j) { double result = 0.0; if(i%4 == 0){result =1.0;} return result; } /*-- --*/ int set_vectors_randomly(gsl_vector * v2, vectorxd v1){ printf("setting vectors rendomly anew ...\n"); (int j=0;j<cols;j++) { double r=drand48(); v1(j) =r; gsl_vector_set(v2, j, r); } return(0); } /*-- --*/
With Eigen, performance is abysmal when compiling without compiler optimizations. There are several ways to increase performance dramatically:
- With optimizations turned on (-O2 or -O3 in g++) performance can be two to three orders of magnitude higher.
- An additional (smaller) speedup can be attained by defining
`NDEBUG`
before including the Eigen library. This disables bounds checking, so make sure there are no issues before enabling this feature.
- Eigen can also take advantage of vectorization (SSE as of 3.2.6 and AVX as of 3.3, PowerPC and ARM as well), leading to further speedups. Just enable the relevant flags in your compiler.
- Enabling OpenMP can lead to speedups as well. Again, enable the relevant flags in your compiler and Eigen will do the rest.
Comments
Post a Comment