diff --git a/cg.c b/cg.c index de94cab..6a84e71 100644 --- a/cg.c +++ b/cg.c @@ -1,121 +1,223 @@ #include "cg.h" const double TOLERANCE = 1.0e-10; /* cgsolver solves the linear equation A*x = b where A is of size m x n Code based on MATLAB code (from wikipedia ;-) ): function x = conjgrad(A, b, x) r = b - A * x; p = r; rsold = r' * r; for i = 1:length(b) Ap = A * p; alpha = rsold / (p' * Ap); x = x + alpha * p; r = r - alpha * Ap; rsnew = r' * r; if sqrt(rsnew) < 1e-10 break; end p = r + (rsnew / rsold) * p; rsold = rsnew; end end */ void cgsolver( double *A, double *b, double *x, int m, int n ){ double * r; double * rt; double * p; double * pt; double rsold; double rsnew; double * Ap; double * tmp; double alpha; int incx = 1; int incy = 1; int lda = m; double al = 1.; double be = 0.; int k = 0; r = (double*) malloc(n* sizeof(double)); rt = (double*) malloc(n* sizeof(double)); p = (double*) malloc(n* sizeof(double)); pt = (double*) malloc(n* sizeof(double)); Ap = (double*) malloc(n* sizeof(double)); tmp = (double*) malloc(n* sizeof(double)); // r = b - A * x; cblas_dgemv (CblasColMajor, CblasNoTrans, m, n, al , A, lda, x, incx, be, Ap, incy); cblas_dcopy(n,b,1,tmp,1); cblas_daxpy(n, -1. , Ap,1, tmp, 1 ); cblas_dcopy(n,tmp,1,r,1); // p = r; cblas_dcopy (n, r, incx, p, incy); cblas_dcopy(n,r,1,rt,1); cblas_dcopy(n,p,1,pt,1); // rsold = r' * r; rsold = cblas_ddot (n,r,incx,rt,incy); // for i = 1:length(b) while ( k < n ){ // Ap = A * p; cblas_dgemv (CblasColMajor, CblasNoTrans, m, n, al, A, lda, p, incx, be, Ap, incy); // alpha = rsold / (p' * Ap); alpha = rsold / fmax( cblas_ddot(n, pt, incx, Ap, incy ), NEARZERO ); // x = x + alpha * p; cblas_daxpy(n, alpha, p, 1, x, 1); // r = r - alpha * Ap; cblas_daxpy(n, -alpha, Ap, 1, r, 1); // rsnew = r' * r; rsnew = cblas_ddot (n,r,incx,r,incy); // if sqrt(rsnew) < 1e-10 // break; if ( sqrt(rsnew) < TOLERANCE ) break; // Convergence test // p = r + (rsnew / rsold) * p; cblas_dcopy(n,r,1,tmp,1); cblas_daxpy(n, (double)(rsnew/rsold), p, 1, tmp, 1); cblas_dcopy(n,tmp,1,p,1); cblas_dcopy(n,p,1,pt,1); // rsold = rsnew; rsold = rsnew; k++; } printf("\t[STEP %d] residual = %E\n",k,sqrt(rsold)); free(r); free(rt); free(p); free(pt); free(Ap); free(tmp); } +/* +Sparse version of the cg solver +*/ + +void cgsolver_sparse( double *Aval, int *I, int *J, double *b, double *x, int n){ + double * r; + double * rt; + double * p; + double * pt; + double rsold; + double rsnew; + double * Ap; + double * tmp; + double alpha; + + int incx = 1; + int incy = 1; + + int k = 0; + + r = (double*) malloc(n* sizeof(double)); + rt = (double*) malloc(n* sizeof(double)); + p = (double*) malloc(n* sizeof(double)); + pt = (double*) malloc(n* sizeof(double)); + Ap = (double*) malloc(n* sizeof(double)); + tmp = (double*) malloc(n* sizeof(double)); + +// r = b - A * x; + smvm(n, Aval, J, I, x, Ap); + cblas_dcopy(n,b,1,tmp,1); + cblas_daxpy(n, -1. , Ap,1, tmp, 1 ); + cblas_dcopy(n,tmp,1,r,1); + +// p = r; + cblas_dcopy (n, r, incx, p, incy); + + cblas_dcopy(n,r,1,rt,1); + cblas_dcopy(n,p,1,pt,1); +// rsold = r' * r; + rsold = cblas_ddot (n,r,incx,rt,incy); + + + +// for i = 1:length(b) + while ( k < n ){ +// Ap = A * p; + smvm(n, Aval, J, I, p, Ap); +// alpha = rsold / (p' * Ap); + alpha = rsold / fmax( cblas_ddot(n, pt, incx, Ap, incy ), NEARZERO ); +// x = x + alpha * p; + memset(x, 0, n*sizeof(double)); + cblas_daxpy(n, alpha, p, 1, x, 1); +// r = r - alpha * Ap; + cblas_daxpy(n, -alpha, Ap, 1, r, 1); +// rsnew = r' * r; + rsnew = cblas_ddot (n,r,incx,r,incy); +// if sqrt(rsnew) < 1e-10 +// break; + if ( sqrt(rsnew) < TOLERANCE ) break; // Convergence test +// p = r + (rsnew / rsold) * p; + memset(p, 0, n*sizeof(double)); + cblas_dcopy(n,r,1,tmp,1); + cblas_daxpy(n, (double)(rsnew/rsold), p, 1, tmp, 1); + cblas_dcopy(n,tmp,1,p,1); + + cblas_dcopy(n,p,1,pt,1); +// rsold = rsnew; + rsold = rsnew; + k++; + } + + printf("\t[STEP %d] residual = %E\n",k,sqrt(rsold)); + + free(r); + free(rt); + free(p); + free(pt); + free(Ap); + free(tmp); +} + + +/* +Sparse matrix vector multiplication +*/ + +void smvm(int m, const double* val, const int* col, const int* row, const double* x, double* y) +{ + for (int i=0; i #include +#include #include #include "util.h" #include "parameters.h" #include void cgsolver( double *A, double *b, double *x, int m, int n ); double * init_source_term(int n, double h); +void smvm(int m, const double* val, const int* col, const int* row, const double* x, double* y); +void cgsolver_sparse( double *Aval, int *I, int *J, double *b, double *x, int n); #endif /*CG_H_*/ diff --git a/cg_blas.c b/cg_blas.c index 82371b6..3bff599 100644 --- a/cg_blas.c +++ b/cg_blas.c @@ -1,68 +1,96 @@ #include #include #include #include #include +#include #include "util.h" #include "parameters.h" #include "cg.h" #include "second.h" +#include "mmio_wrapper.h" /* Implementation of a simple CG solver using matrix in the mtx format (Matrix market) Any matrix in that format can be used to test the code */ int main ( int argc, char **argv ) { double * A; - double * b; double * x; - double * x0; + double * b; + double t1,t2; +// Arrays and parameters to read and store the sparse matrix + double * val = NULL; + int * I = NULL; + int * J = NULL; + int N; + int nz; + const char * element_type ="d"; + int symmetrize=0; + + int m,n; struct size_m sA; double h; if (argc < 2) { fprintf(stderr, "Usage: %s [martix-market-filename]\n", argv[0]); exit(1); } else { A = read_mat(argv[1]); sA = get_size(argv[1]); } + if (loadMMSparseMatrix(argv[1], *element_type, true, &N, &N, &nz, &val, &I, &J, symmetrize)){ + fprintf (stderr, "!!!! loadMMSparseMatrix FAILED\n"); + return EXIT_FAILURE; + }else{ + printf("Matrix loaded from file %s\n",argv[1]); + printf("N = %d \n",N); + printf("nz = %d \n",nz); + printf("val[0] = %f \n",val[0]); + } m = sA.m; n = sA.n; h = 1./(double)n; b = init_source_term(n,h); x = (double*) malloc(n * sizeof(double)); - x0 = (double*) malloc(n * sizeof(double)); printf("Call cgsolver() on matrix size (%d x %d)\n",m,n); - t1 = second(); cgsolver( A, b, x, m, n ); t2 = second(); - printf("Time for CG = %f [s]\n",(t2-t1)); + printf("Time for CG (dense solver) = %f [s]\n",(t2-t1)); + printf("Call cgsolver_sparse() on matrix size (%d x %d)\n",m,n); + t1 = second(); + cgsolver_sparse( val, I, J, b, x, N ); + t2 = second(); + printf("Time for CG (sparse solver) = %f [s]\n",(t2-t1)); free(A); free(b); free(x); - free(x0); + free(val); + free(I); + free(J); + +// free(x0); return 0; }