diff --git a/cg.c b/cg.c index 6a84e71..9f3bbd1 100644 --- a/cg.c +++ b/cg.c @@ -1,223 +1,212 @@ #include "cg.h" const double TOLERANCE = 1.0e-10; - +const double NEARZERO = 1.0e-14; /* cgsolver solves the linear equation A*x = b where A is of size m x n Code based on MATLAB code (from wikipedia ;-) ): function x = conjgrad(A, b, x) r = b - A * x; p = r; rsold = r' * r; for i = 1:length(b) Ap = A * p; alpha = rsold / (p' * Ap); x = x + alpha * p; r = r - alpha * Ap; rsnew = r' * r; if sqrt(rsnew) < 1e-10 break; end p = r + (rsnew / rsold) * p; rsold = rsnew; end end */ void cgsolver( double *A, double *b, double *x, int m, int n ){ double * r; - double * rt; double * p; - double * pt; double rsold; double rsnew; double * Ap; double * tmp; double alpha; - int incx = 1; - int incy = 1; int lda = m; double al = 1.; double be = 0.; int k = 0; r = (double*) malloc(n* sizeof(double)); - rt = (double*) malloc(n* sizeof(double)); p = (double*) malloc(n* sizeof(double)); - pt = (double*) malloc(n* sizeof(double)); Ap = (double*) malloc(n* sizeof(double)); tmp = (double*) malloc(n* sizeof(double)); -// r = b - A * x; - cblas_dgemv (CblasColMajor, CblasNoTrans, m, n, al , A, lda, x, incx, be, Ap, incy); - cblas_dcopy(n,b,1,tmp,1); - cblas_daxpy(n, -1. , Ap,1, tmp, 1 ); - cblas_dcopy(n,tmp,1,r,1); -// p = r; - cblas_dcopy (n, r, incx, p, incy); - - cblas_dcopy(n,r,1,rt,1); - cblas_dcopy(n,p,1,pt,1); -// rsold = r' * r; - rsold = cblas_ddot (n,r,incx,rt,incy); +// r = b - A * x; + memset(Ap, 0., n * sizeof(double)); + cblas_dgemv (CblasColMajor, CblasNoTrans, m, n, 1. , A, n, x, 1, 0., Ap, 1); + cblas_dcopy(n, b, 1, tmp, 1); + cblas_daxpy(n, -1., Ap, 1, tmp, 1 ); + cblas_dcopy(n, tmp, 1, r, 1); +// p = r; + cblas_dcopy (n, r, 1, p, 1); +// rsold = r' * r; + rsold = cblas_ddot (n, r, 1, r, 1); -// for i = 1:length(b) +// for i = 1:length(b) while ( k < n ){ -// Ap = A * p; - cblas_dgemv (CblasColMajor, CblasNoTrans, m, n, al, A, lda, p, incx, be, Ap, incy); -// alpha = rsold / (p' * Ap); - alpha = rsold / fmax( cblas_ddot(n, pt, incx, Ap, incy ), NEARZERO ); -// x = x + alpha * p; +// Ap = A * p; + memset(Ap, 0., n * sizeof(double)); + cblas_dgemv (CblasColMajor, CblasNoTrans, m, n, al, A, lda, p, 1, be, Ap, 1); +// alpha = rsold / (p' * Ap); + alpha = rsold / fmax( cblas_ddot(n, p, 1, Ap, 1), rsold * NEARZERO ); +// x = x + alpha * p; cblas_daxpy(n, alpha, p, 1, x, 1); -// r = r - alpha * Ap; +// r = r - alpha * Ap; cblas_daxpy(n, -alpha, Ap, 1, r, 1); -// rsnew = r' * r; - rsnew = cblas_ddot (n,r,incx,r,incy); -// if sqrt(rsnew) < 1e-10 -// break; +// rsnew = r' * r; + rsnew = cblas_ddot (n, r, 1, r, 1); +// if sqrt(rsnew) < 1e-10 +// break; if ( sqrt(rsnew) < TOLERANCE ) break; // Convergence test -// p = r + (rsnew / rsold) * p; - cblas_dcopy(n,r,1,tmp,1); +// p = r + (rsnew / rsold) * p; + cblas_dcopy(n, r, 1, tmp, 1); cblas_daxpy(n, (double)(rsnew/rsold), p, 1, tmp, 1); - cblas_dcopy(n,tmp,1,p,1); + cblas_dcopy(n, tmp, 1, p, 1); - cblas_dcopy(n,p,1,pt,1); -// rsold = rsnew; +// rsold = rsnew; rsold = rsnew; + printf("\t[STEP %d] residual = %E\r", k, sqrt(rsold)); + fflush(stdout); k++; } - printf("\t[STEP %d] residual = %E\n",k,sqrt(rsold)); + memset(r, 0., n * sizeof(double)); + cblas_dgemv (CblasColMajor, CblasNoTrans, m, n, al, A, lda, x, 1, be, r, 1); + cblas_daxpy(n, -1., b, 1, r, 1); + double res = cblas_ddot(n, r, 1, r, 1); + + printf("\t[STEP %d] residual = %E, ||Ax -b|| = %E\n",k,sqrt(rsold), sqrt(res)); free(r); - free(rt); free(p); - free(pt); free(Ap); free(tmp); } /* Sparse version of the cg solver */ -void cgsolver_sparse( double *Aval, int *I, int *J, double *b, double *x, int n){ +void cgsolver_sparse( double *Aval, int *row, int *col, double *b, double *x, int n ){ double * r; - double * rt; double * p; - double * pt; double rsold; double rsnew; double * Ap; double * tmp; double alpha; int incx = 1; int incy = 1; int k = 0; r = (double*) malloc(n* sizeof(double)); - rt = (double*) malloc(n* sizeof(double)); p = (double*) malloc(n* sizeof(double)); - pt = (double*) malloc(n* sizeof(double)); Ap = (double*) malloc(n* sizeof(double)); tmp = (double*) malloc(n* sizeof(double)); // r = b - A * x; - smvm(n, Aval, J, I, x, Ap); - cblas_dcopy(n,b,1,tmp,1); - cblas_daxpy(n, -1. , Ap,1, tmp, 1 ); - cblas_dcopy(n,tmp,1,r,1); + smvm(n, Aval, col, row, x, Ap); + cblas_dcopy(n, b, 1, r, 1); + cblas_daxpy(n, -1., Ap, 1, r, 1); -// p = r; +// p = r; cblas_dcopy (n, r, incx, p, incy); - cblas_dcopy(n,r,1,rt,1); - cblas_dcopy(n,p,1,pt,1); -// rsold = r' * r; - rsold = cblas_ddot (n,r,incx,rt,incy); - +// rsold = r' * r; + rsold = cblas_ddot (n, r, 1, r, 1); - -// for i = 1:length(b) +// for i = 1:length(b) while ( k < n ){ -// Ap = A * p; - smvm(n, Aval, J, I, p, Ap); -// alpha = rsold / (p' * Ap); - alpha = rsold / fmax( cblas_ddot(n, pt, incx, Ap, incy ), NEARZERO ); -// x = x + alpha * p; - memset(x, 0, n*sizeof(double)); +// Ap = A * p; + smvm(n, Aval, col, row, p, Ap); +// alpha = rsold / (p' * Ap); + alpha = rsold / fmax( cblas_ddot(n, p, 1, Ap, 1), rsold * NEARZERO); +// x = x + alpha * p; cblas_daxpy(n, alpha, p, 1, x, 1); -// r = r - alpha * Ap; +// r = r - alpha * Ap; cblas_daxpy(n, -alpha, Ap, 1, r, 1); -// rsnew = r' * r; - rsnew = cblas_ddot (n,r,incx,r,incy); -// if sqrt(rsnew) < 1e-10 -// break; +// rsnew = r' * r; + rsnew = cblas_ddot (n, r, 1, r ,1); +// if sqrt(rsnew) < 1e-10 +// break; if ( sqrt(rsnew) < TOLERANCE ) break; // Convergence test // p = r + (rsnew / rsold) * p; - memset(p, 0, n*sizeof(double)); - cblas_dcopy(n,r,1,tmp,1); - cblas_daxpy(n, (double)(rsnew/rsold), p, 1, tmp, 1); - cblas_dcopy(n,tmp,1,p,1); + cblas_dcopy(n, r, 1, tmp, 1); + cblas_daxpy(n, rsnew / rsold, p, 1, tmp, 1); + cblas_dcopy(n, tmp, 1, p, 1); - cblas_dcopy(n,p,1,pt,1); + //cblas_dcopy(n,p,1,pt,1); // rsold = rsnew; rsold = rsnew; + printf("\t[STEP %d] residual = %E\r", k, sqrt(rsold)); k++; } - printf("\t[STEP %d] residual = %E\n",k,sqrt(rsold)); + smvm(n, Aval, col, row, x, r); + cblas_daxpy(n, -1., b, 1, r, 1); + double res = cblas_ddot(n, r, 1, r, 1); + + printf("\t[STEP %d] residual = %E, ||Ax -b|| = %E\n",k,sqrt(rsold), sqrt(res)); free(r); - free(rt); free(p); - free(pt); free(Ap); free(tmp); } /* Sparse matrix vector multiplication */ void smvm(int m, const double* val, const int* col, const int* row, const double* x, double* y) { for (int i=0; i #include #include #include #include "util.h" #include "parameters.h" #include void cgsolver( double *A, double *b, double *x, int m, int n ); double * init_source_term(int n, double h); void smvm(int m, const double* val, const int* col, const int* row, const double* x, double* y); -void cgsolver_sparse( double *Aval, int *I, int *J, double *b, double *x, int n); +void cgsolver_sparse( double *Aval, int *row, int *col, double *b, double *x, int n); #endif /*CG_H_*/ diff --git a/cg_blas.c b/cg_blas.c index 2833445..e43536b 100644 --- a/cg_blas.c +++ b/cg_blas.c @@ -1,96 +1,98 @@ #include #include #include #include #include #include #include "mmio_wrapper.h" #include "util.h" #include "parameters.h" #include "cg.h" #include "second.h" /* Implementation of a simple CG solver using matrix in the mtx format (Matrix market) Any matrix in that format can be used to test the code */ int main ( int argc, char **argv ) { double * A; double * x; double * b; double t1,t2; // Arrays and parameters to read and store the sparse matrix double * val = NULL; - int * I = NULL; - int * J = NULL; + int * row = NULL; + int * col = NULL; int N; int nz; const char * element_type ="d"; - int symmetrize=0; + int symmetrize=1; int m,n; struct size_m sA; double h; if (argc < 2) { fprintf(stderr, "Usage: %s [martix-market-filename]\n", argv[0]); exit(1); } else { A = read_mat(argv[1]); sA = get_size(argv[1]); } - if (loadMMSparseMatrix(argv[1], *element_type, true, &N, &N, &nz, &val, &I, &J, symmetrize)){ + if (loadMMSparseMatrix(argv[1], *element_type, true, &N, &N, &nz, &val, &row, &col, symmetrize)){ fprintf (stderr, "!!!! loadMMSparseMatrix FAILED\n"); return EXIT_FAILURE; - }else{ + } else { printf("Matrix loaded from file %s\n",argv[1]); printf("N = %d \n",N); printf("nz = %d \n",nz); printf("val[0] = %f \n",val[0]); } m = sA.m; n = sA.n; h = 1./(double)n; b = init_source_term(n,h); x = (double*) malloc(n * sizeof(double)); + memset(x, 0., n*sizeof(double)); printf("Call cgsolver() on matrix size (%d x %d)\n",m,n); t1 = second(); cgsolver( A, b, x, m, n ); t2 = second(); printf("Time for CG (dense solver) = %f [s]\n",(t2-t1)); + memset(x, 0., n*sizeof(double)); printf("Call cgsolver_sparse() on matrix size (%d x %d)\n",m,n); t1 = second(); - cgsolver_sparse( val, I, J, b, x, N ); + cgsolver_sparse( val, row, col, b, x, N ); t2 = second(); printf("Time for CG (sparse solver) = %f [s]\n",(t2-t1)); free(A); free(b); free(x); free(val); - free(I); - free(J); + free(row); + free(col); // free(x0); return 0; } diff --git a/mmio.h b/mmio.h index 7cfd0a1..a082297 100644 --- a/mmio.h +++ b/mmio.h @@ -1,133 +1,135 @@ /* * Matrix Market I/O library for ANSI C * * See http://math.nist.gov/MatrixMarket for details. * * */ #ifndef MM_IO_H #define MM_IO_H #define MM_MAX_LINE_LENGTH 1025 #define MatrixMarketBanner "%%MatrixMarket" #define MM_MAX_TOKEN_LENGTH 64 typedef char MM_typecode[4]; char *mm_typecode_to_str(MM_typecode matcode); int mm_read_banner(FILE *f, MM_typecode *matcode); int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz); int mm_read_mtx_array_size(FILE *f, int *M, int *N); int mm_write_banner(FILE *f, MM_typecode matcode); int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz); int mm_write_mtx_array_size(FILE *f, int M, int N); /********************* MM_typecode query fucntions ***************************/ #define mm_is_matrix(typecode) ((typecode)[0]=='M') #define mm_is_sparse(typecode) ((typecode)[1]=='C') #define mm_is_coordinate(typecode)((typecode)[1]=='C') #define mm_is_dense(typecode) ((typecode)[1]=='A') #define mm_is_array(typecode) ((typecode)[1]=='A') #define mm_is_complex(typecode) ((typecode)[2]=='C') #define mm_is_real(typecode) ((typecode)[2]=='R') #define mm_is_pattern(typecode) ((typecode)[2]=='P') #define mm_is_integer(typecode) ((typecode)[2]=='I') #define mm_is_symmetric(typecode)((typecode)[3]=='S') #define mm_is_general(typecode) ((typecode)[3]=='G') #define mm_is_skew(typecode) ((typecode)[3]=='K') #define mm_is_hermitian(typecode)((typecode)[3]=='H') int mm_is_valid(MM_typecode matcode); /* too complex for a macro */ /********************* MM_typecode modify fucntions ***************************/ #define mm_set_matrix(typecode) ((*typecode)[0]='M') #define mm_set_coordinate(typecode) ((*typecode)[1]='C') #define mm_set_array(typecode) ((*typecode)[1]='A') #define mm_set_dense(typecode) mm_set_array(typecode) #define mm_set_sparse(typecode) mm_set_coordinate(typecode) #define mm_set_complex(typecode)((*typecode)[2]='C') #define mm_set_real(typecode) ((*typecode)[2]='R') #define mm_set_pattern(typecode)((*typecode)[2]='P') #define mm_set_integer(typecode)((*typecode)[2]='I') #define mm_set_symmetric(typecode)((*typecode)[3]='S') #define mm_set_general(typecode)((*typecode)[3]='G') #define mm_set_skew(typecode) ((*typecode)[3]='K') #define mm_set_hermitian(typecode)((*typecode)[3]='H') #define mm_clear_typecode(typecode) ((*typecode)[0]=(*typecode)[1]= \ (*typecode)[2]=' ',(*typecode)[3]='G') #define mm_initialize_typecode(typecode) mm_clear_typecode(typecode) /********************* Matrix Market error codes ***************************/ #define MM_COULD_NOT_READ_FILE 11 #define MM_PREMATURE_EOF 12 #define MM_NOT_MTX 13 #define MM_NO_HEADER 14 #define MM_UNSUPPORTED_TYPE 15 #define MM_LINE_TOO_LONG 16 #define MM_COULD_NOT_WRITE_FILE 17 /******************** Matrix Market internal definitions ******************** MM_matrix_typecode: 4-character sequence ojbect sparse/ data storage dense type scheme string position: [0] [1] [2] [3] Matrix typecode: M(atrix) C(oord) R(eal) G(eneral) A(array) C(omplex) H(ermitian) P(attern) S(ymmetric) I(nteger) K(kew) ***********************************************************************/ #define MM_MTX_STR "matrix" #define MM_ARRAY_STR "array" #define MM_DENSE_STR "array" #define MM_COORDINATE_STR "coordinate" #define MM_SPARSE_STR "coordinate" #define MM_COMPLEX_STR "complex" #define MM_REAL_STR "real" #define MM_INT_STR "integer" #define MM_GENERAL_STR "general" #define MM_SYMM_STR "symmetric" #define MM_HERM_STR "hermitian" #define MM_SKEW_STR "skew-symmetric" #define MM_PATTERN_STR "pattern" /* high level routines */ int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[], double val[], MM_typecode matcode); int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[], double val[], MM_typecode matcode); +int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J, + double **val, MM_typecode *matcode); int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img, MM_typecode matcode); int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, double **val_, int **I_, int **J_); #endif diff --git a/parameters.h b/parameters.h index 6f36917..d980998 100644 --- a/parameters.h +++ b/parameters.h @@ -1,25 +1,25 @@ #ifndef PARAMETERS_H_ #define PARAMETERS_H_ #define id(width,row,col) (width*col+row) struct size_m { int m; int n; }; /* NEARZERO is an interpretation of "zero" */ -const double NEARZERO; +extern const double NEARZERO; /* TOLERANCE is the epsilon for the convergence error */ //extern const double TOLERANCE = 1.0e-10; extern const double TOLERANCE; #endif /*PARAMETERS_H_*/ diff --git a/util.c b/util.c index 9665b4c..91e9f5e 100644 --- a/util.c +++ b/util.c @@ -1,229 +1,230 @@ #include "util.h" // ==================================================================== // Implementation of useful functions // ==================================================================== /* print a matrix A of size m x n */ void print_mat( char title[], double *A, int m, int n ) { double x; printf("%s",title); for ( int i = 0; i < m; i++ ){ for ( int j = 0; j < n; j++ ){ x = A[id(m,i,j)]; //if ( abs( x ) < NEARZERO ) x = 0.0; printf("%f (%d)\t",x,id(m,i,j)); } printf("\n"); } } /* print first d elements from a matrix A of size m x n */ void print_first( char title[], double *A, int m, int n, int d ) { double x; printf("%s",title); for ( int i = 0; i < d; i++ ){ x = A[i]; //if ( abs( x ) < NEARZERO ) x = 0.0; printf("%f (%d)\t",x,i); } printf("\n"); } /* reads a matrix A of size m x n inspired by https://math.nist.gov/MatrixMarket/mmio/c/example_read.c */ double* read_mat(const char * restrict fn) { double * mat; int n,m; int ret_code; MM_typecode matcode; FILE *f; int nz; int i, *I, *J; double tmp; if ((f = fopen(fn, "r")) == NULL) { printf("Could not open matrix"); exit(1); } if (mm_read_banner(f, &matcode) != 0) { printf("Could not process Matrix Market banner.\n"); exit(1); } // Matrix is sparse if (mm_is_matrix(matcode) && mm_is_coordinate(matcode) ) { if ((ret_code = mm_read_mtx_crd_size(f, &m, &n, &nz)) !=0) { exit(1); }else{ if ( (m==1) || (n==1) ){ printf("size of vector = %d x %d\n",m,n); }else{ printf("size of matrix = %d x %d\n",m,n); } } } // Matrix is not sparse else if (mm_is_matrix(matcode) && mm_is_array(matcode) ) { // printf("case : %%MatrixMarket matrix array real symmetric \n"); if ((ret_code = mm_read_mtx_array_size(f, &m, &n)) !=0) { exit(1); }else{ if ( (m==1) || (n==1) ){ printf("size of vector = %d x %d\n",m,n); }else{ printf("size of matrix = %d x %d\n",m,n); } } nz = m*n; } // Matrix is something else else{ printf("Sorry, this application does not support "); printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode)); exit(1); } /* reserve memory for matrices */ I = (int *) malloc(nz * sizeof(int)); J = (int *) malloc(nz * sizeof(int)); mat = (double*) malloc(m*n * sizeof(double)); for (i=0; i