diff --git a/cg_blas.c b/cg_blas.c index 3bff599..2833445 100644 --- a/cg_blas.c +++ b/cg_blas.c @@ -1,96 +1,96 @@ #include #include #include #include #include #include +#include "mmio_wrapper.h" #include "util.h" #include "parameters.h" #include "cg.h" #include "second.h" -#include "mmio_wrapper.h" /* Implementation of a simple CG solver using matrix in the mtx format (Matrix market) Any matrix in that format can be used to test the code */ int main ( int argc, char **argv ) { double * A; double * x; double * b; double t1,t2; // Arrays and parameters to read and store the sparse matrix double * val = NULL; int * I = NULL; int * J = NULL; int N; int nz; const char * element_type ="d"; int symmetrize=0; int m,n; struct size_m sA; double h; if (argc < 2) { fprintf(stderr, "Usage: %s [martix-market-filename]\n", argv[0]); exit(1); } else { A = read_mat(argv[1]); sA = get_size(argv[1]); } if (loadMMSparseMatrix(argv[1], *element_type, true, &N, &N, &nz, &val, &I, &J, symmetrize)){ fprintf (stderr, "!!!! loadMMSparseMatrix FAILED\n"); return EXIT_FAILURE; }else{ printf("Matrix loaded from file %s\n",argv[1]); printf("N = %d \n",N); printf("nz = %d \n",nz); printf("val[0] = %f \n",val[0]); } m = sA.m; n = sA.n; h = 1./(double)n; b = init_source_term(n,h); x = (double*) malloc(n * sizeof(double)); printf("Call cgsolver() on matrix size (%d x %d)\n",m,n); t1 = second(); cgsolver( A, b, x, m, n ); t2 = second(); printf("Time for CG (dense solver) = %f [s]\n",(t2-t1)); printf("Call cgsolver_sparse() on matrix size (%d x %d)\n",m,n); t1 = second(); cgsolver_sparse( val, I, J, b, x, N ); t2 = second(); printf("Time for CG (sparse solver) = %f [s]\n",(t2-t1)); free(A); free(b); free(x); free(val); free(I); free(J); // free(x0); return 0; } diff --git a/mmio_wrapper.h b/mmio_wrapper.h new file mode 100644 index 0000000..6f89be8 --- /dev/null +++ b/mmio_wrapper.h @@ -0,0 +1,350 @@ +/* + * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. + * + * Please refer to the NVIDIA end user license agreement (EULA) associated + * with this source code for terms and conditions that govern your use of + * this software. Any use, reproduction, disclosure, or distribution of + * this software and related documentation outside the terms of the EULA + * is strictly prohibited. + * + */ + +#include +#include +#include + +#include "mmio.h" + + +/* avoid Windows warnings (for example: strcpy, fscanf, etc.) */ +#if defined(_WIN32) +#define _CRT_SECURE_NO_WARNINGS +#endif + +static void compress_index( + const int *Ind, + int nnz, + int m, + int *Ptr, + int base) +{ + int i; + + /* initialize everything to zero */ + for(i=0; ii < t->i ){ + return -1 ; + } + else if ( s->i > t->i ){ + return 1 ; + } + else{ + return s->j - t->j ; + } +} + +int cmp_cooFormat_csc( struct cooFormat *s, struct cooFormat *t) +{ + if ( s->j < t->j ){ + return -1 ; + } + else if ( s->j > t->j ){ + return 1 ; + } + else{ + return s->i - t->i ; + } +} + +typedef int (*FUNPTR) (const void*, const void*) ; +typedef int (*FUNPTR2) ( struct cooFormat *s, struct cooFormat *t) ; + +static FUNPTR2 fptr_array[2] = { + cmp_cooFormat_csr, + cmp_cooFormat_csc, +}; + + +static int verify_pattern( + int m, + int nnz, + int *csrRowPtr, + int *csrColInd) +{ + int i, col, start, end, base_index; + int error_found = 0; + + if (nnz != (csrRowPtr[m] - csrRowPtr[0])){ + fprintf(stderr, "Error (nnz check failed): (csrRowPtr[%d]=%d - csrRowPtr[%d]=%d) != (nnz=%d)\n", 0, csrRowPtr[0], m, csrRowPtr[m], nnz); + error_found = 1; + } + + base_index = csrRowPtr[0]; + if ((0 != base_index) && (1 != base_index)){ + fprintf(stderr, "Error (base index check failed): base index = %d\n", base_index); + error_found = 1; + } + + for (i=0; (!error_found) && (i end){ + fprintf(stderr, "Error (corrupted row): csrRowPtr[%d] (=%d) > csrRowPtr[%d] (=%d)\n", i, start+base_index, i+1, end+base_index); + error_found = 1; + } + for (col=start; col= csrColInd[col+1])){ + fprintf(stderr, "Error (sorting of the column indecis check failed): (csrColInd[%d]=%d) >= (csrColInd[%d]=%d)\n", col, csrColInd[col], col+1, csrColInd[col+1]); + error_found = 1; + } + } + } + return error_found ; +} + + +int loadMMSparseMatrix( + char *filename, + char elem_type, + bool csrFormat, + int *m, + int *n, + int *nnz, + double **aVal, + int **aRowInd, + int **aColInd, + int extendSymMatrix) +{ + MM_typecode matcode; + double *tempVal; + int *tempRowInd,*tempColInd; + double *tval; + int *trow,*tcol; + int *csrRowPtr, *cscColPtr; + int i,j,error,base,count; + struct cooFormat *work; + + /* read the matrix */ + error = mm_read_mtx_crd(filename, m, n, nnz, &trow, &tcol, &tval, &matcode); + if (error) { + fprintf(stderr, "!!!! can not open file: '%s'\n", filename); + return 1; + } + + /* start error checking */ + if (mm_is_complex(matcode) && ((elem_type != 'z') && (elem_type != 'c'))) { + fprintf(stderr, "!!!! complex matrix requires type 'z' or 'c'\n"); + return 1; + } + + if (mm_is_dense(matcode) || mm_is_array(matcode) || mm_is_pattern(matcode) /*|| mm_is_integer(matcode)*/){ + fprintf(stderr, "!!!! dense, array, pattern and integer matrices are not supported\n"); + return 1; + } + + /* if necessary symmetrize the pattern (transform from triangular to full) */ + if ((extendSymMatrix) && (mm_is_symmetric(matcode) || mm_is_hermitian(matcode) || mm_is_skew(matcode))){ + //count number of non-diagonal elements + count=0; + for(i=0; i<(*nnz); i++){ + if (trow[i] != tcol[i]){ + count++; + } + } + //allocate space for the symmetrized matrix + tempRowInd = (int *)malloc((*nnz + count) * sizeof(int)); + tempColInd = (int *)malloc((*nnz + count) * sizeof(int)); + if (mm_is_real(matcode) || mm_is_integer(matcode)){ + tempVal = (double *)malloc((*nnz + count) * sizeof(double)); + } + else{ + tempVal = (double *)malloc(2 * (*nnz + count) * sizeof(double)); + } + //copy the elements regular and transposed locations + for(j=0, i=0; i<(*nnz); i++){ + tempRowInd[j]=trow[i]; + tempColInd[j]=tcol[i]; + if (mm_is_real(matcode) || mm_is_integer(matcode)){ + tempVal[j]=tval[i]; + } + else{ + tempVal[2*j] =tval[2*i]; + tempVal[2*j+1]=tval[2*i+1]; + } + j++; + if (trow[i] != tcol[i]){ + tempRowInd[j]=tcol[i]; + tempColInd[j]=trow[i]; + if (mm_is_real(matcode) || mm_is_integer(matcode)){ + if (mm_is_skew(matcode)){ + tempVal[j]=-tval[i]; + } + else{ + tempVal[j]= tval[i]; + } + } + else{ + if(mm_is_hermitian(matcode)){ + tempVal[2*j] = tval[2*i]; + tempVal[2*j+1]=-tval[2*i+1]; + } + else{ + tempVal[2*j] = tval[2*i]; + tempVal[2*j+1]= tval[2*i+1]; + } + } + j++; + } + } + (*nnz)+=count; + //free temporary storage + free(trow); + free(tcol); + free(tval); + } + else{ + tempRowInd=trow; + tempColInd=tcol; + tempVal =tval; + } + // life time of (trow, tcol, tval) is over. + // please use COO format (tempRowInd, tempColInd, tempVal) + +// use qsort to sort COO format + work = (struct cooFormat *)malloc(sizeof(struct cooFormat)*(*nnz)); + if (NULL == work){ + fprintf(stderr, "!!!! allocation error, malloc failed\n"); + return 1; + } + for(i=0; i<(*nnz); i++){ + work[i].i = tempRowInd[i]; + work[i].j = tempColInd[i]; + work[i].p = i; // permutation is identity + } + + if (csrFormat){ + /* create row-major ordering of indices (sorted by row and within each row by column) */ + qsort(work, *nnz, sizeof(struct cooFormat), (FUNPTR)fptr_array[0] ); + }else{ + /* create column-major ordering of indices (sorted by column and within each column by row) */ + qsort(work, *nnz, sizeof(struct cooFormat), (FUNPTR)fptr_array[1] ); + + } + + // (tempRowInd, tempColInd) is sorted either by row-major or by col-major + for(i=0; i<(*nnz); i++){ + tempRowInd[i] = work[i].i; + tempColInd[i] = work[i].j; + } + + // setup base + // check if there is any row/col 0, if so base-0 + // check if there is any row/col equal to matrix dimension m/n, if so base-1 + int base0 = 0; + int base1 = 0; + for(i=0; i<(*nnz); i++){ + const int row = tempRowInd[i]; + const int col = tempColInd[i]; + if ( (0 == row) || (0 == col) ){ + base0 = 1; + } + if ( (*m == row) || (*n == col) ){ + base1 = 1; + } + } + if ( base0 && base1 ){ + printf("Error: input matrix is base-0 and base-1 \n"); + return 1; + } + + base = 0; + if (base1){ + base = 1; + } + + /* compress the appropriate indices */ + if (csrFormat){ + /* CSR format (assuming row-major format) */ + csrRowPtr = (int *)malloc(((*m)+1) * sizeof(csrRowPtr[0])); + if (!csrRowPtr) return 1; + compress_index(tempRowInd, *nnz, *m, csrRowPtr, base); + + *aRowInd = csrRowPtr; + *aColInd = (int *)malloc((*nnz) * sizeof(int)); + } + else { + /* CSC format (assuming column-major format) */ + cscColPtr = (int *)malloc(((*n)+1) * sizeof(cscColPtr[0])); + if (!cscColPtr) return 1; + compress_index(tempColInd, *nnz, *n, cscColPtr, base); + + *aColInd = cscColPtr; + *aRowInd = (int *)malloc((*nnz) * sizeof(int)); + } + + /* transfrom the matrix values of type double into one of the cusparse library types */ + *aVal = (double *)malloc((*nnz) * sizeof(double)); + + for (i=0; i<(*nnz); i++) { + if (csrFormat){ + (*aColInd)[i] = tempColInd[i]; + } + else{ + (*aRowInd)[i] = tempRowInd[i]; + } + if (mm_is_real(matcode) || mm_is_integer(matcode)){ + (*aVal)[i] = tempVal[ work[i].p ] ; + } + else{ + (*aVal)[i] = tempVal[2*work[i].p]; + } + } + + /* check for corruption */ + int error_found; + if (csrFormat){ + error_found = verify_pattern(*m, *nnz, *aRowInd, *aColInd); + }else{ + error_found = verify_pattern(*n, *nnz, *aColInd, *aRowInd); + } + if (error_found){ + fprintf(stderr, "!!!! verify_pattern failed\n"); + return 1; + } + + /* cleanup and exit */ + free(work); + free(tempVal); + free(tempColInd); + free(tempRowInd); + + return 0; +} +