void dgemm( const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double* C, const int ldc)
{
int ib, jb, kb;
int i, j, k;
double* Ab = (double*) _mm_malloc(M_BLOCK_SIZE*K_BLOCK_SIZE*sizeof(double),32);