diff --git a/tools/colvars/abf_data.cpp b/tools/colvars/abf_data.cpp index bb08facd1..8b1ebd0a9 100644 --- a/tools/colvars/abf_data.cpp +++ b/tools/colvars/abf_data.cpp @@ -1,341 +1,341 @@ #include "abf_data.h" #include <iostream> #include <fstream> #include <cmath> #include <cstring> #include <cstdlib> /// Construct gradient field object from an ABF-saved file ABFdata::ABFdata(const char *gradFileName) { std::ifstream gradFile; std::ifstream countFile; - int n; char hash; double xi; char *countFileName; countFileName = new char[strlen (gradFileName) + 2]; - strcpy (countFileName, gradFileName); + strcpy (countFileName, gradFileName); countFileName[strlen (gradFileName) - 4] = '\0'; strcat (countFileName, "count"); std::cout << "Opening file " << gradFileName << " for reading\n"; gradFile.open(gradFileName); if (!gradFile.good()) { std::cerr << "Cannot read from file " << gradFileName << ", aborting\n"; exit(1); } gradFile >> hash; if (hash != '#') { std::cerr << "Missing \'#\' sign in gradient file\n"; exit(1); } gradFile >> Nvars; std::cout << "Number of variables: " << Nvars << "\n"; sizes = new int[Nvars]; blocksizes = new int[Nvars]; PBC = new int[Nvars]; widths = new double[Nvars]; mins = new double[Nvars]; scalar_dim = 1; // total is (n1 * n2 * ... 
* n_Nvars ) for (int i = 0; i < Nvars; i++) { gradFile >> hash; if (hash != '#') { std::cerr << "Missing \'#\' sign in gradient file\n"; exit(1); } // format is: xiMin dxi Nbins PBCflag gradFile >> mins[i] >> widths[i] >> sizes[i] >> PBC[i]; std::cout << "min = " << mins[i] << " width = " << widths[i] << " n = " << sizes[i] << " PBC: " << (PBC[i]?"yes":"no") << "\n"; if (sizes[i] == 0) { std::cout << "ERROR: size should not be zero!\n"; exit(1); } scalar_dim *= sizes[i]; } // block sizes, smallest for the last dimension blocksizes[Nvars - 1] = 1; for (int i = Nvars - 2; i >= 0; i--) { blocksizes[i] = blocksizes[i + 1] * sizes[i + 1]; } vec_dim = scalar_dim * Nvars; //std::cout << "Gradient field has length " << vec_dim << "\n"; gradients = new double[vec_dim]; estimate = new double[vec_dim]; deviation = new double[vec_dim]; count = new unsigned int[scalar_dim]; int *pos = new int[Nvars]; for (int i = 0; i < Nvars; i++) pos[i] = 0; for (unsigned int i = 0; i < scalar_dim; i++) { // Here we do the Euclidian division iteratively for (int k = Nvars - 1; k > 0; k--) { if (pos[k] == sizes[k]) { pos[k] = 0; pos[k - 1]++; } } for (unsigned int j = 0; j < Nvars; j++) { // Read values of the collective variables only to check for consistency with grid gradFile >> xi; double rel_diff = (mins[j] + widths[j] * (pos[j] + 0.5) - xi) / widths[j]; if ( rel_diff * rel_diff > 1e-12 ) { std::cout << "\nERROR: wrong coordinates in gradient file\n"; std::cout << "Expected " << mins[j] + widths[j] * (pos[j] + 0.5) << ", got " << xi << std::endl; exit(1); } } for (unsigned int j = 0; j < Nvars; j++) { // Read and store gradients if ( ! 
(gradFile >> gradients[i * Nvars + j]) ) { std::cout << "\nERROR: could not read gradient data\n"; exit(1); } } pos[Nvars - 1]++; // move on to next position } // check for end of file if ( gradFile >> xi ) { std::cout << "\nERROR: extraneous data at end of gradient file\n"; exit(1); } gradFile.close(); std::cout << "Opening file " << countFileName << " for reading\n"; countFile.open(countFileName); if (!countFile.good()) { std::cerr << "Cannot read from file " << countFileName << ", aborting\n"; exit(1); } countFile >> hash; if (hash != '#') { std::cerr << "Missing \'#\' sign in count file\n"; exit(1); } countFile >> Nvars; for (int i = 0; i < Nvars; i++) { countFile >> hash; if (hash != '#') { std::cerr << "Missing \'#\' sign in gradient file\n"; exit(1); } countFile >> mins[i] >> widths[i] >> sizes[i] >> PBC[i]; } for (unsigned int i = 0; i < scalar_dim; i++) { for (unsigned int j = 0; j < Nvars; j++) { // Read and ignore values of the collective variables countFile >> xi; } // Read and store counts countFile >> count[i]; } // Could check for end-of-file string here countFile.close(); - delete [] countFileName; + delete[] countFileName; + delete[] pos; // for metadynamics bias = new double[scalar_dim]; histogram = new unsigned int[scalar_dim]; for (unsigned int i = 0; i < scalar_dim; i++) { histogram[i] = 0; bias[i] = 0.0; } } ABFdata::~ABFdata() { delete[] sizes; delete[] blocksizes; delete[] PBC; delete[] widths; delete[] mins; delete[] gradients; delete[] estimate; delete[] deviation; delete[] count; delete[] bias; delete[] histogram; } unsigned int ABFdata::offset(const int *pos) { unsigned int index = 0; for (int i = 0; i < Nvars; i++) { // Check for out-of bounds indices here if (pos[i] < 0 || pos[i] >= sizes[i]) { std::cerr << "Out-of-range index: " << pos[i] << " for rank " << i << "\n"; exit(1); } index += blocksizes[i] * pos[i]; } // we leave the multiplication below for the caller to do // we just give the offset for scalar fields // index *= Nvars; 
// Nb of gradient vectors -> nb of array elts return index; } void ABFdata::write_histogram(const char *fileName) { std::ofstream os; unsigned int index; int *pos, i; os.open(fileName); if (!os.good()) { std::cerr << "Cannot write to file " << fileName << ", aborting\n"; exit(1); } pos = new int[Nvars]; for (i = 0; i < Nvars; i++) pos[i] = 0; for (index = 0; index < scalar_dim; index++) { // Here we do the Euclidian division iteratively for (i = Nvars - 1; i > 0; i--) { if (pos[i] == sizes[i]) { pos[i] = 0; pos[i - 1]++; os << "\n"; } } // Now a stupid check: if (index != offset(pos)) { std::cerr << "Wrong position vector at index " << index << "\n"; exit(1); } for (i = 0; i < Nvars; i++) { os << mins[i] + widths[i] * (pos[i] + 0.5) << " "; } os << histogram[index] << "\n"; pos[Nvars - 1]++; // move on to next position } os.close(); delete[]pos; } void ABFdata::write_bias(const char *fileName) { // write the opposite of the bias, with global minimum set to 0 std::ofstream os; unsigned int index; int *pos, i; double minbias, maxbias; os.open(fileName); if (!os.good()) { std::cerr << "Cannot write to file " << fileName << ", aborting\n"; exit(1); } pos = new int[Nvars]; for (i = 0; i < Nvars; i++) pos[i] = 0; // Set the minimum value to 0 by subtracting each value from the max maxbias = bias[0]; for (index = 0; index < scalar_dim; index++) { if (bias[index] > maxbias) maxbias = bias[index]; } // Set the maximum value to that of the lowest nonzero bias minbias = bias[0]; for (index = 0; index < scalar_dim; index++) { if (minbias == 0.0 || (bias[index] > 0.0 && bias[index] < minbias)) minbias = bias[index]; } - + for (index = 0; index < scalar_dim; index++) { // Here we do the Euclidian division iteratively for (i = Nvars - 1; i > 0; i--) { if (pos[i] == sizes[i]) { pos[i] = 0; pos[i - 1]++; os << "\n"; } } // Now a stupid check: if (index != offset(pos)) { std::cerr << "Wrong position vector at index " << index << "\n"; exit(1); } for (i = 0; i < Nvars; i++) { os << 
mins[i] + widths[i] * (pos[i] + 0.5) << " "; } os << maxbias - (bias[index] > 0.0 ? bias[index] : minbias) << "\n"; pos[Nvars - 1]++; // move on to next position } os.close(); delete[]pos; } void ABFdata::write_field(double *field, const char *fileName) { std::ofstream os; unsigned int index; int *pos, i; double *f; os.open(fileName); if (!os.good()) { std::cerr << "Cannot write to file " << fileName << ", aborting\n"; exit(1); } pos = new int[Nvars]; for (i = 0; i < Nvars; i++) pos[i] = 0; // start at beginning of array f = field; for (index = 0; index < scalar_dim; index++) { // Here we do the Euclidian division iteratively for (i = Nvars - 1; i > 0; i--) { if (pos[i] == sizes[i]) { pos[i] = 0; pos[i - 1]++; os << "\n"; } } for (i = 0; i < Nvars; i++) { os << mins[i] + widths[i] * (pos[i] + 0.5) << " "; } for (i = 0; i < Nvars; i++) { os << f[i] << " ";; } os << "\n"; pos[Nvars - 1]++; // move on to next position... f += Nvars; // ...also in the array } os.close(); delete[]pos; } diff --git a/tools/colvars/abf_integrate.cpp b/tools/colvars/abf_integrate.cpp index 69ebb2280..79298c8c5 100644 --- a/tools/colvars/abf_integrate.cpp +++ b/tools/colvars/abf_integrate.cpp @@ -1,343 +1,340 @@ /**************************************************************** * abf_integrate * * Integrate n-dimensional PMF from discrete gradient grid * * Jerome Henin * ****************************************************************/ #include "abf_data.h" #include <iostream> #include <fstream> #include <cmath> #include <ctime> #include <cstring> #include <cstdlib> char *parse_cl(int argc, char *argv[], unsigned int *nsteps, double *temp, bool * meta, double *hill, double *hill_fact); double compute_deviation(ABFdata * data, bool meta, double kT); int main(int argc, char *argv[]) { char *data_file; char *out_file; unsigned int step, nsteps, total, out_freq; int *pos, *dpos, *newpos; - unsigned int *histogram; - const double *grad, *newgrad; unsigned int offset, newoffset; - int not_accepted; double dA; double temp; double mbeta; bool meta; double 
hill, hill_fact, hill_min; double rmsd, rmsd_old, rmsd_rel_change, convergence_limit; bool converged; unsigned int scale_hill_step; // Setting default values nsteps = 0; temp = 500; meta = true; hill = 0.01; hill_fact = 0.5; hill_min = 0.0005; convergence_limit = -0.001; if (!(data_file = parse_cl(argc, argv, &nsteps, &temp, &meta, &hill, &hill_fact))) { std::cerr << "\nabf_integrate: MC-based integration of multidimensional free energy gradient\n"; - std::cerr << "Version 20110511\n\n"; + std::cerr << "Version 20160420\n\n"; std::cerr << "Syntax: " << argv[0] << " [-n <nsteps>] [-t <temp>] [-m [0|1] (metadynamics)]" " [-h <hill>] [-f <hill_fact>]\n\n"; exit(1); } if (meta) { std::cout << "\nUsing metadynamics-style sampling with hill height: " << hill << "\n"; if (hill_fact) { std::cout << "Varying hill height by factor " << hill_fact << "\n"; } } else { std::cout << "\nUsing unbiased MC sampling\n"; } - + if (nsteps) { std::cout << "Sampling " << nsteps << " steps at temperature " << temp << "\n\n"; out_freq = nsteps / 10; scale_hill_step = nsteps / 2; converged = true; } else { std::cout << "Sampling until convergence at temperature " << temp << "\n\n"; out_freq = 1000000; converged = false; } - // Inverse temperature in (kcal/mol)-1 + // Inverse temperature in (kcal/mol)-1 mbeta = -1 / (0.001987 * temp); ABFdata data(data_file); if (!nsteps) { scale_hill_step = 2000 * data.scalar_dim; nsteps = 2 * scale_hill_step; std::cout << "Setting minimum number of steps to " << nsteps << "\n"; } srand(time(NULL)); pos = new int[data.Nvars]; dpos = new int[data.Nvars]; newpos = new int[data.Nvars]; + // TODO: this will be an infinite loop if there is no sampling + // it would be more robust to build a list of non-empty bins, and + // pick from there (or just treat the special case of no sampling + // and output a null PMF) do { for (int i = 0; i < data.Nvars; i++) { pos[i] = rand() % data.sizes[i]; } offset = data.offset(pos); - } while ( !data.allowed (offset) ); + } while ( !data.allowed (offset) ); 
rmsd = compute_deviation(&data, meta, 0.001987 * temp); std::cout << "\nInitial gradient RMS is " << rmsd << "\n"; total = 0; for (step = 1; (step <= nsteps || !converged); step++) { if ( step % out_freq == 0) { rmsd_old = rmsd; rmsd = compute_deviation(&data, meta, 0.001987 * temp); rmsd_rel_change = (rmsd - rmsd_old) / (rmsd_old * double (out_freq)) * 1000000.0; std::cout << "Step " << step << " ; gradient RMSD is " << rmsd << " ; relative change per 1M steps " << rmsd_rel_change; if ( rmsd_rel_change > convergence_limit && step >= nsteps ) { converged = true; } if (meta && hill_fact && step > scale_hill_step && hill > hill_min ) { hill *= hill_fact; std::cout << " - changing hill height to " << hill << "\n"; } else { std::cout << "\n"; } } offset = data.offset(pos); data.histogram[offset]++; if (meta) { data.bias[offset] += hill; } - grad = data.gradients + offset * data.Nvars; + const double *grad = data.gradients + offset * data.Nvars; - not_accepted = 1; + int not_accepted = 1; while (not_accepted) { dA = 0.0; total++; for (int i = 0; i < data.Nvars; i++) { dpos[i] = rand() % 3 - 1; newpos[i] = pos[i] + dpos[i]; data.wrap(newpos[i], i); if (newpos[i] == pos[i]) dpos[i] = 0; if (dpos[i]) { dA += grad[i] * dpos[i] * data.widths[i]; // usefulness of the interpolation below depends on // where the grid points are for the histogram wrt to the gradients // If done, it has to be done in all directions // the line below is useless //dA += 0.5 * (newgrad[i] + grad[i]) * dpos[i] * data.widths[i]; } } newoffset = data.offset(newpos); if (meta) { dA += data.bias[newoffset] - data.bias[offset]; } if ( data.allowed (newoffset) && (((float) rand()) / RAND_MAX < exp(mbeta * dA)) ) { // Accept move for (int i = 0; i < data.Nvars; i++) { pos[i] = newpos[i]; not_accepted = 0; } } } } std::cout << "Run " << total << " total iterations; acceptance ratio is " << double (step) / double (total) << " ; final gradient RMSD is " << compute_deviation(&data, meta, 0.001987 * temp) << 
"\n"; out_file = new char[strlen(data_file) + 8]; if (meta) { sprintf(out_file, "%s.pmf", data_file); std::cout << "Writing PMF to file " << out_file << "\n"; data.write_bias(out_file); } // TODO write a PMF for unbiased MC, too... sprintf(out_file, "%s.histo", data_file); std::cout << "Writing sampling histogram to file " << out_file << "\n"; data.write_histogram(out_file); sprintf(out_file, "%s.est", data_file); std::cout << "Writing estimated FE gradient to file " << out_file << "\n"; data.write_field(data.estimate, out_file); sprintf(out_file, "%s.dev", data_file); std::cout << "Writing FE gradient deviation to file " << out_file << "\n\n"; data.write_field(data.deviation, out_file); delete [] pos; delete [] dpos; delete [] newpos; delete [] out_file; exit(0); } double compute_deviation(ABFdata * data, bool meta, double kT) { // Computing deviation between gradients differentiated from pmf // and input data // NOTE: this is mostly for output, hence NOT performance-critical double *dev = data->deviation; double *est = data->estimate; const double *grad = data->gradients; - int *pos, *dpos, *newpos; + int *pos, *newpos; double rmsd = 0.0; unsigned int offset, newoffset; double sum; int c; bool moved; unsigned int norm = 0; // number of data points summmed pos = new int[data->Nvars]; - dpos = new int[data->Nvars]; newpos = new int[data->Nvars]; for (int i = 0; i < data->Nvars; i++) pos[i] = 0; for (offset = 0; offset < data->scalar_dim; offset++) { for (int i = data->Nvars - 1; i > 0; i--) { if (pos[i] == data->sizes[i]) { pos[i] = 0; pos[i - 1]++; } } if (data->allowed (offset)) { for (int i = 0; i < data->Nvars; i++) newpos[i] = pos[i]; for (int i = 0; i < data->Nvars; i++) { est[i] = 0.0; sum = 0.0; // sum of finite differences on two sides (if not on edge of the grid) c = 0; // count of summed values newpos[i] = pos[i] - 1; moved = data->wrap(newpos[i], i); newoffset = data->offset(newpos); if ( moved && data->allowed (newoffset) ) { if (meta) { sum = 
(data->bias[newoffset] - data->bias[offset]) / data->widths[i]; c++; } else { if (data->histogram[offset] && data->histogram[newoffset]) { sum = kT * log(double (data->histogram[newoffset]) / double (data->histogram[offset])) / data->widths[i]; c++; } } } newpos[i] = pos[i] + 1; moved = data->wrap(newpos[i], i); newoffset = data->offset(newpos); if ( moved && data->allowed (newoffset) ) { if (meta) { sum += (data->bias[offset] - data->bias[newoffset]) / data->widths[i]; c++; } else { if (data->histogram[offset] && data->histogram[newoffset]) { sum += kT * log(double (data->histogram[offset]) / double (data->histogram[newoffset])) / data->widths[i]; c++; } } } newpos[i] = pos[i]; // Go back to initial position for next dimension est[i] = (c ? sum/double(c) : 0.0); dev[i] = grad[i] - est[i]; rmsd += dev[i] * dev[i]; norm++; } } pos[data->Nvars - 1]++; // move on to next point est += data->Nvars; dev += data->Nvars; grad += data->Nvars; } delete [] pos; delete [] newpos; - delete [] dpos; return sqrt(rmsd / norm); } char *parse_cl(int argc, char *argv[], unsigned int *nsteps, double *temp, bool * meta, double *hill, double *hill_fact) { - char *filename = NULL; - float f_temp, f_hill; int meta_int; // getting default value for the integer meta_int = (*meta ? 1 : 0); // "Syntax: " << argv[0] << " [-n <nsteps>] [-t <temp>] [-m [0|1] (metadynamics)] [-h <hill>]\n"; if (argc < 2) { return NULL; } for (int i = 2; i + 1 < argc; i += 2) { if (argv[i][0] != '-') { return NULL; } switch (argv[i][1]) { case 'n': if (sscanf(argv[i + 1], "%u", nsteps) != 1) return NULL; break; case 't': if (sscanf(argv[i + 1], "%lf", temp) != 1) return NULL; break; case 'm': if (sscanf(argv[i + 1], "%u", &meta_int) != 1) return NULL; break; case 'h': if (sscanf(argv[i + 1], "%lf", hill) != 1) return NULL; break; case 'f': if (sscanf(argv[i + 1], "%lf", hill_fact) != 1) return NULL; break; default: return NULL; } } *meta = (meta_int != 0); return argv[1]; }