diff --git a/src/database/reader_common.cpp b/src/database/reader_common.cpp index a8f7381..8983d69 100644 --- a/src/database/reader_common.cpp +++ b/src/database/reader_common.cpp @@ -1,342 +1,342 @@ /*------------------------------------------------------------------------------- Copyright (c) 2015 F. Georget <fabieng@princeton.edu>, Princeton University All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -----------------------------------------------------------------------------*/ #include "reader_common.hpp" #include "section_name.hpp" #include "errors.hpp" #include <iostream> #include <boost/algorithm/string/split.hpp> #include <boost/algorithm/string/trim_all.hpp> #include <boost/algorithm/string/predicate.hpp> #include <locale> //#include <cctype> namespace specmicp { namespace database { void parse_equation(const std::string& equation, std::map<std::string, scalar_t> &compo) { std::vector<std::string> list_compo; boost::split(list_compo, equation, [](char input){return input == ',';}, boost::token_compress_on); for (auto it=list_compo.begin(); it!=list_compo.end(); ++it) { std::string& toparse = *it; double coeff = 0; boost::trim_all(toparse); unsigned int pos_end = 0; while (pos_end < toparse.size()) { if (std::isalpha(toparse[pos_end])) // or std::isblank(toparse[pos_end])) { break; } ++pos_end; } std::string label(toparse.substr(pos_end, toparse.size()-pos_end)); boost::trim_all(label); if (pos_end == 0) coeff =1; else if (pos_end == 1 and toparse[0] == '-') coeff=-1; else { std::string tofloat = toparse.substr(0, pos_end); while (pos_end > 1) { if ((tofloat[0] == '-' or tofloat[0] == '+') and std::isspace(tofloat[1])) { tofloat = tofloat[0] + tofloat.substr(2,pos_end-2); --pos_end; continue; } else { break; } } if ( tofloat[pos_end-1] == '-' ) {coeff = -1;} else if (tofloat[pos_end-1] == '+') {coeff = +1;} else {coeff = std::stof(tofloat);} } compo[label] = coeff; } } scalar_t charge_from_label(const std::string& label) { int sign=1; auto start= label.end(); auto stop = label.end(); for (auto it=label.begin(); it != label.end(); ++it) { if (*it == INDB_OPEN_DELIMITER_CHARGE) { start = it; } } if (start == label.end()) {return 0;} // no charge specified for (auto it=start+1; it != label.end(); ++it) { if (*it == INDB_CLOSE_DELIMITER_CHARGE) { stop = it; } } if (stop == label.end()) {throw db_invalid_syntax("Charge : missing closing delimiter for species : "+label+".");} start = start+1; if (stop == start) {return 0;} // nothing inside bracket // get the signs if (*(stop-1) == '-') { sign = -1; stop =stop-1; } else if (*(stop-1) == '+') { sign = +1; stop = stop-1; } if (stop == start) { return sign; // just the sign, |z|=1 } scalar_t charge; try { charge = sign*std::stof(std::string(start,stop)); } catch (std::invalid_argument&) { throw db_invalid_syntax("Charge : bad formatting for species :"+ label+"."); } return charge; } // find the next element std::string::size_type find_element(const std::string& label, std::string::size_type pos, element_map& element_coeff); // insert label in elem_map if it doesn't exist else add coeff to the value void insert_or_add(element_map& elem_map, const std::string& label, scalar_t coeff); // find the position of the matching parenthesis std::string::size_type find_matching_parenthesis(const std::string& label, std::string::size_type init_pos); // find a number, return 1 if it's not there std::string::size_type find_number(const std::string& label, std::string::size_type init_pos, scalar_t& number); // return true of the bit in paremnthesis is a phase qualifier (aq, g, s, am, mic) bool is_phase_qualifier(const std::string& bit_in_parenthesis); // return true if it is an expected special character (not a number or a parenthesis) bool is_special(std::string::size_type charac); void element_composition_from_label(std::string label, element_map& compo) { boost::trim_all(label); compo.clear(); std::string::size_type pos = 0; while (pos < label.size()) { if (label[pos] == '(') // ex Al(OH)4 { ++pos; // go into the parenthesis // find composition in parenthesis element_map subcompo; std::string::size_type final_pos = find_matching_parenthesis(label, pos); const std::string sublabel = label.substr(pos, final_pos-pos); if (is_phase_qualifier(sublabel)) { // skip that pos = final_pos; break; } element_composition_from_label(sublabel, subcompo); // find coefficient of parenthesis scalar_t coeff; final_pos = find_number(label, final_pos+1, coeff); // add the compo of parenthesis to the total compo add_to_element_map(compo, subcompo, coeff); pos = final_pos; } else if (label[pos] == ':') // ex CaSO3:0.5H2O { ++pos; // skip the semi-colon // find coefficient scalar_t coeff; std::string::size_type final_pos = find_number(label, pos, coeff); // find element element_map subcompo; element_composition_from_label(label.substr(final_pos, label.size()-final_pos), subcompo); // add compo to total compo add_to_element_map(compo, subcompo, coeff); pos = label.size(); } else if (label[pos] == '[') // charge AlO(OH)3[-] { // it's finished break; } else { pos = find_element(label, pos, compo); } } } bool is_special(std::string::size_type charac) { return (charac == '(' or charac == ':' or charac == ')' or charac == '[' or charac == ']'); } std::string::size_type find_matching_parenthesis(const std::string& label, std::string::size_type init_pos) { for (std::string::size_type ind=init_pos; ind<label.size(); ++ind) { if (label[ind] == '(') { - throw std::invalid_argument("Too many parenthesis in label : '"+label+"' ( pos : "+std::to_string(ind)+" )."); + throw std::invalid_argument("Too many parenthesis in formula : '"+label+"' ( pos : "+std::to_string(ind)+" )."); } else if (label[ind] == ')') { return ind; } } - throw std::invalid_argument("Unmatched parenthesis in label : '"+label+"' (pos : "+std::to_string(init_pos)+" )."); + throw std::invalid_argument("Unmatched parenthesis in formula : '"+label+"' (pos : "+std::to_string(init_pos)+" )."); } std::string::size_type find_number(const std::string& label, std::string::size_type init_pos, scalar_t& number) { if (not std::isdigit(label[init_pos])) { number = 1; return init_pos; } std::string::size_type pos = init_pos + 1; while (pos < label.size()) { const std::string::value_type chr = label[pos]; if (not (chr == '.' or std::isdigit(chr))) { break; } ++pos; } const std::string coeff_str = label.substr(init_pos, pos); number = std::stod(coeff_str); return pos; } void insert_or_add(element_map& elem_map, const std::string& label, scalar_t coeff) { auto it = elem_map.find(label); if (it != elem_map.end()) { it->second += coeff; } else { elem_map.insert(element_map::value_type(label, coeff)); } } std::string::size_type find_element(const std::string& label, std::string::size_type pos, element_map& compo) { if (not std::isupper(label[pos])) - throw std::invalid_argument("Invalid label : "+label+" (Error detected at position "+std::to_string(pos)+" )."); + throw std::invalid_argument("Invalid formula : "+label+" (Error detected at position "+std::to_string(pos)+" )."); std::string::size_type current_pos = pos +1; std::string::size_type start_number_pos = std::string::npos; while (true) { if (current_pos == label.size() or is_special(label[current_pos]) or std::isupper(label[current_pos]) ) { if (start_number_pos == std::string::npos) { insert_or_add(compo, label.substr(pos, current_pos-pos), 1.0); } else { const std::string coeff_str = label.substr(start_number_pos, current_pos-start_number_pos); insert_or_add(compo, label.substr(pos, start_number_pos-pos), std::stod(coeff_str)); } break; } else if (std::isdigit(label[current_pos]) or label[current_pos] == '.') { if (start_number_pos == std::string::npos) start_number_pos = current_pos; } ++current_pos; } return current_pos; } bool is_phase_qualifier(const std::string& bit_in_parenthesis) { if (bit_in_parenthesis == "aq" or bit_in_parenthesis == "g" or bit_in_parenthesis == "s" or bit_in_parenthesis == "am" or bit_in_parenthesis == "mic" ) { return true; } else { return false; } } void add_to_element_map(element_map& to_update, const element_map& to_add, const scalar_t coeff) { for (auto& it:to_add) { insert_or_add(to_update, it.first, coeff*it.second); } } } //end namespace database } //end namespace specmicp diff --git a/src/database/section_name.hpp b/src/database/section_name.hpp index 39e9a2d..8e38ac5 100644 --- a/src/database/section_name.hpp +++ b/src/database/section_name.hpp @@ -1,36 +1,38 @@ #define INDB_SECTION_ELEMENTS "Elements" #define INDB_SECTION_BASIS "Basis" #define INDB_SECTION_AQUEOUS "Aqueous" #define INDB_SECTION_MINERALS "Minerals" #define INDB_SECTION_GAS "Gas" #define INDB_SECTION_SORBED "Sorbed" #define INDB_SECTION_COMPOUNDS "Compounds" #define INDB_ATTRIBUTE_LABEL "label" #define INDB_ATTRIBUTE_ACTIVITY "activity" #define INDB_ATTRIBUTE_ACTIVITY_A "a" #define INDB_ATTRIBUTE_ACTIVITY_B "b" #define INDB_ATTRIBUTE_MOLARMASS "molar_mass" #define INDB_ATTRIBUTE_MOLARVOLUME "molar_volume" #define INDB_ATTRIBUTE_COMPOSITION "composition" #define INDB_ATTRIBUTE_LOGK "log_k" #define INDB_ATTRIBUTE_NBSITEOCCUPIED "nb_sites_occupied" #define INDB_ATTRIBUTE_FLAG_KINETIC "flag_kinetic" +#define INDB_ATTRIBUTE_FORMULA "formula" + #define INDB_OPEN_DELIMITER_CHARGE '[' #define INDB_CLOSE_DELIMITER_CHARGE ']' #define INDB_SECTION_METADATA "Metadata" #define INDB_ATTRIBUTE_NAME "name" #define INDB_ATTRIBUTE_VERSION "version" #define INDB_ATTRIBUTE_PATH "path" #define INDB_ATTRIBUTE_CHECKCOMPO "check_composition" #define INDB_ATTRIBUTE_ELEMENT "element" #define INDB_ATTRIBUTE_COMPONENT "component" #define INDB_MAIN "__main__" #define INDB_ELECTRON "E[-]" diff --git a/src/database/yaml_reader.cpp b/src/database/yaml_reader.cpp index 563c9b1..a482622 100644 --- a/src/database/yaml_reader.cpp +++ b/src/database/yaml_reader.cpp @@ -1,666 +1,754 @@ /*------------------------------------------------------------------------------- Copyright (c) 2015 F. Georget <fabieng@princeton.edu>, Princeton University All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -----------------------------------------------------------------------------*/ #include "yaml_reader.hpp" #include "reader_common.hpp" #include "section_name.hpp" #include <yaml-cpp/yaml.h> #include "../utils/io/yaml.hpp" #include <fstream> #include "../utils/log.hpp" #include "../utils/compat.hpp" #define PRECISION_TEST_CHARGE 1e-6 +#include <iostream> namespace specmicp { namespace database { struct DataReaderYaml::DataReaderYamlElementData { bool check_compo {true}; std::vector<element_map> components; + bool is_init {false}; }; DataReaderYaml::DataReaderYaml(): m_elem_data(make_unique<DataReaderYamlElementData>()) {} DataReaderYaml::~DataReaderYaml() = default; //! \brief Constructor DataReaderYaml::DataReaderYaml(RawDatabasePtr data, bool check_compo): DatabaseModule(data), m_elem_data(make_unique<DataReaderYamlElementData>()) { m_elem_data->check_compo = check_compo; } //! \brief Constructor //! //! @param filepath string containing the path to the database DataReaderYaml::DataReaderYaml(const std::string& filepath, bool check_compo): DatabaseModule(), m_elem_data(make_unique<DataReaderYamlElementData>()) { m_elem_data->check_compo = check_compo; parse(filepath); } //! \brief Constructor //! //! @param input input stream that contains the database DataReaderYaml::DataReaderYaml(std::istream& input, bool check_compo): DatabaseModule(), m_elem_data(make_unique<DataReaderYamlElementData>()) { m_elem_data->check_compo = check_compo; parse(input); } // safe access to values std::string obtain_label(const YAML::Node& species, const std::string& section); void obtain_composition(const YAML::Node& species, std::map<std::string, scalar_t>& compo, const std::string& label); scalar_t obtain_logk(const YAML::Node& species, const std::string& label); bool is_kinetic(const YAML::Node& species,const std::string& label); void DataReaderYaml::parse(const std::string& filepath) { std::ifstream datafile(filepath, std::ifstream::binary); if (not datafile.is_open()) { ERROR << "Database not found : " << filepath; std::string message("Database not found : " + filepath); throw std::invalid_argument(message); } data->metadata.path = filepath; parse(datafile); datafile.close(); } void DataReaderYaml::parse(std::istream& input ) { const YAML::Node root = io::parse_yaml(input); const char* indb_main = INDB_MAIN; io::check_mandatory_yaml_node(root, INDB_SECTION_METADATA, indb_main); parse_metadata(root[INDB_SECTION_METADATA]); // Basis // ------ io::check_mandatory_yaml_node(root, INDB_SECTION_BASIS, indb_main); parse_basis(root[INDB_SECTION_BASIS]); // Elements // -------- io::check_mandatory_yaml_node(root, INDB_SECTION_ELEMENTS, indb_main); ElementList elements; parse_elements(root[INDB_SECTION_ELEMENTS], elements); data->elements = std::move(elements); // Aqueous species // --------------- io::check_mandatory_yaml_node(root, INDB_SECTION_AQUEOUS, indb_main); AqueousList alist; parse_aqueous(root[INDB_SECTION_AQUEOUS], alist); data->aqueous = std::move(alist); // Minerals // -------- io::check_mandatory_yaml_node(root, INDB_SECTION_MINERALS, indb_main); MineralList minerals, minerals_kinetic; parse_minerals(root[INDB_SECTION_MINERALS], minerals, minerals_kinetic); data->minerals = std::move(minerals); data->minerals_kinetic = std::move(minerals_kinetic); // Gas // ---- GasList glist(0, data->nb_component()); if (root[INDB_SECTION_GAS]) parse_gas(root[INDB_SECTION_GAS], glist); else glist.set_valid(); data->gas = std::move(glist); // Sorbed species // -------------- SorbedList slist(0, data->nb_component()); if (root[INDB_SECTION_SORBED]) parse_sorbed(root[INDB_SECTION_SORBED], slist); else slist.set_valid(); data->sorbed = std::move(slist); // Compounds // --------- CompoundList clist(0, data->nb_component()); if (root[INDB_SECTION_COMPOUNDS]) parse_compounds(root[INDB_SECTION_COMPOUNDS], clist); else clist.set_valid(); data->compounds = std::move(clist); } void DataReaderYaml::parse_metadata(const YAML::Node& root) { data->metadata.name = io::get_yaml_mandatory<std::string>(root, INDB_ATTRIBUTE_NAME, INDB_SECTION_METADATA); data->metadata.version = io::get_yaml_mandatory<std::string>(root, INDB_ATTRIBUTE_VERSION, INDB_SECTION_METADATA); m_elem_data->check_compo = io::get_yaml_optional<bool>(root, INDB_ATTRIBUTE_CHECKCOMPO, INDB_SECTION_METADATA, m_elem_data->check_compo); } +void DataReaderYaml::init_elemental_composition() +{ + if (m_elem_data->is_init) return; + + m_elem_data->components.reserve(data->nb_component()); + // Parse label to find elemental composition + for (auto id: data->range_component()) + { + element_map elem_compo; + element_composition_from_label(data->get_label_component(id), elem_compo); + m_elem_data->components.emplace_back(elem_compo); + } + m_elem_data->is_init = true; +} + void DataReaderYaml::parse_basis(const YAML::Node& basis) { DEBUG << "Size basis : " << basis.size(); data->components = ComponentList(basis.size()); - m_elem_data->components = std::vector<element_map>(basis.size()); if (data->nb_component() <= 3) throw std::invalid_argument("The basis is too small."); index_t starting_point = 2; for (int id: data->components.range()) { index_t id_in_db = starting_point; if (id_in_db >= data->nb_component()) { throw std::invalid_argument("The basis should contain H2O and E[-]."); } const YAML::Node& species = basis[id]; std::string label = obtain_label(species, INDB_SECTION_BASIS); if (label == water_label) { id_in_db = 0; //throw invalid_database("The first component should be "+water_label+"."); } else if (label == electron_label) { id_in_db = 1; //throw invalid_database("The second component should be "+electron_label+"."); } else { ++starting_point; } auto is_already_in_the_database = data->components.get_id(label); if (is_already_in_the_database != no_species) throw invalid_database("Component : " + label + "is already in the database."); - // Parse label to find elemental composition - if (m_elem_data->check_compo) - { - element_map elem_compo; - element_composition_from_label(label, elem_compo); - m_elem_data->components[id_in_db] = elem_compo; - } + SPAM << "Parsing component : " << label; ComponentValues values; values.label = label; // activity coeeficients values.ionic_values.charge = charge_from_label(label); if (species[INDB_ATTRIBUTE_ACTIVITY]) { values.ionic_values.a_debye = io::get_yaml_mandatory<scalar_t>( species[INDB_ATTRIBUTE_ACTIVITY], INDB_ATTRIBUTE_ACTIVITY_A, label); values.ionic_values.b_debye = io::get_yaml_mandatory<scalar_t>( species[INDB_ATTRIBUTE_ACTIVITY], INDB_ATTRIBUTE_ACTIVITY_B, label); } // molar mass values.molar_mass = io::get_yaml_mandatory<scalar_t>( species, INDB_ATTRIBUTE_MOLARMASS, label); data->components.set_values(id_in_db, std::move(values)); } + if (m_elem_data->check_compo) init_elemental_composition(); // after this the basis is ready to use data->components.set_valid(); } void DataReaderYaml::parse_aqueous(const YAML::Node& aqueous, AqueousList& alist) { DEBUG << "Parse aqueous species"; alist = AqueousList(aqueous.size(), data->nb_component()); //const auto size = data->nb_aqueous(); for (auto id: alist.range()) { const YAML::Node& species = aqueous[static_cast<int>(id)]; AqueousValues values; values.label = obtain_label(species, INDB_SECTION_AQUEOUS); values.ionic_values.charge = charge_from_label(values.label); if (species[INDB_ATTRIBUTE_ACTIVITY]) { values.ionic_values.a_debye = io::get_yaml_mandatory<scalar_t>( species[INDB_ATTRIBUTE_ACTIVITY], INDB_ATTRIBUTE_ACTIVITY_A, values.label); values.ionic_values.b_debye = io::get_yaml_mandatory<scalar_t>( species[INDB_ATTRIBUTE_ACTIVITY], INDB_ATTRIBUTE_ACTIVITY_B, values.label); } values.logk = obtain_logk(species, values.label); alist.set_values(id, values); // equation std::map<std::string, scalar_t> compo; obtain_composition(species, compo, values.label); scalar_t test_charge {0.0}; std::map<index_t, scalar_t> to_canonicalize; for (const auto& it: compo) { // component ? const auto search = data->components.get_id(it.first); if(search != no_species) { alist.set_nu_ji(id, search, it.second); test_charge += it.second*data->charge_component(search); } // aqueous species ? else { // in the document we parse ? const auto id_aq = alist.get_id(it.first); if (id_aq != no_species) { to_canonicalize.insert({id_aq, it.second}); test_charge += it.second*alist.charge(id_aq); } // cannot be found... else { throw db_invalid_syntax("Unknown species : '" + it.first + "' while parsing equations for " + alist.get_label(id) +"."); } } } if (std::abs(test_charge - alist.charge(id)) > PRECISION_TEST_CHARGE) { throw db_invalid_data("Total charge is not zero for aqueous species : '" + alist.get_label(id) + "' - Charge-decomposition - charge species : "+ std::to_string(test_charge - alist.charge(id) )+"."); } for (const auto& tocanon: to_canonicalize) { alist.canonicalize(id, tocanon.first, tocanon.second); } // check composition if (m_elem_data->check_compo) { - element_map compo_from_label; - element_composition_from_label(alist.get_label(id), compo_from_label); - - element_map compo_from_compo; - for (auto idc: data->range_component()) - { - if (alist.nu_ji(id, idc) == 0.0) continue; - add_to_element_map(compo_from_compo, - m_elem_data->components[idc], - alist.nu_ji(id, idc) - ); - } - - compo_from_compo.erase(electron_label); - - for (auto it=compo_from_compo.begin(); it!=compo_from_compo.end(); ) - { - if (it->first == "E" or it->second == 0.0) - it = compo_from_compo.erase(it); - else - ++it; - } - if (compo_from_label != compo_from_compo) - { - throw db_invalid_data("Inconsistent composition for aqueous species : '" + alist.get_label(id) + "'."); - } + if (species[INDB_ATTRIBUTE_FORMULA]) + check_composition(species[INDB_ATTRIBUTE_FORMULA].as<std::string>(), alist, id); + else + check_composition(alist.get_label(id), alist, id); } - } // valid set of aqueous species alist.set_valid(); } void DataReaderYaml::parse_minerals( const YAML::Node& minerals, MineralList& minerals_list, MineralList& minerals_kinetic_list ) { // this one is a little bit more complicated since we need to detect // if it is a solid phase governed by equilibrium or kinetic DEBUG << "Parse minerals"; index_t nb_mineral = minerals.size(); // find kinetic minerals int nb_kin = 0; for (int id=0; id<nb_mineral; ++id) { if (is_kinetic(minerals[id], INDB_SECTION_MINERALS)) ++nb_kin; } minerals_list = MineralList(nb_mineral - nb_kin, data->nb_component()); minerals_kinetic_list = MineralList(nb_kin, data->nb_component()); // id for each class of minerals index_t id_in_eq=0; index_t id_in_kin=0; for (index_t id=0; id<nb_mineral; ++id) { const YAML::Node& species = minerals[static_cast<int>(id)]; //check if material is at equilibrium or governed by kinetics MineralValue value; value.label = std::move(obtain_label(species, INDB_SECTION_MINERALS)); value.logk = obtain_logk(species, value.label); bool is_kin = is_kinetic(species, value.label); // ###FIXME value.molar_volume = io::get_yaml_optional<scalar_t>(species, INDB_ATTRIBUTE_MOLARVOLUME, value.label, -1); auto& mlist = is_kin?minerals_kinetic_list:minerals_list; auto& true_id = is_kin?id_in_kin:id_in_eq; mlist.set_values(true_id, value); // equation // -------- std::map<std::string, scalar_t> compo; obtain_composition(species, compo, value.label); double test_charge = 0; std::map<index_t, scalar_t> to_canonicalize; for (const auto& it: compo) { auto idc = data->components.get_id(it.first); if(idc != no_species) { // this is a component mlist.set_nu_ji(true_id, idc, it.second); test_charge += it.second * data->charge_component(idc); } else { // this is an aqueous species auto idaq = data->aqueous.get_id(it.first); if (idaq != no_species) { to_canonicalize.insert({idaq, it.second}); test_charge += it.second * data->charge_aqueous(idaq); } else // or something we don't know { throw db_invalid_syntax("Unknown species : '" + it.first + "' while parsing equations for " + mlist.get_label(true_id) +"."); } } } if (std::abs(test_charge) > PRECISION_TEST_CHARGE) { throw db_invalid_data("Total charge is not zero for mineral : '"+ mlist.get_label(true_id) + "' - total charge : "+std::to_string(test_charge)+"."); } // canonicalise for (const auto& tocanon: to_canonicalize) { mlist.canonicalize(true_id, data->aqueous, tocanon.first, tocanon.second); } + // check composition + if (m_elem_data->check_compo) + { + if (species[INDB_ATTRIBUTE_FORMULA]) + check_composition(species[INDB_ATTRIBUTE_FORMULA].as<std::string>(), mlist, true_id); + else + check_composition(mlist.get_label(true_id), mlist, true_id); + } + // update index ++true_id; } specmicp_assert(id_in_kin == minerals_kinetic_list.size()); specmicp_assert(id_in_eq == minerals_list.size()); // ok after that point minerals_list.set_valid(); minerals_kinetic_list.set_valid(); } void DataReaderYaml::parse_gas(const YAML::Node& gas, GasList& glist) { DEBUG << "Parse gas"; glist = GasList(gas.size(), data->nb_component()); for (auto id: glist.range()) { const YAML::Node& species = gas[static_cast<int>(id)]; GasValues values; values.label = obtain_label(species, INDB_SECTION_GAS); auto is_already_in_the_database = data->gas.get_id(values.label); if (is_already_in_the_database != no_species) throw invalid_database("Component : " + values.label + "is already in the database."); values.logk = obtain_logk(species, values.label); glist.set_values(id, values); // equation std::map<std::string, scalar_t> compo; obtain_composition(species, compo, values.label); scalar_t test_charge = 0.0; std::map<index_t, scalar_t> to_canonicalize; for (auto it: compo) { const auto idc = data->components.get_id(it.first); // component if(idc != no_species) { glist.set_nu_ji(id, idc, it.second); test_charge += it.second*data->charge_component(idc); } // aqueous species else { const auto idaq = data->aqueous.get_id(it.first); if (idaq != no_species) { to_canonicalize.insert({idaq, it.second}); test_charge += it.second*data->charge_aqueous(idaq); } else { throw db_invalid_syntax("Unknown species : '" + it.first + "' while parsing equations for " + data->gas.get_label(id) +"."); } } } if (std::abs(test_charge) > PRECISION_TEST_CHARGE) { throw db_invalid_data("Total charge is not zero for gas '"+data->gas.get_label(id)+ "' : " + std::to_string(test_charge)+"."); } // canonicalise for (const auto& tocanon: to_canonicalize) { glist.canonicalize(id, data->aqueous, tocanon.first, tocanon.second); } + + // check composition + if (m_elem_data->check_compo) + { + if (species[INDB_ATTRIBUTE_FORMULA]) + check_composition(species[INDB_ATTRIBUTE_FORMULA].as<std::string>(), glist, id); + else + check_composition(glist.get_label(id), glist, id); + } } glist.set_valid(); } void DataReaderYaml::parse_sorbed(const YAML::Node& sorbed, SorbedList& slist) { DEBUG << "Parse sorbed species"; slist = SorbedList(sorbed.size(), data->nb_component()); for (auto id: slist.range()) { const YAML::Node& species = sorbed[static_cast<int>(id)]; SorbedValues values; values.label = obtain_label(species, INDB_SECTION_SORBED); auto is_already_in_the_database = slist.get_id(values.label); if (is_already_in_the_database != no_species) throw invalid_database("Sorbed species : " + values.label + "is already in the database."); values.logk = obtain_logk(species, values.label); const scalar_t nb_site_occupied = io::get_yaml_mandatory<scalar_t> (species, INDB_ATTRIBUTE_NBSITEOCCUPIED, values.label); if (nb_site_occupied < 0) { throw db_invalid_data("The number of sites occupied by a sorbed species must be positive. (Species : " + values.label + ", number of sites : " +std::to_string(nb_site_occupied) + ")"); } values.sorption_site_occupied = nb_site_occupied; slist.set_values(id, values); std::map<std::string, scalar_t> compo; obtain_composition(species, compo, values.label); double test_charge = 0; std::map<index_t, scalar_t> to_canonicalize; for (auto it: compo) { const auto idc = data->components.get_id(it.first); if(idc != no_species) { slist.set_nu_ji(id, idc, it.second); test_charge += it.second*data->charge_component(idc); } else { const auto idaq = data->aqueous.get_id(it.first); if (idaq != no_species) { to_canonicalize.insert({idaq, it.second}); test_charge += it.second*data->charge_aqueous(idaq); } else { throw db_invalid_syntax("Unknown species : '" + it.first + "' while parsing equations for " + slist.get_label(id) +"."); } } } if (std::abs(test_charge) > PRECISION_TEST_CHARGE) { throw db_invalid_data("Total charge is not zero for gas '"+slist.get_label(id)+ "' : " + std::to_string(test_charge)+"."); } // canonicalise for (const auto& tocanon: to_canonicalize) { slist.canonicalize(id, data->aqueous, tocanon.first, tocanon.second); } } slist.set_valid(); } void DataReaderYaml::parse_compounds(const YAML::Node& compounds, CompoundList &clist) { DEBUG << "Parse compounds"; clist = CompoundList(compounds.size(), data->nb_component()); for (auto id: clist.range()) { const YAML::Node& species = compounds[static_cast<int>(id)]; std::string label = obtain_label(species, INDB_SECTION_COMPOUNDS); auto is_already_in_the_database = clist.get_id(label); if (is_already_in_the_database != no_species) throw invalid_database("Compounds : " + label + "is already in the database."); clist.set_values(id, label); std::map<std::string, scalar_t> compo; obtain_composition(species, compo, label); double test_charge = 0; std::map<index_t, scalar_t> to_canonicalize; for (auto it: compo) { const auto idc = data->components.get_id(it.first); if(idc != no_species) { clist.set_nu_ji(id, idc, it.second); test_charge += it.second*data->charge_component(idc); } else { const auto idaq = data->aqueous.get_id(it.first); if (idaq != no_species) { to_canonicalize.insert({idaq, it.second}); test_charge += it.second*data->charge_aqueous(idaq); } else { throw db_invalid_syntax("Unknown species : '" + it.first + "' while parsing equations for " + clist.get_label(id) +"."); } } } if (std::abs(test_charge) > PRECISION_TEST_CHARGE) { throw db_invalid_data("Total charge is not zero for gas '"+clist.get_label(id)+ "' : " + std::to_string(test_charge)+"."); } // canonicalise for (const auto& tocanon: to_canonicalize) { clist.canonicalize(id, data->aqueous, tocanon.first, tocanon.second); } + + // check composition + if (m_elem_data->check_compo) + { + if (species[INDB_ATTRIBUTE_FORMULA]) + check_composition(species[INDB_ATTRIBUTE_FORMULA].as<std::string>(), clist, id); + else + check_composition(clist.get_label(id), clist, id); + } } clist.set_valid(); } +void check_element_map(const element_map& formula, const element_map& composition, const std::string& label); + +void DataReaderYaml::check_composition(const std::string& formula, ReactiveSpeciesList& rlist, index_t id) +{ + if (not m_elem_data->is_init) init_elemental_composition(); + + // parse the formula + element_map compo_from_label; + element_composition_from_label(formula, compo_from_label); + + // parse the composition + element_map compo_from_compo; + for (auto idc: data->range_component()) + { + if (rlist.nu_ji(id, idc) == 0.0) continue; + add_to_element_map(compo_from_compo, + m_elem_data->components[idc], + rlist.nu_ji(id, idc) + ); + } + // clean the composition + for (auto it=compo_from_compo.begin(); it!=compo_from_compo.end(); ) + { + if (it->first == "E" or it->second == 0.0) + it = compo_from_compo.erase(it); + else + ++it; + } + for (auto& it: compo_from_compo) + { + std::cout << it.first << " : " << it.second << " # " << compo_from_label[it.first] << "\n"; + } + for (auto& it: compo_from_label) + { + std::cout << it.first << " : " << it.second << " # " << compo_from_compo[it.first] << "\n"; + } + std::cout << std::endl; + + // check that both map are equal + check_element_map(compo_from_label, compo_from_compo, rlist.get_label(id)); +} + + +void check_element_map(const element_map& formula, const element_map& composition, const std::string& label) +{ + for (const auto& it1: formula) + { + const auto it2 = composition.find(it1.first); + if (it2 == composition.cend()) + { + throw db_invalid_data("Species '"+label+"' : element '"+ + it1.first+"' is in the formula but not in the composition"); + } + if (std::abs(it1.second - it2->second) > PRECISION_TEST_CHARGE) + { + throw db_invalid_data("Species '"+label+"' : element '"+ + it1.first+"' has a different stoichiometry in the formula ("+ + std::to_string(it1.second)+ + ") than in the composition ("+ + std::to_string(it2->second)+")."); + } + } + for (const auto& it1: composition) + { + const auto it2 = formula.find(it1.first); + if (it2 == composition.cend()) + { + throw db_invalid_data("Species '"+label+"' : element '"+ + it1.first+"' is in the composition but not in the formula"); + } + } + +} void DataReaderYaml::parse_elements(const YAML::Node& elements, ElementList &elist) { if (elements.size() != data->nb_component()) throw db_invalid_data("The number of elements does not corresponds to the number of components"); for (index_t id: data->range_component()) { const YAML::Node& elem = elements[static_cast<int>(id)]; const std::string elem_label = io::get_yaml_mandatory<std::string>( elem, INDB_ATTRIBUTE_ELEMENT, INDB_SECTION_ELEMENTS); const std::string comp_label = io::get_yaml_mandatory<std::string>( elem, INDB_ATTRIBUTE_COMPONENT, elem_label); index_t id_comp = data->get_id_component(comp_label); if (id_comp == no_species) { throw db_invalid_label("'"+comp_label+"' is not a valid component"); } elist.add_element(elem_label, id_comp); } } std::string obtain_label(const YAML::Node& species, const std::string& section) { return io::get_yaml_mandatory<std::string>(species, INDB_ATTRIBUTE_LABEL, section); } void obtain_composition(const YAML::Node& species, std::map<std::string, scalar_t>& compo, const std::string& label) { const std::string compo_string = io::get_yaml_mandatory<std::string>(species, INDB_ATTRIBUTE_COMPOSITION, label); parse_equation(compo_string, compo); } scalar_t obtain_logk(const YAML::Node& species, const std::string& label) { return io::get_yaml_mandatory<scalar_t>(species, INDB_ATTRIBUTE_LOGK, label); } bool is_kinetic(const YAML::Node& species, const std::string& label) { return io::get_yaml_optional<bool>(species, INDB_ATTRIBUTE_FLAG_KINETIC, label, false); } } //end namespace database } //end namespace specmicp diff --git a/src/database/yaml_reader.hpp b/src/database/yaml_reader.hpp index a9af758..237bf8e 100644 --- a/src/database/yaml_reader.hpp +++ b/src/database/yaml_reader.hpp @@ -1,125 +1,132 @@ /*------------------------------------------------------------------------------- Copyright (c) 2015 F. Georget <fabieng@princeton.edu>, Princeton University All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -----------------------------------------------------------------------------*/ #ifndef SPEMCICP_DATABASE_YAMLREADER_HPP #define SPEMCICP_DATABASE_YAMLREADER_HPP //! \file database/yaml_reader.hpp //! \brief A reader for a database in yaml format #include "module.hpp" namespace YAML { class Node; } namespace specmicp { namespace database { class SPECMICP_DLL_LOCAL DataReaderYaml: public DatabaseModule { public: //! \brief Empty constructor DataReaderYaml(); ~DataReaderYaml(); //! \brief Constructor DataReaderYaml(RawDatabasePtr data, bool check_compo=true); //! \brief Constructor //! //! @param filepath string containing the path to the database DataReaderYaml(const std::string& filepath, bool check_compo=true); //! \brief Constructor //! //! @param input input stream that contains the database DataReaderYaml(std::istream& input, bool check_compo=true); //! Return the databes RawDatabasePtr get_database() {return data;} //! \brief Parse the basis section //! //! Contains the list of primary species void parse_basis(const YAML::Node& basis_root); //! \brief Parse the aqueous section //! //! Contains the list of secondary species void parse_aqueous(const YAML::Node& aqueous_root, AqueousList& alist); //! \brief Parse the mineral section //! //! Contains the list of minerals void parse_minerals( const YAML::Node& minerals, MineralList &minerals_list, MineralList &minerals_kinetic_list ); //! \brief Parse the gas section void parse_gas(const YAML::Node& gas_root, GasList& glist); //! \brief Parse the sorbed species section void parse_sorbed(const YAML::Node& sorbed_root, SorbedList& slist); //! \brief Parse the compounds void parse_compounds(const YAML::Node& compounds, CompoundList& clist); //! \brief Parse the elements void parse_elements(const YAML::Node& elements, ElementList& elist); private: //! \brief Parse database //! //! Throw db_invalid_syntax if a syntax error was detected. //! Throws std::invalid_argument if the path to the database is not correct void parse(std::istream& input); //! \brief Parse database //! //! Throw db_invalid_syntax if a syntax error was detected. //! Throws std::invalid_argument if the path to the database is not correct void parse(const std::string& filepath); //! \brief Parse the metadata section //! //! we don't do much with them for now.... void parse_metadata(const YAML::Node& root); + //! \brief Initialize the data to check the composition + void init_elemental_composition(); + //! \brief Check initial composition + //! + //! \throw db_invalid_data if the composition is not consistent with the formula + void check_composition(const std::string& formula, ReactiveSpeciesList& rlist, index_t id); + DataReaderYaml(const DataReaderYaml&) = delete; DataReaderYaml& operator =(const DataReaderYaml&) = delete; struct DataReaderYamlElementData; std::unique_ptr<DataReaderYamlElementData> m_elem_data; }; } //end namespace database } //end namespace specmicp #endif // SPEMCICP_DATABASE_YAMLREADER_HPP diff --git a/tests/database/database_appender.cpp b/tests/database/database_appender.cpp index e70bfc0..5755b6f 100644 --- a/tests/database/database_appender.cpp +++ b/tests/database/database_appender.cpp @@ -1,132 +1,132 @@ #include "catch.hpp" #include "database/yaml_reader.hpp" #include "database/appender.hpp" #include "str_database.hpp" #include "utils/timer.hpp" #include <iostream> #include <sstream> using namespace specmicp; using namespace specmicp::database; static std::string mineral_appendix_database = R"plop( [ { "label": "MA1", "composition": "2 A2[+], A3[-], C1[-]", "log_k": -10, "molar_volume": 30.0 }, { "label": "MKA1", "composition": "4C3, 3A1", "log_k": -20, "molar_volume": 50.0, "flag_kinetic": 1 } ] )plop"; static std::string gas_appendix_database = R"plop( [ { "label": "GA1", "composition": "A1", "log_k": -6 } ] )plop"; static std::string sorbed_appendix_database = R"plop( [ { "label": "SA1", "composition": "A1", "log_k": -5, "nb_sites_occupied": 2 } ] )plop"; static std::string compounds_appendix_database = R"plop( [ { "label": "CompA1", "composition": "6 H2O, C3" } ] )plop"; TEST_CASE("DatabaseAppender", "[Database],[Appender],[Minerals]") { std::istringstream input(good_test_database); DataReaderYaml reader(input); auto data = reader.get_database(); SECTION("Append minerals") { size_t orig_hash = data->get_hash(); DataAppender appender(data); - appender.add_minerals(mineral_appendix_database); + appender.add_minerals(mineral_appendix_database, false); CHECK(data->is_valid()); CHECK(data->get_hash() != orig_hash); CHECK(data->nb_mineral() == 3); CHECK(data->nb_mineral_kinetic() == 2); CHECK(data->get_id_mineral("MA1") == 2); CHECK(data->get_id_mineral_kinetic("MKA1") == 1); CHECK(data->nu_mineral(2, 3) == 4.0); CHECK(data->molar_volume_mineral(2) == 30.0*1e-6); CHECK(data->molar_mass_mineral_kinetic(1, units::MassUnit::gram) == 31.0); } SECTION("Append gas") { size_t orig_hash = data->get_hash(); DataAppender appender(data); - appender.add_gas(gas_appendix_database); + appender.add_gas(gas_appendix_database, false); CHECK(data->is_valid()); CHECK(data->get_hash() != orig_hash); CHECK(data->nb_gas() == 2); CHECK(data->get_id_gas("GA1") == 1); CHECK(data->nu_gas(1, 2) == 1); CHECK(data->nu_gas(1, 3) == 1); } SECTION("Append sorbed") { size_t orig_hash = data->get_hash(); DataAppender appender(data); - appender.add_sorbed(sorbed_appendix_database); + appender.add_sorbed(sorbed_appendix_database, false); CHECK(data->is_valid()); CHECK(data->get_hash() != orig_hash); CHECK(data->nb_sorbed() == 2); CHECK(data->get_id_sorbed("SA1") == 1); CHECK(data->nu_sorbed(1, 2) == 1); CHECK(data->nu_sorbed(1, 3) == 1); CHECK(data->nb_sorption_sites(1) == 2); } SECTION("Append compounds") { size_t orig_hash = data->get_hash(); DataAppender appender(data); - appender.add_compounds(compounds_appendix_database); + appender.add_compounds(compounds_appendix_database, false); CHECK(data->is_valid()); CHECK(data->get_hash() != orig_hash); CHECK(data->nb_compounds() == 2); CHECK(data->get_id_compound("CompA1") == 1); CHECK(data->nu_compound(1, 0) == 6); CHECK(data->nu_compound(1, 4) == 1); } }