Page MenuHomec4science

reader.cpp
No OneTemporary

File Metadata

Created
Tue, Nov 19, 09:55

reader.cpp

/*-------------------------------------------------------
- Module : database
- File : reader.cpp
- Author : Fabien Georget
Copyright (c) 2014, Fabien Georget <fabieng@princeton.edu>, Princeton University
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Princeton University nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---------------------------------------------------------*/
#include "reader.hpp"
#include "json/json.h"
#include <fstream>
#include <stdexcept>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim_all.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <locale>
#include "utils/log.hpp"
#define INDB_SECTION_BASIS "Basis"
#define INDB_SECTION_AQUEOUS "Aqueous"
#define INDB_SECTION_MINERALS "Minerals"
#define INDB_SECTION_GAS "Gas"
#define INDB_SECTION_SORBED "Sorbed"
#define INDB_ATTRIBUTE_LABEL "label"
#define INDB_ATTRIBUTE_ACTIVITY "activity"
#define INDB_ATTRIBUTE_ACTIVITY_A "a"
#define INDB_ATTRIBUTE_ACTIVITY_B "b"
#define INDB_ATTRIBUTE_MOLARMASS "molar_mass"
#define INDB_ATTRIBUTE_MOLARVOLUME "molar_volume"
#define INDB_ATTRIBUTE_COMPOSITION "composition"
#define INDB_ATTRIBUTE_LOGK "log_k"
#define INDB_ATTRIBUTE_NBSITEOCCUPIED "nb_sites_occupied"
#define INDB_ATTRIBUTE_FLAG_KINETIC "flag_kinetic"
#define INDB_OPEN_DELIMITER_CHARGE '['
#define INDB_CLOSE_DELIMITER_CHARGE ']'
namespace specmicp {
namespace database {
void DataReader::parse()
{
std::ifstream datafile(m_filepath, std::ifstream::binary);
Json::Reader reader;
if (not datafile.is_open())
{
ERROR << "Database not found : " << m_filepath;
std::string message("Database not found : " + m_filepath);
throw std::invalid_argument(message);
}
DEBUG << "Parsing database";
bool parsingSuccessful = reader.parse( datafile, m_root);
if ( !parsingSuccessful )
{
// report to the user the failure and their locations in the document.
std::string message("Failed to parse configuration\n" + reader.getFormatedErrorMessages());
throw std::runtime_error(message);
}
parse_size_db();
parse_basis();
parse_aqueous();
parse_minerals();
parse_gas();
parse_sorbed();
datafile.close();
}
void DataReader::check_database()
{
// TODO
}
void DataReader::parse_size_db()
{
data->nb_component = m_root[INDB_SECTION_BASIS].size();
data->nb_aqueous = m_root[INDB_SECTION_AQUEOUS].size();
data->nb_mineral = m_root[INDB_SECTION_MINERALS].size();
data->nb_gas = m_root[INDB_SECTION_GAS].size();
data->nb_sorbed = m_root[INDB_SECTION_SORBED].size();
data->param_aq = Eigen::MatrixXd::Zero(data->nb_component+data->nb_aqueous, 3);
}
void DataReader::parse_basis()
{
Json::Value& basis = get_mandatory_section(m_root, INDB_SECTION_BASIS);
DEBUG << "Size basis : " << basis.size();
int size_basis = basis.size();
data->labels_basis.reserve(size_basis);
data->_molar_mass_basis.resize(size_basis);
for (int id=0; id<size_basis; ++id)
{
Json::Value& species = basis[id];
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
SPAM << "Parsing component : " << label;
data->map_labels_basis[label] = id;
data->labels_basis.push_back(label);
data->param_aq(id, 0) = charge_from_label(label);
if (species.isMember(INDB_ATTRIBUTE_ACTIVITY))
{
data->param_aq(id, 1) = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_A].asDouble();
data->param_aq(id, 2) = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_B].asDouble();
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_MOLARMASS);
data->_molar_mass_basis(id) = species[INDB_ATTRIBUTE_MOLARMASS].asDouble();
}
}
void DataReader::parse_aqueous()
{
DEBUG << "Parse aqueous species";
Json::Value& aqueous = get_mandatory_section(m_root, INDB_SECTION_AQUEOUS);
data->labels_aqueous.reserve(data->nb_aqueous);
data->nu_aqueous = reaction_mat_t::Zero(data->nb_aqueous, data->nb_component+data->nb_aqueous);
data->logk_aqueous = logK_vector_t(data->nb_aqueous);
for (int id=0; id<data->nb_aqueous; ++id)
{
Json::Value& species = aqueous[id];
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
data->labels_aqueous.push_back(label);
const int idparam = id + data->nb_component;
data->param_aq(idparam, 0) = charge_from_label(label);
if (species.isMember(INDB_ATTRIBUTE_ACTIVITY))
{
data->param_aq(idparam, 1) = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_A].asDouble();
data->param_aq(idparam, 2) = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_B].asDouble();
}
// equation
check_for_mandatory_value(species, INDB_ATTRIBUTE_COMPOSITION);
std::map<std::string, scalar_t> compo;
parse_equation(species[INDB_ATTRIBUTE_COMPOSITION].asString(), compo);
double test_charge=0;
for (auto it=compo.begin(); it != compo.end(); ++it)
{
std::map<std::string, index_t>::const_iterator search = data->map_labels_basis.find(it->first);
if(search != data->map_labels_basis.end()) {
data->nu_aqueous(id, search->second) = it->second;
test_charge += it->second*data->param_aq(search->second, 0);
}
else {
vector_labels_t::const_iterator searchaq = std::find(data->labels_aqueous.begin(),
data->labels_aqueous.end(),
it->first);
if (searchaq != data->labels_aqueous.end())
{
const int id_aq = (searchaq - data->labels_aqueous.begin()) + data->nb_component;
data->nu_aqueous(id, id_aq) = it->second;
test_charge += it->second*data->param_aq(id_aq, 0);
}
else
{
throw db_invalid_syntax("Unknown species : '" + it->first + "' while parsing equations for " + label +".");
}
}
}
if (test_charge != data->param_aq(idparam, 0))
{
throw db_invalid_data("Total charge is not zero for aqueous species : '"+label+
"' - Charge-decomposition - charge species : "+
std::to_string(test_charge - data->param_aq(id, 0) )+".");
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_LOGK);
data->logk_aqueous(id) = species[INDB_ATTRIBUTE_LOGK].asDouble();
}
}
void DataReader::parse_minerals()
{
DEBUG << "Parse minerals";
Json::Value& minerals = get_mandatory_section(m_root, INDB_SECTION_MINERALS);
// find kinetic minerals
int nb_kin = 0;
for (int id=0; id<data->nb_mineral; ++id)
{
bool is_kin = false;
if (minerals[id].isMember(INDB_ATTRIBUTE_FLAG_KINETIC))
{
is_kin = minerals[id][INDB_ATTRIBUTE_FLAG_KINETIC].asBool();
}
if (is_kin) ++nb_kin;
}
data->nb_mineral = data->nb_mineral - nb_kin;
data->labels_minerals.reserve(data->nb_mineral);
data->nu_mineral = reaction_mat_t::Zero(data->nb_mineral, data->nb_component+data->nb_aqueous);
data->logk_mineral = logK_vector_t(data->nb_mineral);
data->_molar_volume_mineral.resize(data->nb_mineral);
data->_stability_mineral = list_stability_t(data->nb_mineral, MineralStabilityClass::equilibrium);
data->nb_mineral_kinetic = nb_kin;
data->labels_minerals_kinetic.reserve(data->nb_mineral_kinetic);
data->nu_mineral_kinetic = reaction_mat_t::Zero(data->nb_mineral_kinetic, data->nb_component+data->nb_aqueous);
data->logk_mineral_kinetic = logK_vector_t(data->nb_mineral_kinetic);
data->_molar_volume_mineral_kinetic.resize(data->nb_mineral);
// id for each class of minerals
int id_in_eq=0;
int id_in_kin=0;
for (int id=0; id<data->nb_mineral+data->nb_mineral_kinetic; ++id)
{
Json::Value& species = minerals[id];
//check if material is at equilibrium or governed by kinetics
bool is_kin = false;
if (species.isMember(INDB_ATTRIBUTE_FLAG_KINETIC))
{
is_kin = species[INDB_ATTRIBUTE_FLAG_KINETIC].asBool();
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
const std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
if (is_kin) data->labels_minerals_kinetic.push_back(label);
else data->labels_minerals.push_back(label);
// equation
// --------
check_for_mandatory_value(species, INDB_ATTRIBUTE_COMPOSITION);
std::map<std::string, scalar_t> compo;
parse_equation(species[INDB_ATTRIBUTE_COMPOSITION].asString(), compo);
double test_charge = 0;
for (auto it=compo.begin(); it != compo.end(); ++it)
{
std::map<std::string, index_t>::const_iterator search = data->map_labels_basis.find(it->first);
if(search != data->map_labels_basis.end()) { // this is a component
if (is_kin) data->nu_mineral_kinetic(id_in_kin, search->second) = it->second;
else data->nu_mineral(id_in_eq, search->second) = it->second;
test_charge += it->second * data->param_aq(search->second, 0);
}
else { // this is an aqueous species
vector_labels_t::const_iterator searchaq = std::find(data->labels_aqueous.begin(),
data->labels_aqueous.end(),
it->first);
if (searchaq != data->labels_aqueous.end())
{
int id_aq = searchaq-data->labels_aqueous.begin()+data->nb_component;
if (is_kin) data->nu_mineral_kinetic(id_in_kin,id_aq) = it->second;
else data->nu_mineral(id_in_eq, id_aq) = it->second;
test_charge += it->second * data->param_aq(id_aq, 0);
}
else // or something we don't know
{
throw db_invalid_syntax("Unknown species : '" + it->first + "' while parsing equations for " + label +".");
}
}
}
if (test_charge != 0)
{
throw db_invalid_data("Total charge is not zero for mineral : '"+label+
"' - total charge : "+std::to_string(test_charge)+".");
}
// log(K)
// ------
check_for_mandatory_value(species, INDB_ATTRIBUTE_LOGK);
if (is_kin) data->logk_mineral_kinetic(id_in_kin) = species[INDB_ATTRIBUTE_LOGK].asDouble();
else data->logk_mineral(id_in_eq) = species[INDB_ATTRIBUTE_LOGK].asDouble();
// Molar volume
// -------------
if (species.isMember(INDB_ATTRIBUTE_MOLARVOLUME))
{
if (is_kin) data->_molar_volume_mineral_kinetic(id_in_kin) = species[INDB_ATTRIBUTE_MOLARVOLUME].asDouble();
else data->_molar_volume_mineral(id_in_eq) = species[INDB_ATTRIBUTE_MOLARVOLUME].asDouble();
}
else
{
if (is_kin) data->_molar_volume_mineral_kinetic(id_in_kin) = -1;
else data->_molar_volume_mineral(id_in_eq) = -1;
}
// update indices
if (is_kin) {++id_in_kin;}
else {++id_in_eq;}
}
assert(id_in_kin == data->nb_mineral_kinetic);
assert(id_in_eq == data->nb_mineral);
}
void DataReader::parse_gas()
{
DEBUG << "Parse gas";
Json::Value& gas = get_mandatory_section(m_root, INDB_SECTION_GAS);
data->labels_gas.reserve(data->nb_gas);
data->nu_gas = reaction_mat_t::Zero(data->nb_gas, data->nb_component+data->nb_aqueous);
data->logk_gas = logK_vector_t(data->nb_gas);
for (int id=0; id<data->nb_gas; ++id)
{
Json::Value& species = gas[id];
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
data->labels_gas.push_back(label);
// equation
check_for_mandatory_value(species, INDB_ATTRIBUTE_COMPOSITION);
std::map<std::string, scalar_t> compo;
parse_equation(species[INDB_ATTRIBUTE_COMPOSITION].asString(), compo);
double test_charge=0;
for (auto it=compo.begin(); it != compo.end(); ++it)
{
std::map<std::string, index_t>::const_iterator search = data->map_labels_basis.find(it->first);
if(search != data->map_labels_basis.end()) {
data->nu_gas(id, search->second) = it->second;
test_charge += it->second*data->param_aq(search->second, 0);
}
else {
vector_labels_t::const_iterator searchaq = std::find(data->labels_aqueous.begin(),
data->labels_aqueous.end(),
it->first);
if (searchaq != data->labels_aqueous.end())
{
const int id_aq = (searchaq - data->labels_aqueous.begin()) + data->nb_component;
data->nu_gas(id, id_aq) = it->second;
test_charge += it->second*data->param_aq(id_aq, 0);
}
else
{
throw db_invalid_syntax("Unknown species : '" + it->first + "' while parsing equations for " + label +".");
}
}
}
if (test_charge != 0)
{
throw db_invalid_data("Total charge is not zero for gas '"+label+
"' : " + std::to_string(test_charge)+".");
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_LOGK);
data->logk_gas(id) = species[INDB_ATTRIBUTE_LOGK].asDouble();
}
}
void DataReader::parse_sorbed()
{
DEBUG << "Parse sorbed species";
Json::Value& sorbed = get_mandatory_section(m_root, INDB_SECTION_SORBED);
data->labels_sorbed.reserve(data->nb_sorbed);
data->nu_sorbed = reaction_mat_t::Zero(data->nb_sorbed, data->nb_component+data->nb_aqueous+1);
data->logk_sorbed = logK_vector_t(data->nb_sorbed);
for (int id=0; id<data->nb_sorbed; ++id)
{
Json::Value& species = sorbed[id];
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
data->labels_sorbed.push_back(label);
// equation
check_for_mandatory_value(species, INDB_ATTRIBUTE_COMPOSITION);
std::map<std::string, scalar_t> compo;
parse_equation(species[INDB_ATTRIBUTE_COMPOSITION].asString(), compo);
double test_charge=0;
for (auto it=compo.begin(); it != compo.end(); ++it)
{
std::map<std::string, index_t>::const_iterator search = data->map_labels_basis.find(it->first);
if(search != data->map_labels_basis.end()) {
data->nu_sorbed(id, search->second) = it->second;
test_charge += it->second*data->param_aq(search->second, 0);
}
else {
vector_labels_t::const_iterator searchaq = std::find(data->labels_aqueous.begin(),
data->labels_aqueous.end(),
it->first);
if (searchaq != data->labels_aqueous.end())
{
const int id_aq = (searchaq - data->labels_aqueous.begin()) + data->nb_component;
data->nu_sorbed(id, id_aq) = it->second;
test_charge += it->second*data->param_aq(id_aq, 0);
}
else
{
throw db_invalid_syntax("Unknown species : '" + it->first + "' while parsing equations for " + label +".");
}
}
}
if (test_charge != 0)
{
throw db_invalid_data("Total charge is not zero for gas '"+label+
"' : " + std::to_string(test_charge)+".");
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_NBSITEOCCUPIED);
const scalar_t nb_site_occupied = species[INDB_ATTRIBUTE_NBSITEOCCUPIED].asDouble();
if (nb_site_occupied < 0)
{
throw db_invalid_data("The number of sites occupied by a sorbed species must be positive. (Species : "
+ label + ", number of sites : " +std::to_string(nb_site_occupied) + ")");
}
data->nu_sorbed(id, data->nb_component+data->nb_aqueous) = species[INDB_ATTRIBUTE_NBSITEOCCUPIED].asDouble();
check_for_mandatory_value(species, INDB_ATTRIBUTE_LOGK);
data->logk_sorbed(id) = species[INDB_ATTRIBUTE_LOGK].asDouble();
}
}
void parse_equation(const std::string& equation, std::map<std::string, scalar_t> &compo)
{
std::vector<std::string> list_compo;
boost::split(list_compo, equation, [](char input){return input == ',';}, boost::token_compress_on);
for (auto it=list_compo.begin(); it!=list_compo.end(); ++it)
{
std::string& toparse = *it;
double coeff = 0;
boost::trim_all(toparse);
unsigned int pos_end = 0;
while (pos_end < toparse.size())
{
if (std::isalpha(toparse[pos_end])) // or std::isblank(toparse[pos_end]))
{
break;
}
++pos_end;
}
std::string label(toparse.substr(pos_end, toparse.size()-pos_end));
boost::trim_all(label);
if (pos_end == 0) coeff =1;
else if (pos_end == 1 and toparse[0] == '-') coeff=-1;
else
{
std::string tofloat = toparse.substr(0, pos_end);
while (pos_end > 1)
{
if ((tofloat[0] == '-' or tofloat[0] == '+')
and std::isspace(tofloat[1]) )
{
tofloat = tofloat[0] + tofloat.substr(2,pos_end-2);
--pos_end;
continue;
}
else
{
break;
}
}
if ( tofloat[pos_end-1] == '-' ) {coeff = -1;}
else if (tofloat[pos_end-1] == '+') {coeff = +1;}
else {coeff = std::stof(tofloat);}
}
compo[label] = coeff;
}
}
double charge_from_label(const std::string& label)
{
int sign=1;
auto start= label.end();
auto stop = label.end();
for (auto it=label.begin(); it != label.end(); ++it)
{
if (*it == INDB_OPEN_DELIMITER_CHARGE) { start = it; }
}
if (start == label.end()) {return 0;} // no charge specified
for (auto it=start+1; it != label.end(); ++it)
{
if (*it == INDB_CLOSE_DELIMITER_CHARGE) { stop = it; }
}
if (stop == label.end()) {throw db_invalid_syntax("Charge : missing closing delimiter for species : "+label+".");}
start = start+1;
if (stop == start) {return 0;} // nothing inside bracket
// get the signs
if (*(stop-1) == '-')
{
sign = -1;
stop =stop-1;
}
else if (*(stop-1) == '+')
{
sign = +1;
stop = stop-1;
}
if (stop == start)
{
return sign; // just the sign, |z|=1
}
double charge;
try
{
charge = sign*std::stof(std::string(start,stop));
}
catch (std::invalid_argument&)
{
throw db_invalid_syntax("Charge : bad formatting for species :"+ label+".");
}
return charge;
}
} // end namespace specmicp
} // end namespace chemy
#undef INDB_SECTION_BASIS
#undef INDB_SECTION_AQUEOUS
#undef INDB_SECTION_MINERALS
#undef INDB_SECTION_GAS
#undef INDB_ATTRIBUTE_NAME
#undef INDB_ATTRIBUTE_LABEL
#undef INDB_ATTRIBUTE_ACTIVITY
#undef INDB_ATTRIBUTE_ACTIVITY_A
#undef INDB_ATTRIBUTE_ACTIVITY_B
#undef INDB_ATTRIBUTE_MOLARMASS
#undef INDB_ATTRIBUTE_COMPOSITION
#undef INDB_ATTRIBUTE_LOGK
#undef INDB_ATTRIBUTE_FLAG_KINETIC
#undef INDB_OPEN_DELIMITER_CHARGE
#undef INDB_CLOSE_DELIMITER_CHARGE

Event Timeline