Page MenuHomec4science

reader.cpp
No OneTemporary

File Metadata

Created
Sat, Nov 16, 04:12

reader.cpp

/*-------------------------------------------------------
- Module : database
- File : reader.cpp
- Author : Fabien Georget
Copyright (c) 2014, Fabien Georget, Princeton University
---------------------------------------------------------*/
#include "reader.hpp"
#include "jsoncpp/json/reader.h"
#include <fstream>
#include <stdexcept>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim_all.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <locale>
#include "utils/log.hpp"
#define INDB_SECTION_BASIS "Basis"
#define INDB_SECTION_AQUEOUS "Aqueous"
#define INDB_SECTION_MINERALS "Minerals"
#define INDB_SECTION_GAS "Gas"
#define INDB_ATTRIBUTE_LABEL "label"
#define INDB_ATTRIBUTE_ACTIVITY "activity"
#define INDB_ATTRIBUTE_ACTIVITY_A "a"
#define INDB_ATTRIBUTE_ACTIVITY_B "b"
#define INDB_ATTRIBUTE_MOLARMASS "molar_mass"
#define INDB_ATTRIBUTE_MOLARVOLUME "molar_volume"
#define INDB_ATTRIBUTE_COMPOSITION "composition"
#define INDB_ATTRIBUTE_LOGK "log_k"
#define INDB_ATTRIBUTE_FLAG_KINETIC "flag_kinetic"
#define INDB_OPEN_DELIMITER_CHARGE '['
#define INDB_CLOSE_DELIMITER_CHARGE ']'
namespace specmicp {
namespace database {
void DataReader::parse()
{
std::ifstream datafile(m_filepath, std::ifstream::binary);
Json::Reader reader;
if (not datafile.is_open())
{
ERROR << "Database not found : " << m_filepath;
std::string message("Database not found : " + m_filepath);
throw std::invalid_argument(message);
}
DEBUG << "Parsing database";
bool parsingSuccessful = reader.parse( datafile, m_root);
if ( !parsingSuccessful )
{
// report to the user the failure and their locations in the document.
std::string message("Failed to parse configuration\n" + reader.getFormatedErrorMessages());
throw std::runtime_error(message);
}
parse_size_db();
parse_basis();
parse_aqueous();
parse_minerals();
parse_gas();
datafile.close();
}
void DataReader::check_database()
{
}
void DataReader::parse_size_db()
{
data->nb_component = m_root[INDB_SECTION_BASIS].size();
data->nb_aqueous = m_root[INDB_SECTION_AQUEOUS].size();
data->nb_mineral = m_root[INDB_SECTION_MINERALS].size();
data->nb_gas = m_root[INDB_SECTION_GAS].size();
data->param_aq = Eigen::MatrixXd::Zero(data->nb_component+data->nb_aqueous, 3);
}
void DataReader::parse_basis()
{
Json::Value& basis = get_mandatory_section(m_root, INDB_SECTION_BASIS);
DEBUG << "Size basis : " << basis.size();
int size_basis = basis.size();
data->labels_basis.reserve(size_basis);
data->molar_mass_basis.resize(size_basis);
for (int id=0; id<size_basis; ++id)
{
Json::Value& species = basis[id];
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
SPAM << "Parsing component : " << label;
data->map_labels_basis[label] = id;
data->labels_basis.push_back(label);
data->param_aq(id, 0) = charge_from_label(label);
if (species.isMember(INDB_ATTRIBUTE_ACTIVITY))
{
data->param_aq(id, 1) = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_A].asDouble();
data->param_aq(id, 2) = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_B].asDouble();
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_MOLARMASS);
data->molar_mass_basis(id) = species[INDB_ATTRIBUTE_MOLARMASS].asDouble();
}
}
void DataReader::parse_aqueous()
{
DEBUG << "Parse aqueous species";
Json::Value& aqueous = get_mandatory_section(m_root, INDB_SECTION_AQUEOUS);
data->labels_aqueous.reserve(data->nb_aqueous);
data->nu_aqueous = reaction_mat_t::Zero(data->nb_aqueous, data->nb_component+data->nb_aqueous);
data->logk_aqueous = logK_vector_t(data->nb_aqueous);
for (int id=0; id<data->nb_aqueous; ++id)
{
Json::Value& species = aqueous[id];
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
data->labels_aqueous.push_back(label);
const int idparam = id + data->nb_component;
data->param_aq(idparam, 0) = charge_from_label(label);
if (species.isMember(INDB_ATTRIBUTE_ACTIVITY))
{
data->param_aq(idparam, 1) = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_A].asDouble();
data->param_aq(idparam, 2) = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_B].asDouble();
}
// equation
check_for_mandatory_value(species, INDB_ATTRIBUTE_COMPOSITION);
std::map<std::string, float> compo;
parse_equation(species[INDB_ATTRIBUTE_COMPOSITION].asString(), compo);
double test_charge=0;
for (auto it=compo.begin(); it != compo.end(); ++it)
{
std::map<std::string, int>::const_iterator search = data->map_labels_basis.find(it->first);
if(search != data->map_labels_basis.end()) {
data->nu_aqueous(id, search->second) = it->second;
test_charge += it->second*data->param_aq(search->second, 0);
}
else {
vector_labels_t::const_iterator searchaq = std::find(data->labels_aqueous.begin(),
data->labels_aqueous.end(),
it->first);
if (searchaq != data->labels_aqueous.end())
{
const int id_aq = (searchaq - data->labels_aqueous.begin()) + data->nb_component;
data->nu_aqueous(id, id_aq) = it->second;
test_charge += it->second*data->param_aq(id_aq, 0);
}
else
{
throw db_invalid_syntax("Unknown species : '" + it->first + "' while parsing equations for " + label +".");
}
}
}
if (test_charge != data->param_aq(idparam, 0))
{
throw db_invalid_data("Total charge is not zero for aqueous species : '"+label+
"' - Charge-decomposition - charge species : "+
std::to_string(test_charge - data->param_aq(id, 0) )+".");
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_LOGK);
data->logk_aqueous(id) = species[INDB_ATTRIBUTE_LOGK].asDouble();
}
}
void DataReader::parse_minerals()
{
DEBUG << "Parse minerals";
Json::Value& minerals = get_mandatory_section(m_root, INDB_SECTION_MINERALS);
// find kinetic minerals
int nb_kin = 0;
for (int id=0; id<data->nb_mineral; ++id)
{
bool is_kin = false;
if (minerals[id].isMember(INDB_ATTRIBUTE_FLAG_KINETIC))
{
is_kin = minerals[id][INDB_ATTRIBUTE_FLAG_KINETIC].asBool();
}
if (is_kin) ++nb_kin;
}
data->nb_mineral = data->nb_mineral - nb_kin;
data->labels_minerals.reserve(data->nb_mineral);
data->nu_mineral = reaction_mat_t::Zero(data->nb_mineral, data->nb_component+data->nb_aqueous);
data->logk_mineral = logK_vector_t(data->nb_mineral);
data->_molar_volume_mineral.resize(data->nb_mineral);
data->nb_mineral_kinetic = nb_kin;
data->labels_minerals_kinetic.reserve(data->nb_mineral_kinetic);
data->nu_mineral_kinetic = reaction_mat_t::Zero(data->nb_mineral_kinetic, data->nb_component+data->nb_aqueous);
data->logk_mineral_kinetic = logK_vector_t(data->nb_mineral_kinetic);
data->_molar_volume_mineral_kinetic.resize(data->nb_mineral);
// id for each class of minerals
int id_in_eq=0;
int id_in_kin=0;
for (int id=0; id<data->nb_mineral+data->nb_mineral_kinetic; ++id)
{
Json::Value& species = minerals[id];
//check if material is at equilibrium or governed by kinetics
bool is_kin = false;
if (species.isMember(INDB_ATTRIBUTE_FLAG_KINETIC))
{
is_kin = species[INDB_ATTRIBUTE_FLAG_KINETIC].asBool();
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
const std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
if (is_kin) data->labels_minerals_kinetic.push_back(label);
else data->labels_minerals.push_back(label);
// equation
// --------
check_for_mandatory_value(species, INDB_ATTRIBUTE_COMPOSITION);
std::map<std::string, float> compo;
parse_equation(species[INDB_ATTRIBUTE_COMPOSITION].asString(), compo);
double test_charge = 0;
for (auto it=compo.begin(); it != compo.end(); ++it)
{
std::map<std::string, int>::const_iterator search = data->map_labels_basis.find(it->first);
if(search != data->map_labels_basis.end()) { // this is a component
if (is_kin) data->nu_mineral_kinetic(id_in_kin, search->second) = it->second;
else data->nu_mineral(id_in_eq, search->second) = it->second;
test_charge += it->second * data->param_aq(search->second, 0);
}
else { // this is an aqueous species
vector_labels_t::const_iterator searchaq = std::find(data->labels_aqueous.begin(),
data->labels_aqueous.end(),
it->first);
if (searchaq != data->labels_aqueous.end())
{
int id_aq = searchaq-data->labels_aqueous.begin()+data->nb_component;
if (is_kin) data->nu_mineral_kinetic(id_in_kin,id_aq) = it->second;
else data->nu_mineral(id_in_eq, id_aq) = it->second;
test_charge += it->second * data->param_aq(id_aq, 0);
}
else // or something we don't know
{
throw db_invalid_syntax("Unknown species : '" + it->first + "' while parsing equations for " + label +".");
}
}
}
if (test_charge != 0)
{
throw db_invalid_data("Total charge is not zero for mineral : '"+label+
"' - total charge : "+std::to_string(test_charge)+".");
}
// log(K)
// ------
check_for_mandatory_value(species, INDB_ATTRIBUTE_LOGK);
if (is_kin) data->logk_mineral_kinetic(id_in_kin) = species[INDB_ATTRIBUTE_LOGK].asDouble();
else data->logk_mineral(id_in_eq) = species[INDB_ATTRIBUTE_LOGK].asDouble();
// Molar volume
// -------------
if (species.isMember(INDB_ATTRIBUTE_MOLARVOLUME))
{
if (is_kin) data->_molar_volume_mineral_kinetic(id_in_kin) = species[INDB_ATTRIBUTE_MOLARVOLUME].asDouble();
else data->_molar_volume_mineral(id_in_eq) = species[INDB_ATTRIBUTE_MOLARVOLUME].asDouble();
}
else
{
if (is_kin) data->_molar_volume_mineral_kinetic(id_in_kin) = -1;
else data->_molar_volume_mineral(id_in_eq) = -1;
}
// update indices
if (is_kin) {++id_in_kin;}
else {++id_in_eq;}
}
assert(id_in_kin == data->nb_mineral_kinetic);
assert(id_in_eq == data->nb_mineral);
}
void DataReader::parse_gas()
{
DEBUG << "Parse gas";
Json::Value& gas = get_mandatory_section(m_root, INDB_SECTION_GAS);
data->labels_gas.reserve(data->nb_gas);
data->nu_gas = reaction_mat_t::Zero(data->nb_gas, data->nb_component+data->nb_aqueous);
data->logk_gas = logK_vector_t(data->nb_gas);
for (int id=0; id<data->nb_gas; ++id)
{
Json::Value& species = gas[id];
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
data->labels_gas.push_back(label);
// equation
check_for_mandatory_value(species, INDB_ATTRIBUTE_COMPOSITION);
std::map<std::string, float> compo;
parse_equation(species[INDB_ATTRIBUTE_COMPOSITION].asString(), compo);
double test_charge=0;
for (auto it=compo.begin(); it != compo.end(); ++it)
{
std::map<std::string, int>::const_iterator search = data->map_labels_basis.find(it->first);
if(search != data->map_labels_basis.end()) {
data->nu_gas(id, search->second) = it->second;
test_charge += it->second*data->param_aq(search->second, 0);
}
else {
vector_labels_t::const_iterator searchaq = std::find(data->labels_aqueous.begin(),
data->labels_aqueous.end(),
it->first);
if (searchaq != data->labels_aqueous.end())
{
const int id_aq = (searchaq - data->labels_aqueous.begin()) + data->nb_component;
data->nu_gas(id, id_aq) = it->second;
test_charge += it->second*data->param_aq(id_aq, 0);
}
else
{
throw db_invalid_syntax("Unknown species : '" + it->first + "' while parsing equations for " + label +".");
}
}
}
if (test_charge != 0)
{
throw db_invalid_data("Total charge is not zero for gas '"+label+
"' : " + std::to_string(test_charge)+".");
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_LOGK);
data->logk_gas(id) = species[INDB_ATTRIBUTE_LOGK].asDouble();
}
}
void parse_equation(const std::string& equation, std::map<std::string, float>& compo)
{
std::vector<std::string> list_compo;
boost::split(list_compo, equation, [](char input){return input == ',';}, boost::token_compress_on);
for (auto it=list_compo.begin(); it!=list_compo.end(); ++it)
{
std::string& toparse = *it;
double coeff = 0;
boost::trim_all(toparse);
unsigned int pos_end = 0;
while (pos_end < toparse.size())
{
if (std::isalpha(toparse[pos_end])) // or std::isblank(toparse[pos_end]))
{
break;
}
++pos_end;
}
std::string label(toparse.substr(pos_end, toparse.size()-pos_end));
boost::trim_all(label);
if (pos_end == 0) coeff =1;
else if (pos_end == 1 and toparse[0] == '-') coeff=-1;
else
{
std::string tofloat = toparse.substr(0, pos_end);
while (pos_end > 1)
{
if ((tofloat[0] == '-' or tofloat[0] == '+')
and std::isspace(tofloat[1]) )
{
tofloat = tofloat[0] + tofloat.substr(2,pos_end-2);
--pos_end;
continue;
}
else
{
break;
}
}
if ( tofloat[pos_end-1] == '-' ) {coeff = -1;}
else if (tofloat[pos_end-1] == '+') {coeff = +1;}
else {coeff = std::stof(tofloat);}
}
compo[label] = coeff;
}
}
double charge_from_label(const std::string& label)
{
int sign=1;
auto start= label.end();
auto stop = label.end();
for (auto it=label.begin(); it != label.end(); ++it)
{
if (*it == INDB_OPEN_DELIMITER_CHARGE) { start = it; }
}
if (start == label.end()) {return 0;} // no charge specified
for (auto it=start+1; it != label.end(); ++it)
{
if (*it == INDB_CLOSE_DELIMITER_CHARGE) { stop = it; }
}
if (stop == label.end()) {throw db_invalid_syntax("Charge : missing closing delimiter for species : "+label+".");}
start = start+1;
if (stop == start) {return 0;} // nothing inside bracket
// get the signs
if (*(stop-1) == '-')
{
sign = -1;
stop =stop-1;
}
else if (*(stop-1) == '+')
{
sign = +1;
stop = stop-1;
}
if (stop == start)
{
return sign; // just the sign, |z|=1
}
double charge;
try
{
charge = sign*std::stof(std::string(start,stop));
}
catch (std::invalid_argument&)
{
throw db_invalid_syntax("Charge : bad formatting for species :"+ label+".");
}
return charge;
}
} // end namespace specmicp
} // end namespace chemy
#undef INDB_SECTION_BASIS
#undef INDB_SECTION_AQUEOUS
#undef INDB_SECTION_MINERALS
#undef INDB_SECTION_GAS
#undef INDB_ATTRIBUTE_NAME
#undef INDB_ATTRIBUTE_LABEL
#undef INDB_ATTRIBUTE_ACTIVITY
#undef INDB_ATTRIBUTE_ACTIVITY_A
#undef INDB_ATTRIBUTE_ACTIVITY_B
#undef INDB_ATTRIBUTE_MOLARMASS
#undef INDB_ATTRIBUTE_COMPOSITION
#undef INDB_ATTRIBUTE_LOGK
#undef INDB_ATTRIBUTE_FLAG_KINETIC
#undef INDB_OPEN_DELIMITER_CHARGE
#undef INDB_CLOSE_DELIMITER_CHARGE

Event Timeline