Page MenuHomec4science

reader.cpp
No OneTemporary

File Metadata

Created
Thu, Oct 10, 13:29

reader.cpp

/*-------------------------------------------------------------------------------
Copyright (c) 2014,2015 F. Georget <fabieng@princeton.edu>, Princeton University
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------*/
#include "reader.hpp"
#include "json/json.h"
#include "../utils/json.hpp"
#include <fstream>
#include <stdexcept>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim_all.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <locale>
#include "utils/log.hpp"
#include "section_name.hpp"
#define PRECISION_TEST_CHARGE 1e-6
#include <iostream>
namespace specmicp {
namespace database {
// safe access to values
std::string obtain_label(Json::Value& species);
void obtain_composition(Json::Value& species, std::map<std::string, scalar_t>& compo);
scalar_t obtain_logk(Json::Value& species);
bool is_kinetic(Json::Value& species);
void DataReader::parse(const std::string& filepath)
{
std::ifstream datafile(filepath, std::ifstream::binary);
if (not datafile.is_open())
{
ERROR << "Database not found : " << filepath;
std::string message("Database not found : " + filepath);
throw std::invalid_argument(message);
}
data->metadata.path = filepath;
parse(datafile);
datafile.close();
}
void DataReader::parse(std::istream& input )
{
Json::Value root;
json::parse_json(input, root);
json::check_for_mandatory_member(root, INDB_SECTION_METADATA);
parse_metadata(root[INDB_SECTION_METADATA]);
// Basis
// ------
json::check_for_mandatory_member(root, INDB_SECTION_BASIS);
parse_basis(root[INDB_SECTION_BASIS]);
// Aqueous species
// ---------------
json::check_for_mandatory_member(root, INDB_SECTION_AQUEOUS);
AqueousList alist;
parse_aqueous(root[INDB_SECTION_AQUEOUS], alist);
data->aqueous = std::move(alist);
// Minerals
// --------
json::check_for_mandatory_member(root, INDB_SECTION_MINERALS);
MineralList minerals, minerals_kinetic;
parse_minerals(root[INDB_SECTION_MINERALS], minerals, minerals_kinetic);
data->minerals = std::move(minerals);
data->minerals_kinetic = std::move(minerals_kinetic);
// Gas
// ----
GasList glist(0, data->nb_component());
if (root.isMember(INDB_SECTION_GAS))
parse_gas(root[INDB_SECTION_GAS], glist);
else glist.set_valid();
data->gas = std::move(glist);
// Sorbed species
// --------------
SorbedList slist(0, data->nb_component());
if (root.isMember(INDB_SECTION_SORBED))
parse_sorbed(root[INDB_SECTION_SORBED], slist);
else slist.set_valid();
data->sorbed = std::move(slist);
// Compounds
// ---------
CompoundList clist(0, data->nb_component());
if (root.isMember(INDB_SECTION_COMPOUNDS))
parse_compounds(root[INDB_SECTION_COMPOUNDS], clist);
else clist.set_valid();
data->compounds = std::move(clist);
}
void DataReader::parse_metadata(Json::Value &root)
{
json::check_for_mandatory_member(root, INDB_ATTRIBUTE_NAME);
data->metadata.name = root[INDB_ATTRIBUTE_NAME].asString();
json::check_for_mandatory_member(root, INDB_ATTRIBUTE_VERSION);
data->metadata.version = root[INDB_ATTRIBUTE_VERSION].asString();
}
void DataReader::parse_basis(Json::Value& basis)
{
DEBUG << "Size basis : " << basis.size();
data->components = ComponentList(basis.size());
if (data->nb_component() <= 3)
throw std::invalid_argument("The basis is too small.");
index_t starting_point = 2;
for (int id: data->components.range())
{
index_t id_in_db = starting_point;
if (id_in_db >= data->nb_component())
{
throw std::invalid_argument("The basis should contain H2O and E[-].");
}
Json::Value species = basis[id];
json::check_for_mandatory_member(species, INDB_ATTRIBUTE_LABEL);
std::string label = obtain_label(species);
if (label == water_label) {
id_in_db = 0;
//throw invalid_database("The first component should be "+water_label+".");
}
else if (label == electron_label) {
id_in_db = 1;
//throw invalid_database("The second component should be "+electron_label+".");
}
else {
++starting_point;
}
auto is_already_in_the_database = data->components.get_id(label);
if (is_already_in_the_database != no_species)
throw invalid_database("Component : " + label + "is already in the database.");
SPAM << "Parsing component : " << label;
ComponentValues values;
values.label = label;
values.ionic_values.charge = charge_from_label(label);
if (species.isMember(INDB_ATTRIBUTE_ACTIVITY))
{
values.ionic_values.a_debye = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_A].asDouble();
values.ionic_values.b_debye = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_B].asDouble();
}
json::check_for_mandatory_member(species, INDB_ATTRIBUTE_MOLARMASS);
values.molar_mass = species[INDB_ATTRIBUTE_MOLARMASS].asDouble();
data->components.set_values(id_in_db, std::move(values));
}
data->components.set_valid();
}
void DataReader::parse_aqueous(Json::Value& aqueous, AqueousList& alist)
{
DEBUG << "Parse aqueous species";
alist = AqueousList(aqueous.size(), data->nb_component());
//const auto size = data->nb_aqueous();
for (auto id: alist.range())
{
Json::Value& species = aqueous[static_cast<int>(id)];
AqueousValues values;
values.label = obtain_label(species);
values.ionic_values.charge = charge_from_label(values.label);
if (species.isMember(INDB_ATTRIBUTE_ACTIVITY))
{
values.ionic_values.a_debye = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_A].asDouble();
values.ionic_values.b_debye = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_B].asDouble();
}
values.logk = obtain_logk(species);
alist.set_values(id, std::move(values));
// equation
std::map<std::string, scalar_t> compo;
obtain_composition(species, compo);
scalar_t test_charge {0.0};
std::map<index_t, scalar_t> to_canonicalize;
for (const auto& it: compo)
{
// component ?
const auto search = data->components.get_id(it.first);
if(search != no_species) {
alist.set_nu_ji(id, search, it.second);
test_charge += it.second*data->charge_component(search);
}
// aqueous species ?
else {
// in the document we parse ?
const auto id_aq = alist.get_id(it.first);
if (id_aq != no_species)
{
to_canonicalize.insert({id_aq, it.second});
test_charge += it.second*alist.charge(id_aq);
}
// cannot be found...
else
{
throw db_invalid_syntax("Unknown species : '" + it.first
+ "' while parsing equations for " + alist.get_label(id) +".");
}
}
}
if (std::abs(test_charge - alist.charge(id)) > PRECISION_TEST_CHARGE)
{
throw db_invalid_data("Total charge is not zero for aqueous species : '" +
alist.get_label(id) +
"' - Charge-decomposition - charge species : "+
std::to_string(test_charge - alist.charge(id) )+".");
}
for (const auto& tocanon: to_canonicalize)
{
alist.canonicalize(id, tocanon.first, tocanon.second);
}
}
alist.set_valid();
}
void DataReader::parse_minerals(Json::Value& minerals, MineralList& minerals_list, MineralList& minerals_kinetic_list)
{
DEBUG << "Parse minerals";
index_t nb_mineral = minerals.size();
// find kinetic minerals
int nb_kin = 0;
for (int id=0; id<nb_mineral; ++id)
{
if (is_kinetic(minerals[id])) ++nb_kin;
}
minerals_list = MineralList(nb_mineral - nb_kin, data->nb_component());
minerals_kinetic_list = MineralList(nb_kin, data->nb_component());
// id for each class of minerals
index_t id_in_eq=0;
index_t id_in_kin=0;
for (index_t id=0; id<nb_mineral; ++id)
{
Json::Value& species = minerals[static_cast<int>(id)];
//check if material is at equilibrium or governed by kinetics
bool is_kin = is_kinetic(species);
MineralValue value;
value.label = std::move(obtain_label(species));
value.logk = obtain_logk(species);
if (species.isMember(INDB_ATTRIBUTE_MOLARVOLUME))
{
value.molar_volume = species[INDB_ATTRIBUTE_MOLARVOLUME].asDouble();
}
else
{
// ###FIXME
value.molar_volume = -1;
}
auto& mlist = is_kin?minerals_kinetic_list:minerals_list;
auto& true_id = is_kin?id_in_kin:id_in_eq;
mlist.set_values(true_id, std::move(value));
// equation
// --------
std::map<std::string, scalar_t> compo;
obtain_composition(species, compo);
double test_charge = 0;
std::map<index_t, scalar_t> to_canonicalize;
for (const auto& it: compo)
{
auto idc = data->components.get_id(it.first);
if(idc != no_species) { // this is a component
mlist.set_nu_ji(true_id, idc, it.second);
test_charge += it.second * data->charge_component(idc);
}
else { // this is an aqueous species
auto idaq = data->aqueous.get_id(it.first);
if (idaq != no_species)
{
to_canonicalize.insert({idaq, it.second});
test_charge += it.second * data->charge_aqueous(idaq);
}
else // or something we don't know
{
throw db_invalid_syntax("Unknown species : '" + it.first
+ "' while parsing equations for " + mlist.get_label(true_id) +".");
}
}
}
if (std::abs(test_charge) > PRECISION_TEST_CHARGE)
{
throw db_invalid_data("Total charge is not zero for mineral : '"+ mlist.get_label(true_id) +
"' - total charge : "+std::to_string(test_charge)+".");
}
// canonicalise
for (const auto& tocanon: to_canonicalize)
{
mlist.canonicalize(true_id, data->aqueous, tocanon.first, tocanon.second);
}
// update index
++true_id;
}
specmicp_assert(id_in_kin == minerals_kinetic_list.size());
specmicp_assert(id_in_eq == minerals_list.size());
minerals_list.set_valid();
minerals_kinetic_list.set_valid();
}
void DataReader::parse_gas(Json::Value& gas, GasList& glist)
{
DEBUG << "Parse gas";
glist = GasList(gas.size(), data->nb_component());
for (auto id: glist.range())
{
Json::Value& species = gas[static_cast<int>(id)];
GasValues values;
values.label = obtain_label(species);
auto is_already_in_the_database = data->gas.get_id(values.label);
if (is_already_in_the_database != no_species)
throw invalid_database("Component : " + values.label + "is already in the database.");
values.logk = obtain_logk(species);
glist.set_values(id, std::move(values));
// equation
std::map<std::string, scalar_t> compo;
obtain_composition(species, compo);
scalar_t test_charge = 0.0;
std::map<index_t, scalar_t> to_canonicalize;
for (auto it: compo)
{
const auto idc = data->components.get_id(it.first);
// component
if(idc != no_species) {
glist.set_nu_ji(id, idc, it.second);
test_charge += it.second*data->charge_component(idc);
}
// aqueous species
else {
const auto idaq = data->aqueous.get_id(it.first);
if (idaq != no_species)
{
to_canonicalize.insert({idaq, it.second});
test_charge += it.second*data->charge_aqueous(idaq);
}
else
{
throw db_invalid_syntax("Unknown species : '" + it.first +
"' while parsing equations for " + data->gas.get_label(id) +".");
}
}
}
if (std::abs(test_charge) > PRECISION_TEST_CHARGE)
{
throw db_invalid_data("Total charge is not zero for gas '"+data->gas.get_label(id)+
"' : " + std::to_string(test_charge)+".");
}
// canonicalise
for (const auto& tocanon: to_canonicalize)
{
glist.canonicalize(id, data->aqueous, tocanon.first, tocanon.second);
}
}
glist.set_valid();
}
void DataReader::parse_sorbed(Json::Value& sorbed, SorbedList& slist)
{
DEBUG << "Parse sorbed species";
slist = SorbedList(sorbed.size(), data->nb_component());
for (auto id: slist.range())
{
Json::Value& species = sorbed[static_cast<int>(id)];
SorbedValues values;
values.label = obtain_label(species);
auto is_already_in_the_database = slist.get_id(values.label);
if (is_already_in_the_database != no_species)
throw invalid_database("Sorbed species : " + values.label + "is already in the database.");
values.logk = obtain_logk(species);
json::check_for_mandatory_member(species, INDB_ATTRIBUTE_NBSITEOCCUPIED);
const auto nb_site_occupied = species[INDB_ATTRIBUTE_NBSITEOCCUPIED].asDouble();
if (nb_site_occupied < 0)
{
throw db_invalid_data("The number of sites occupied by a sorbed species must be positive. (Species : "
+ values.label + ", number of sites : " +std::to_string(nb_site_occupied) + ")");
}
values.sorption_site_occupied = nb_site_occupied;
slist.set_values(id, std::move(values));
std::map<std::string, scalar_t> compo;
obtain_composition(species, compo);
double test_charge = 0;
std::map<index_t, scalar_t> to_canonicalize;
for (auto it: compo)
{
const auto idc = data->components.get_id(it.first);
if(idc != no_species) {
slist.set_nu_ji(id, idc, it.second);
test_charge += it.second*data->charge_component(idc);
}
else {
const auto idaq = data->aqueous.get_id(it.first);
if (idaq != no_species)
{
to_canonicalize.insert({idaq, it.second});
test_charge += it.second*data->charge_aqueous(idaq);
}
else
{
throw db_invalid_syntax("Unknown species : '" + it.first
+ "' while parsing equations for " + slist.get_label(id) +".");
}
}
}
if (std::abs(test_charge) > PRECISION_TEST_CHARGE)
{
throw db_invalid_data("Total charge is not zero for gas '"+slist.get_label(id)+
"' : " + std::to_string(test_charge)+".");
}
// canonicalise
for (const auto& tocanon: to_canonicalize)
{
slist.canonicalize(id, data->aqueous, tocanon.first, tocanon.second);
}
}
slist.set_valid();
}
void DataReader::parse_compounds(Json::Value& compounds, CompoundList &clist)
{
DEBUG << "Parse compounds";
clist = CompoundList(compounds.size(), data->nb_component());
for (auto id: clist.range())
{
Json::Value& species = compounds[static_cast<int>(id)];
std::string label = obtain_label(species);
auto is_already_in_the_database = clist.get_id(label);
if (is_already_in_the_database != no_species)
throw invalid_database("Compounds : " + label + "is already in the database.");
clist.set_values(id, label);
std::map<std::string, scalar_t> compo;
obtain_composition(species, compo);
double test_charge = 0;
std::map<index_t, scalar_t> to_canonicalize;
for (auto it: compo)
{
const auto idc = data->components.get_id(it.first);
if(idc != no_species) {
clist.set_nu_ji(id, idc, it.second);
test_charge += it.second*data->charge_component(idc);
}
else {
const auto idaq = data->aqueous.get_id(it.first);
if (idaq != no_species)
{
to_canonicalize.insert({idaq, it.second});
test_charge += it.second*data->charge_aqueous(idaq);
}
else
{
throw db_invalid_syntax("Unknown species : '" + it.first
+ "' while parsing equations for " + clist.get_label(id) +".");
}
}
}
if (std::abs(test_charge) > PRECISION_TEST_CHARGE)
{
throw db_invalid_data("Total charge is not zero for gas '"+clist.get_label(id)+
"' : " + std::to_string(test_charge)+".");
}
// canonicalise
for (const auto& tocanon: to_canonicalize)
{
clist.canonicalize(id, data->aqueous, tocanon.first, tocanon.second);
}
}
clist.set_valid();
}
void parse_equation(const std::string& equation, std::map<std::string, scalar_t> &compo)
{
std::vector<std::string> list_compo;
boost::split(list_compo, equation, [](char input){return input == ',';}, boost::token_compress_on);
for (auto it=list_compo.begin(); it!=list_compo.end(); ++it)
{
std::string& toparse = *it;
double coeff = 0;
boost::trim_all(toparse);
unsigned int pos_end = 0;
while (pos_end < toparse.size())
{
if (std::isalpha(toparse[pos_end])) // or std::isblank(toparse[pos_end]))
{
break;
}
++pos_end;
}
std::string label(toparse.substr(pos_end, toparse.size()-pos_end));
boost::trim_all(label);
if (pos_end == 0) coeff =1;
else if (pos_end == 1 and toparse[0] == '-') coeff=-1;
else
{
std::string tofloat = toparse.substr(0, pos_end);
while (pos_end > 1)
{
if ((tofloat[0] == '-' or tofloat[0] == '+')
and std::isspace(tofloat[1]) )
{
tofloat = tofloat[0] + tofloat.substr(2,pos_end-2);
--pos_end;
continue;
}
else
{
break;
}
}
if ( tofloat[pos_end-1] == '-' ) {coeff = -1;}
else if (tofloat[pos_end-1] == '+') {coeff = +1;}
else {coeff = std::stof(tofloat);}
}
compo[label] = coeff;
}
}
double charge_from_label(const std::string& label)
{
int sign=1;
auto start= label.end();
auto stop = label.end();
for (auto it=label.begin(); it != label.end(); ++it)
{
if (*it == INDB_OPEN_DELIMITER_CHARGE) { start = it; }
}
if (start == label.end()) {return 0;} // no charge specified
for (auto it=start+1; it != label.end(); ++it)
{
if (*it == INDB_CLOSE_DELIMITER_CHARGE) { stop = it; }
}
if (stop == label.end()) {throw db_invalid_syntax("Charge : missing closing delimiter for species : "+label+".");}
start = start+1;
if (stop == start) {return 0;} // nothing inside bracket
// get the signs
if (*(stop-1) == '-')
{
sign = -1;
stop =stop-1;
}
else if (*(stop-1) == '+')
{
sign = +1;
stop = stop-1;
}
if (stop == start)
{
return sign; // just the sign, |z|=1
}
double charge;
try
{
charge = sign*std::stof(std::string(start,stop));
}
catch (std::invalid_argument&)
{
throw db_invalid_syntax("Charge : bad formatting for species :"+ label+".");
}
return charge;
}
std::string obtain_label(Json::Value& species)
{
json::check_for_mandatory_member(species, INDB_ATTRIBUTE_LABEL);
return species[INDB_ATTRIBUTE_LABEL].asString();
}
void obtain_composition(Json::Value& species, std::map<std::string, scalar_t>& compo)
{
json::check_for_mandatory_member(species, INDB_ATTRIBUTE_COMPOSITION);
parse_equation(species[INDB_ATTRIBUTE_COMPOSITION].asString(), compo);
}
scalar_t obtain_logk(Json::Value& species)
{
json::check_for_mandatory_member(species, INDB_ATTRIBUTE_LOGK);
return species[INDB_ATTRIBUTE_LOGK].asDouble();
}
bool is_kinetic(Json::Value& species)
{
bool is_kin {false};
if (species.isMember(INDB_ATTRIBUTE_FLAG_KINETIC))
{
is_kin = species[INDB_ATTRIBUTE_FLAG_KINETIC].asBool();
}
return is_kin;
}
} // end namespace specmicp
} // end namespace chemy

Event Timeline