Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91961557
reader.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Nov 16, 04:12
Size
16 KB
Mime Type
text/x-c
Expires
Mon, Nov 18, 04:12 (2 d)
Engine
blob
Format
Raw Data
Handle
22354890
Attached To
rSPECMICP SpecMiCP / ReactMiCP
reader.cpp
View Options
/*-------------------------------------------------------
- Module : database
- File : reader.cpp
- Author : Fabien Georget
Copyright (c) 2014, Fabien Georget, Princeton University
---------------------------------------------------------*/
#include "reader.hpp"
#include "jsoncpp/json/reader.h"
#include <fstream>
#include <stdexcept>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim_all.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <locale>
#include "utils/log.hpp"
#define INDB_SECTION_BASIS "Basis"
#define INDB_SECTION_AQUEOUS "Aqueous"
#define INDB_SECTION_MINERALS "Minerals"
#define INDB_SECTION_GAS "Gas"
#define INDB_ATTRIBUTE_LABEL "label"
#define INDB_ATTRIBUTE_ACTIVITY "activity"
#define INDB_ATTRIBUTE_ACTIVITY_A "a"
#define INDB_ATTRIBUTE_ACTIVITY_B "b"
#define INDB_ATTRIBUTE_MOLARMASS "molar_mass"
#define INDB_ATTRIBUTE_MOLARVOLUME "molar_volume"
#define INDB_ATTRIBUTE_COMPOSITION "composition"
#define INDB_ATTRIBUTE_LOGK "log_k"
#define INDB_ATTRIBUTE_FLAG_KINETIC "flag_kinetic"
#define INDB_OPEN_DELIMITER_CHARGE '['
#define INDB_CLOSE_DELIMITER_CHARGE ']'
namespace specmicp {
namespace database {
void DataReader::parse()
{
std::ifstream datafile(m_filepath, std::ifstream::binary);
Json::Reader reader;
if (not datafile.is_open())
{
ERROR << "Database not found : " << m_filepath;
std::string message("Database not found : " + m_filepath);
throw std::invalid_argument(message);
}
DEBUG << "Parsing database";
bool parsingSuccessful = reader.parse( datafile, m_root);
if ( !parsingSuccessful )
{
// report to the user the failure and their locations in the document.
std::string message("Failed to parse configuration\n" + reader.getFormatedErrorMessages());
throw std::runtime_error(message);
}
parse_size_db();
parse_basis();
parse_aqueous();
parse_minerals();
parse_gas();
datafile.close();
}
void DataReader::check_database()
{
}
void DataReader::parse_size_db()
{
data->nb_component = m_root[INDB_SECTION_BASIS].size();
data->nb_aqueous = m_root[INDB_SECTION_AQUEOUS].size();
data->nb_mineral = m_root[INDB_SECTION_MINERALS].size();
data->nb_gas = m_root[INDB_SECTION_GAS].size();
data->param_aq = Eigen::MatrixXd::Zero(data->nb_component+data->nb_aqueous, 3);
}
void DataReader::parse_basis()
{
Json::Value& basis = get_mandatory_section(m_root, INDB_SECTION_BASIS);
DEBUG << "Size basis : " << basis.size();
int size_basis = basis.size();
data->labels_basis.reserve(size_basis);
data->molar_mass_basis.resize(size_basis);
for (int id=0; id<size_basis; ++id)
{
Json::Value& species = basis[id];
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
SPAM << "Parsing component : " << label;
data->map_labels_basis[label] = id;
data->labels_basis.push_back(label);
data->param_aq(id, 0) = charge_from_label(label);
if (species.isMember(INDB_ATTRIBUTE_ACTIVITY))
{
data->param_aq(id, 1) = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_A].asDouble();
data->param_aq(id, 2) = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_B].asDouble();
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_MOLARMASS);
data->molar_mass_basis(id) = species[INDB_ATTRIBUTE_MOLARMASS].asDouble();
}
}
void DataReader::parse_aqueous()
{
DEBUG << "Parse aqueous species";
Json::Value& aqueous = get_mandatory_section(m_root, INDB_SECTION_AQUEOUS);
data->labels_aqueous.reserve(data->nb_aqueous);
data->nu_aqueous = reaction_mat_t::Zero(data->nb_aqueous, data->nb_component+data->nb_aqueous);
data->logk_aqueous = logK_vector_t(data->nb_aqueous);
for (int id=0; id<data->nb_aqueous; ++id)
{
Json::Value& species = aqueous[id];
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
data->labels_aqueous.push_back(label);
const int idparam = id + data->nb_component;
data->param_aq(idparam, 0) = charge_from_label(label);
if (species.isMember(INDB_ATTRIBUTE_ACTIVITY))
{
data->param_aq(idparam, 1) = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_A].asDouble();
data->param_aq(idparam, 2) = species[INDB_ATTRIBUTE_ACTIVITY][INDB_ATTRIBUTE_ACTIVITY_B].asDouble();
}
// equation
check_for_mandatory_value(species, INDB_ATTRIBUTE_COMPOSITION);
std::map<std::string, float> compo;
parse_equation(species[INDB_ATTRIBUTE_COMPOSITION].asString(), compo);
double test_charge=0;
for (auto it=compo.begin(); it != compo.end(); ++it)
{
std::map<std::string, int>::const_iterator search = data->map_labels_basis.find(it->first);
if(search != data->map_labels_basis.end()) {
data->nu_aqueous(id, search->second) = it->second;
test_charge += it->second*data->param_aq(search->second, 0);
}
else {
vector_labels_t::const_iterator searchaq = std::find(data->labels_aqueous.begin(),
data->labels_aqueous.end(),
it->first);
if (searchaq != data->labels_aqueous.end())
{
const int id_aq = (searchaq - data->labels_aqueous.begin()) + data->nb_component;
data->nu_aqueous(id, id_aq) = it->second;
test_charge += it->second*data->param_aq(id_aq, 0);
}
else
{
throw db_invalid_syntax("Unknown species : '" + it->first + "' while parsing equations for " + label +".");
}
}
}
if (test_charge != data->param_aq(idparam, 0))
{
throw db_invalid_data("Total charge is not zero for aqueous species : '"+label+
"' - Charge-decomposition - charge species : "+
std::to_string(test_charge - data->param_aq(id, 0) )+".");
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_LOGK);
data->logk_aqueous(id) = species[INDB_ATTRIBUTE_LOGK].asDouble();
}
}
void DataReader::parse_minerals()
{
DEBUG << "Parse minerals";
Json::Value& minerals = get_mandatory_section(m_root, INDB_SECTION_MINERALS);
// find kinetic minerals
int nb_kin = 0;
for (int id=0; id<data->nb_mineral; ++id)
{
bool is_kin = false;
if (minerals[id].isMember(INDB_ATTRIBUTE_FLAG_KINETIC))
{
is_kin = minerals[id][INDB_ATTRIBUTE_FLAG_KINETIC].asBool();
}
if (is_kin) ++nb_kin;
}
data->nb_mineral = data->nb_mineral - nb_kin;
data->labels_minerals.reserve(data->nb_mineral);
data->nu_mineral = reaction_mat_t::Zero(data->nb_mineral, data->nb_component+data->nb_aqueous);
data->logk_mineral = logK_vector_t(data->nb_mineral);
data->_molar_volume_mineral.resize(data->nb_mineral);
data->nb_mineral_kinetic = nb_kin;
data->labels_minerals_kinetic.reserve(data->nb_mineral_kinetic);
data->nu_mineral_kinetic = reaction_mat_t::Zero(data->nb_mineral_kinetic, data->nb_component+data->nb_aqueous);
data->logk_mineral_kinetic = logK_vector_t(data->nb_mineral_kinetic);
data->_molar_volume_mineral_kinetic.resize(data->nb_mineral);
// id for each class of minerals
int id_in_eq=0;
int id_in_kin=0;
for (int id=0; id<data->nb_mineral+data->nb_mineral_kinetic; ++id)
{
Json::Value& species = minerals[id];
//check if material is at equilibrium or governed by kinetics
bool is_kin = false;
if (species.isMember(INDB_ATTRIBUTE_FLAG_KINETIC))
{
is_kin = species[INDB_ATTRIBUTE_FLAG_KINETIC].asBool();
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
const std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
if (is_kin) data->labels_minerals_kinetic.push_back(label);
else data->labels_minerals.push_back(label);
// equation
// --------
check_for_mandatory_value(species, INDB_ATTRIBUTE_COMPOSITION);
std::map<std::string, float> compo;
parse_equation(species[INDB_ATTRIBUTE_COMPOSITION].asString(), compo);
double test_charge = 0;
for (auto it=compo.begin(); it != compo.end(); ++it)
{
std::map<std::string, int>::const_iterator search = data->map_labels_basis.find(it->first);
if(search != data->map_labels_basis.end()) { // this is a component
if (is_kin) data->nu_mineral_kinetic(id_in_kin, search->second) = it->second;
else data->nu_mineral(id_in_eq, search->second) = it->second;
test_charge += it->second * data->param_aq(search->second, 0);
}
else { // this is an aqueous species
vector_labels_t::const_iterator searchaq = std::find(data->labels_aqueous.begin(),
data->labels_aqueous.end(),
it->first);
if (searchaq != data->labels_aqueous.end())
{
int id_aq = searchaq-data->labels_aqueous.begin()+data->nb_component;
if (is_kin) data->nu_mineral_kinetic(id_in_kin,id_aq) = it->second;
else data->nu_mineral(id_in_eq, id_aq) = it->second;
test_charge += it->second * data->param_aq(id_aq, 0);
}
else // or something we don't know
{
throw db_invalid_syntax("Unknown species : '" + it->first + "' while parsing equations for " + label +".");
}
}
}
if (test_charge != 0)
{
throw db_invalid_data("Total charge is not zero for mineral : '"+label+
"' - total charge : "+std::to_string(test_charge)+".");
}
// log(K)
// ------
check_for_mandatory_value(species, INDB_ATTRIBUTE_LOGK);
if (is_kin) data->logk_mineral_kinetic(id_in_kin) = species[INDB_ATTRIBUTE_LOGK].asDouble();
else data->logk_mineral(id_in_eq) = species[INDB_ATTRIBUTE_LOGK].asDouble();
// Molar volume
// -------------
if (species.isMember(INDB_ATTRIBUTE_MOLARVOLUME))
{
if (is_kin) data->_molar_volume_mineral_kinetic(id_in_kin) = species[INDB_ATTRIBUTE_MOLARVOLUME].asDouble();
else data->_molar_volume_mineral(id_in_eq) = species[INDB_ATTRIBUTE_MOLARVOLUME].asDouble();
}
else
{
if (is_kin) data->_molar_volume_mineral_kinetic(id_in_kin) = -1;
else data->_molar_volume_mineral(id_in_eq) = -1;
}
// update indices
if (is_kin) {++id_in_kin;}
else {++id_in_eq;}
}
assert(id_in_kin == data->nb_mineral_kinetic);
assert(id_in_eq == data->nb_mineral);
}
void DataReader::parse_gas()
{
DEBUG << "Parse gas";
Json::Value& gas = get_mandatory_section(m_root, INDB_SECTION_GAS);
data->labels_gas.reserve(data->nb_gas);
data->nu_gas = reaction_mat_t::Zero(data->nb_gas, data->nb_component+data->nb_aqueous);
data->logk_gas = logK_vector_t(data->nb_gas);
for (int id=0; id<data->nb_gas; ++id)
{
Json::Value& species = gas[id];
check_for_mandatory_value(species, INDB_ATTRIBUTE_LABEL);
std::string label = species[INDB_ATTRIBUTE_LABEL].asString();
data->labels_gas.push_back(label);
// equation
check_for_mandatory_value(species, INDB_ATTRIBUTE_COMPOSITION);
std::map<std::string, float> compo;
parse_equation(species[INDB_ATTRIBUTE_COMPOSITION].asString(), compo);
double test_charge=0;
for (auto it=compo.begin(); it != compo.end(); ++it)
{
std::map<std::string, int>::const_iterator search = data->map_labels_basis.find(it->first);
if(search != data->map_labels_basis.end()) {
data->nu_gas(id, search->second) = it->second;
test_charge += it->second*data->param_aq(search->second, 0);
}
else {
vector_labels_t::const_iterator searchaq = std::find(data->labels_aqueous.begin(),
data->labels_aqueous.end(),
it->first);
if (searchaq != data->labels_aqueous.end())
{
const int id_aq = (searchaq - data->labels_aqueous.begin()) + data->nb_component;
data->nu_gas(id, id_aq) = it->second;
test_charge += it->second*data->param_aq(id_aq, 0);
}
else
{
throw db_invalid_syntax("Unknown species : '" + it->first + "' while parsing equations for " + label +".");
}
}
}
if (test_charge != 0)
{
throw db_invalid_data("Total charge is not zero for gas '"+label+
"' : " + std::to_string(test_charge)+".");
}
check_for_mandatory_value(species, INDB_ATTRIBUTE_LOGK);
data->logk_gas(id) = species[INDB_ATTRIBUTE_LOGK].asDouble();
}
}
void parse_equation(const std::string& equation, std::map<std::string, float>& compo)
{
std::vector<std::string> list_compo;
boost::split(list_compo, equation, [](char input){return input == ',';}, boost::token_compress_on);
for (auto it=list_compo.begin(); it!=list_compo.end(); ++it)
{
std::string& toparse = *it;
double coeff = 0;
boost::trim_all(toparse);
unsigned int pos_end = 0;
while (pos_end < toparse.size())
{
if (std::isalpha(toparse[pos_end])) // or std::isblank(toparse[pos_end]))
{
break;
}
++pos_end;
}
std::string label(toparse.substr(pos_end, toparse.size()-pos_end));
boost::trim_all(label);
if (pos_end == 0) coeff =1;
else if (pos_end == 1 and toparse[0] == '-') coeff=-1;
else
{
std::string tofloat = toparse.substr(0, pos_end);
while (pos_end > 1)
{
if ((tofloat[0] == '-' or tofloat[0] == '+')
and std::isspace(tofloat[1]) )
{
tofloat = tofloat[0] + tofloat.substr(2,pos_end-2);
--pos_end;
continue;
}
else
{
break;
}
}
if ( tofloat[pos_end-1] == '-' ) {coeff = -1;}
else if (tofloat[pos_end-1] == '+') {coeff = +1;}
else {coeff = std::stof(tofloat);}
}
compo[label] = coeff;
}
}
double charge_from_label(const std::string& label)
{
int sign=1;
auto start= label.end();
auto stop = label.end();
for (auto it=label.begin(); it != label.end(); ++it)
{
if (*it == INDB_OPEN_DELIMITER_CHARGE) { start = it; }
}
if (start == label.end()) {return 0;} // no charge specified
for (auto it=start+1; it != label.end(); ++it)
{
if (*it == INDB_CLOSE_DELIMITER_CHARGE) { stop = it; }
}
if (stop == label.end()) {throw db_invalid_syntax("Charge : missing closing delimiter for species : "+label+".");}
start = start+1;
if (stop == start) {return 0;} // nothing inside bracket
// get the signs
if (*(stop-1) == '-')
{
sign = -1;
stop =stop-1;
}
else if (*(stop-1) == '+')
{
sign = +1;
stop = stop-1;
}
if (stop == start)
{
return sign; // just the sign, |z|=1
}
double charge;
try
{
charge = sign*std::stof(std::string(start,stop));
}
catch (std::invalid_argument&)
{
throw db_invalid_syntax("Charge : bad formatting for species :"+ label+".");
}
return charge;
}
} // end namespace specmicp
} // end namespace chemy
#undef INDB_SECTION_BASIS
#undef INDB_SECTION_AQUEOUS
#undef INDB_SECTION_MINERALS
#undef INDB_SECTION_GAS
#undef INDB_ATTRIBUTE_NAME
#undef INDB_ATTRIBUTE_LABEL
#undef INDB_ATTRIBUTE_ACTIVITY
#undef INDB_ATTRIBUTE_ACTIVITY_A
#undef INDB_ATTRIBUTE_ACTIVITY_B
#undef INDB_ATTRIBUTE_MOLARMASS
#undef INDB_ATTRIBUTE_COMPOSITION
#undef INDB_ATTRIBUTE_LOGK
#undef INDB_ATTRIBUTE_FLAG_KINETIC
#undef INDB_OPEN_DELIMITER_CHARGE
#undef INDB_CLOSE_DELIMITER_CHARGE
Event Timeline
Log In to Comment