diff --git a/importation-tool/ImportationTool.cpp b/importation-tool/ImportationTool.cpp index 9713745..29f1db8 100644 --- a/importation-tool/ImportationTool.cpp +++ b/importation-tool/ImportationTool.cpp @@ -1,794 +1,794 @@ #include #include #include #include #include #include #include "edu_harvard_i2b2_gen_util_CryptosystemBridge.cpp" #define MAX_ENTRY 10000 using namespace std; using namespace pqxx; /* Forward declarations of the helpers defined later in this file. */ connection* connectDB(string); void disconnectDB(connection*); result* NON_TRANSACTIONstatement(connection*, string); void TRANSACTIONstatement(connection*, string); void skipLine(ifstream*); void readBlock(char*, ifstream*); void skipBlock(ifstream*, unsigned short); void writeKeySet(FV::sk_t*, FV::evk_t*, FV::pk_t*); void loadKeySet(ifstream*, FV::sk_t*, FV::evk_t*, FV::pk_t*); void writeLittleEndian(ofstream*, uint64_t); bool isLast(ifstream*); void printUsage(char*); void generateRandomAlphaNumeric(char*, int); -void moreSpaceEncoding(uint64_t, int, unsigned int, ifstream*, streampos*, ofstream*, unsigned int, char*, FV::pk_t*); +void moreSpaceEncoding(uint64_t, int, unsigned int, unsigned int, ifstream*, streampos*, ofstream*, unsigned int, char*, FV::pk_t*); -void lessSpaceEncoding(uint64_t, int, unsigned int, ifstream*, streampos*, ofstream*, unsigned int, char*, FV::pk_t*); +void lessSpaceEncoding(uint64_t, int, unsigned int, unsigned int, ifstream*, streampos*, ofstream*, unsigned int, char*, FV::pk_t*); void encrypt(FV::pk_t*, FV::params::poly_p*, ofstream*); /* Entry point. With fewer than 2 arguments the usage text is printed and 1 is returned; "-u" as argv[1] requires exactly 5 arguments. NOTE(review): some of this function's text (loop bounds, template arguments) appears to have been lost before it reached this file, so only the visible statements are described. */ int main(int argc, char* argv[]) { if (argc < 2) { printUsage(argv[0]); return 1; } if (strcmp(argv[1], "-u") == 0) { if (argc != 5) { printUsage(argv[0]); return 1; } ifstream ifs; ofstream ofs; srand (time(NULL)); ifs.open(argv[2]); FV::params::poly_p sk; FV::params::poly_p pk; for (size_t i=0;i=2 && strcmp(argv[2], "-d") == 0){ /* "-d": restart public.patient_seq at 1 and delete every row of public.gen_variants. NOTE(review): the visible guard ends in "=2", presumably argc>=2, yet argv[2] is only a valid string when argc is at least 3 - confirm. */ clear_database=true; NON_TRANSACTIONstatement(C,"SELECT setval(\'public.patient_seq\',1)"); TRANSACTIONstatement(C,"DELETE FROM public.gen_variants;"); }else{ result* rs = 
NON_TRANSACTIONstatement(C,"SELECT nextval(\'public.patient_seq\')"); uint64_t aux = rs->front()[0].as(); delete rs; /* the value just read is stored back into the sequence as aux-1 */ NON_TRANSACTIONstatement(C,"SELECT setval(\'public.patient_seq\',"+to_string(aux-1)+")"); } /* "-i" as argv[2] or argv[3] turns on new_encoding */ if( (argc >2 && strcmp(argv[2], "-i") == 0) || (argc >3 && strcmp(argv[3], "-i") == 0) ){ new_encoding=true; } ifstream ifs, keyset; ofstream ofs, res, aggregate; /* A key set is always constructed here; when a keyset file is given, loadKeySet() below reads stored values into these same objects. */ FV::sk_t* secret_key = new FV::sk_t(); FV::evk_t* evaluation_key = new FV::evk_t(*secret_key, 32); FV::pk_t* public_key = new FV::pk_t(*secret_key, *evaluation_key); // Write key set if ((argc == 3 && clear_database == false && new_encoding == false) || (argc == 4 && clear_database == true && new_encoding == false) || (argc == 4 && clear_database == false && new_encoding == true) || (argc == 5 && clear_database == true && new_encoding == true)) { cout << "Generate new keyset" << endl; writeKeySet(secret_key, evaluation_key, public_key); ifs.open(argv[argc-1]); } else if ((argc == 4 && clear_database == false && new_encoding == false) || (argc == 5 && clear_database == true && new_encoding == false) || (argc == 5 && clear_database == false && new_encoding == true) || (argc == 6 && clear_database == true && new_encoding == true)) { // Load exist key cout << "Load existing keyset" << endl; keyset.open(argv[argc-1]); loadKeySet(&keyset, secret_key, evaluation_key, public_key); keyset.close(); ifs.open(argv[argc-2]); } else { printUsage(argv[0]); return 1; } // skip header char c; do { skipLine(&ifs); ifs.seekg(1, ifs.cur); ifs.get(c); } while (c == '#'); // Get individuals number // skip column definition, 9 columns #CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT are metadata // need to parse patient identifier int count=-9; do { skipBlock(&ifs, 1); count++; ifs.get(c); } while (c != '\n'); if (count <= 0) { cout << "No one is in given files" << endl; return 1; } cout << count << " individual(s) are found" << endl; result* rs = NON_TRANSACTIONstatement(C,"SELECT currval(\'public.patient_seq\')"); 
uint64_t patients_log = rs->front()[0].as(); delete rs; ofs.open("individuals.sql"); ofs << "insert into gen_patient_mapping(pid_gen) values" << endl; for (int i=0;i container; for(int param=0; param<5; param++){ char* buffer = (char*) malloc(1024*sizeof(char)); container.push_back(buffer); } char* buffer = (char*) malloc(1024*sizeof(char)); unsigned int cipherid; unsigned int degree; /* Starting cipher_id/degree: zero after "-d", otherwise taken from one row of gen_variants. NOTE(review): "ORDER BY cipher_id, degree DESC" sorts cipher_id ascending, so LIMIT 1 yields the lowest cipher_id; if the goal is to resume after the most recent one, cipher_id would need DESC as well - confirm. */ if(clear_database==true){ cipherid=0; degree=0; }else{ result* rs = NON_TRANSACTIONstatement(C,"SELECT cipher_id, degree FROM public.gen_variants ORDER BY cipher_id, degree DESC LIMIT 1;"); if(rs->size()>0){ cipherid = rs->front()[0].as(); degree = rs->front()[1].as(); if(degree= MAX_ENTRY) { /* Roll over to the next output file: overwrite the trailing separator with ';', close the file, then open genotypes-N.sql for the next fileId, add an \i line for it to the aggregate stream and write the insert header that matches the encoding. */ res.seekp(-2, res.end); res << ';'; res.close(); fileId++; NbEntries = 0; aggregate << "\\i genotypes-" << to_string(fileId) << ".sql" << endl; res.open("genotypes-" + to_string(fileId) + ".sql"); if(new_encoding == false){ res << "insert into gen_encrypted_genotypes (cipher_id, pid_gen, cipher_gt1, cipher_gt2, no_calls) values" << endl; }else{ res << "insert into gen_encrypted_genotypes (cipher_id, pid_gen, cipher_gt1, cipher_gt2, cipher_gt3, cipher_gt4, cipher_gt5) values" << endl; } } // parse variant information for (size_t i=degree;ifront()[0].as(); delete rs; if(exists == false){ new_variants = true; TRANSACTIONstatement(C,insert_query); // if variant does not yet exist write to file ofs << '(' << cipherid << ", " << i << ", \'"; // cipher_id ofs << container[0] << "\', "; // chromosome ofs << container[1] << ", "; // position // rs_number if (*container[2] == '.') { ofs << "null"; } else { ofs << '\'' << container[2] << '\''; } ofs << ", \'"; ofs << container[3] << "\', \'"; // reference ofs << container[4] << "\'),"; // alternate ofs << endl; } skipBlock(&ifs, 4); /* record the stream position reached after the four skipped columns; the encoding functions later seek back to it through pos */ *(pos+i-degree) = ifs.tellg(); skipLine(&ifs); if (isLast(&ifs)) { n=i+1-degree; hasNext = false; ifs.clear(); } degree = 0; } if(new_encoding==false){ 
lessSpaceEncoding(patients_log,count,degree,n,&ifs,pos,&res,cipherid,buffer,public_key); NbEntries += 2*count; }else{ moreSpaceEncoding(patients_log,count,degree,n,&ifs,pos,&res,cipherid,buffer,public_key); NbEntries += 5*count; } skipLine(&ifs); cipherid++; variants += n; cout << variants << " variants are processed" << endl; } // replace , of last column to ; ofs.seekp(-2, ofs.end); ofs << ';'; res.seekp(-2, res.end); res << ';'; ifs.close(); ofs.close(); /* nothing new was inserted: variants.sql is opened and closed again without writing to it */ if(new_variants == false){ ofs.open("variants.sql"); ofs.close(); } res.close(); aggregate.close(); for(size_t i=0;iseekg(*(pos+j-degree)); readBlock(buffer,ifs); *(pos+j-degree) = ifs->tellg(); /* Presumably the body of moreSpaceEncoding (its header is not present in this text). buffer[0] and buffer[2] are each classified as '0', '.' or anything else. From the branches below: gt1 is 1 only for '0' with '0'; gt2 is 1 only when one character is '0' and the other is neither '0' nor '.'; gt3 is 1 only when neither character is '0' or '.'; gt4 counts the characters that are neither '0' nor '.'; gt5 counts the characters that are not '.'. */ if (*buffer == '0') { if (*(buffer+2) == '0') { gt1(0,j) = (uint64_t)1; gt2(0,j) = (uint64_t)0; gt3(0,j) = (uint64_t)0; gt4(0,j) = (uint64_t)0; gt5(0,j) = (uint64_t)2; } else if (*(buffer+2) == '.') { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)0; gt3(0,j) = (uint64_t)0; gt4(0,j) = (uint64_t)0; gt5(0,j) = (uint64_t)1; } else { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)1; gt3(0,j) = (uint64_t)0; gt4(0,j) = (uint64_t)1; gt5(0,j) = (uint64_t)2; } } else if (*buffer == '.') { if (*(buffer+2) == '0') { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)0; gt3(0,j) = (uint64_t)0; gt4(0,j) = (uint64_t)0; gt5(0,j) = (uint64_t)1; } else if (*(buffer+2) == '.') { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)0; gt3(0,j) = (uint64_t)0; gt4(0,j) = (uint64_t)0; gt5(0,j) = (uint64_t)0; } else { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)0; gt3(0,j) = (uint64_t)0; gt4(0,j) = (uint64_t)1; gt5(0,j) = (uint64_t)1; } } else { if (*(buffer+2) == '0') { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)1; gt3(0,j) = (uint64_t)0; gt4(0,j) = (uint64_t)1; gt5(0,j) = (uint64_t)2; } else if (*(buffer+2) == '.') { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)0; gt3(0,j) = (uint64_t)0; gt4(0,j) = (uint64_t)1; gt5(0,j) = (uint64_t)1; } else { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)0; gt3(0,j) = (uint64_t)1; gt4(0,j) = (uint64_t)2; gt5(0,j) = 
(uint64_t)2; } } } /* one SQL value tuple per i: cipherid, i, then one encrypt() call per gt polynomial */ *res << '(' << dec << cipherid << ',' << i; encrypt(public_key,>1,res); encrypt(public_key,>2,res); encrypt(public_key,>3,res); encrypt(public_key,>4,res); encrypt(public_key,>5,res); *res << ")," << endl; } } -void lessSpaceEncoding(uint64_t patients_log, int count, unsigned int degree, unsigned n, ifstream* ifs, streampos* pos, ofstream* res, unsigned int cipherid, char* buffer, FV::pk_t* public_key){ +void lessSpaceEncoding(uint64_t patients_log, int count, unsigned int degree, unsigned int n, ifstream* ifs, streampos* pos, ofstream* res, unsigned int cipherid, char* buffer, FV::pk_t* public_key){ /* Per index j the block read from the file is classified by buffer[0] and buffer[2] into gt1/gt2, and noc receives a 2-bit field per j (element j/4, shift 2*(j%4)) holding how many of the two characters are '.'; each noc element is zeroed when j%4 is 0. NOTE(review): the loop headers of this function are incomplete in this text. */ for (uint64_t i=patients_log; iseekg(*(pos+j-degree)); readBlock(buffer,ifs); *(pos+j-degree) = ifs->tellg(); if (j%4 == 0) { *(noc+j/4)=0; } if (*buffer == '0') { if (*(buffer+2) == '0') { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)0; } else if (*(buffer+2) == '.') { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)1; *(noc+j/4) |= 0x01 << 2*(j%4); } else { gt1(0,j) = (uint64_t)1; gt2(0,j) = (uint64_t)0; } } else if (*buffer == '.') { if (*(buffer+2) == '0') { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)1; *(noc+j/4) |= 0x01 << 2*(j%4); } else if (*(buffer+2) == '.') { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)0; *(noc+j/4) |= 0x02 << 2*(j%4); } else { gt1(0,j) = (uint64_t)1; gt2(0,j) = (uint64_t)0; *(noc+j/4) |= 0x01 << 2*(j%4); } } else { if (*(buffer+2) == '0') { gt1(0,j) = (uint64_t)1; gt2(0,j) = (uint64_t)0; } else if (*(buffer+2) == '.') { gt1(0,j) = (uint64_t)1; gt2(0,j) = (uint64_t)0; *(noc+j/4) |= 0x01 << 2*(j%4); } else { gt1(0,j) = (uint64_t)0; gt2(0,j) = (uint64_t)1; } } } for (size_t j=n;jc0)(0,k)); } for (size_t k=0;kc1)(0,k)); } *res << "'"; delete cipher; } /* Creates a pqxx connection for dbname with new and returns it. Exits the process with status 1 when the connection is not open or when an exception is thrown. */ connection* connectDB(string dbname){ try{ connection* C = new connection(dbname); if (C->is_open()) { cout << "Opened database successfully: " << C->dbname() << endl; } else { cout << "Can't open database" << endl; exit(1); } return C; }catch (const std::exception &e){ cerr << 
e.what() << std::endl; exit(1); } } /* Calls C->disconnect() and reports success; exits with status 1 on an exception. C itself is not deleted here. */ void disconnectDB(connection* C){ try{ C->disconnect (); cout << "Closed database successfully." << endl; }catch (const std::exception &e){ cerr << e.what() << std::endl; exit(1); } } /* Runs statement through a pqxx nontransaction and returns the outcome as a result allocated with new; exits with status 1 on an exception. NOTE(review): main deletes the pointer at some call sites but discards it at others (the setval calls), which leaks those results. */ result* NON_TRANSACTIONstatement(connection* C, string statement){ try{ /* Create SQL statement */ string sql = statement; /* Create a non-transactional object. */ nontransaction N(*C); /* Execute SQL query */ result* R = new result(N.exec(sql)); return R; }catch (const std::exception &e){ cerr << e.what() << std::endl; exit(1); } } /* Executes statement inside a pqxx work transaction and commits it; exits with status 1 on an exception. */ void TRANSACTIONstatement(connection* C, string statement){ try{ /* Create SQL statement */ string sql = statement; /* Create a transactional object. */ work W(*C); /* Execute SQL query */ W.exec(sql); W.commit(); }catch (const std::exception &e){ cerr << e.what() << std::endl; exit(1); } } /* Consumes characters up to and including the next '\n'. NOTE(review): the outcome of get() is not checked, so once the stream fails c keeps its previous value and the loop does not end unless that value was '\n'; c is uninitialized if the very first get() fails. */ void skipLine(ifstream* ifs) { char c; do { ifs->get(c); } while (c != '\n'); } /* Skips leading spaces and tabs, then copies the following characters, upper-cased, into block until a space, tab or newline is read; that delimiter is stepped back over with seekg(-1) and block is NUL-terminated. No limit on the number of characters written to block is enforced here. */ void readBlock(char* block, ifstream* ifs) { char c; do { ifs->get(c); } while (c == ' ' || c == '\t'); do { *block = toupper(c); ifs->get(c); block++; } while (c != ' ' && c != '\t' && c != '\n'); ifs->seekg(-1, ifs->cur); *block = '\0'; } /* Skips whitespace-delimited blocks (presumably n of them; the loop header is incomplete in this text) and then steps back one character so the delimiter that ended the last block is read again by the caller. */ void skipBlock(ifstream* ifs, unsigned short n) { char c; for (size_t i=0;iget(c); } while (c == ' ' || c == '\t'); do { ifs->get(c); } while (c != ' ' && c != '\t' && c != '\n'); } ifs->seekg(-1, ifs->cur); } /* Writes the key material to the file "keyset" and an insert statement for gen_parameters to "parameters.sql". NOTE(review): the loop headers and the write calls are incomplete in this text. */ void writeKeySet(FV::sk_t* sk, FV::evk_t* evk, FV::pk_t* pk) { ofstream ofs; ofs.open("keyset"); // write secret key for (size_t i=0;ivalue)(0,i), sizeof(uint64_t)); } // write public key for (size_t i=0;ia)(0,i), sizeof(uint64_t)); } for (size_t i=0;ib)(0,i), sizeof(uint64_t)); } // write evaluation key for (size_t i=0; iell; i++) { for (size_t j=0; jvalues+i))(0,j), sizeof(uint64_t)); } } ofs.close(); ofs.open("parameters.sql"); ofs << "insert into gen_parameters (poly_degree, nb_moduli, public_key, evaluation_key, plaintext_modulus) values" << endl; ofs << '(' << std::dec << POLY_DEGREE << ", " << 
FV::params::poly_p::nmoduli << ", E'\\\\x"; for (size_t i=0;ia)(0,i)); } for (size_t i=0;ib)(0,i)); } ofs << "', E'\\\\x"; for (size_t i=0; iell; i++) { for (size_t j=0; jvalues+i))(0,j)); } } ofs << "', " << std::dec << FV::params::plaintextModulus::value() << ");" << endl; ofs.close(); } /* Reads key material from ifs in sizeof(uint64_t) chunks, in the order sk->value, pk->a, pk->b, then the evk values, and afterwards recomputes pk->a_shoup and pk->b_shoup with nfl::compute_shoup. NOTE(review): the loop bounds are incomplete in this text. */ void loadKeySet(ifstream* ifs, FV::sk_t* sk, FV::evk_t* evk, FV::pk_t* pk) { for (size_t i=0;iread((char*)&(sk->value)(0,i), sizeof(uint64_t)); } for (size_t i=0;iread((char*)&(pk->a)(0,i), sizeof(uint64_t)); } for (size_t i=0;iread((char*)&(pk->b)(0,i), sizeof(uint64_t)); } for (size_t i=0; iell; i++) { for (size_t j=0; jread((char*)&(*(*evk->values+i))(0,j), sizeof(uint64_t)); } } pk->a_shoup = nfl::compute_shoup(pk->a); pk->b_shoup = nfl::compute_shoup(pk->b); } /* Writes v to *ofs as 16 hex digits, two zero-padded digits per byte, least-significant byte first. The hex manipulator is inserted once and stays set on the stream afterwards. */ void writeLittleEndian(ofstream* ofs, uint64_t v) { *ofs << setfill('0') << setw(2) << hex << (int) (v&0xFF); *ofs << setfill('0') << setw(2) << (int) ((v >> 8) & 0xFF); *ofs << setfill('0') << setw(2) << (int) ((v >> 16) & 0xFF); *ofs << setfill('0') << setw(2) << (int) ((v >> 24) & 0xFF); *ofs << setfill('0') << setw(2) << (int) ((v >> 32) & 0xFF); *ofs << setfill('0') << setw(2) << (int) ((v >> 40) & 0xFF); *ofs << setfill('0') << setw(2) << (int) ((v >> 48) & 0xFF); *ofs << setfill('0') << setw(2) << (int) ((v >> 56) & 0xFF); } /* Reads one character: returns true if that hit end of file, otherwise steps back one character and returns false. The stream state is not cleared here when true is returned; the caller in main calls ifs.clear() itself. */ bool isLast(ifstream* ifs) { ifs->get(); if (ifs->eof()) return true; ifs->seekg(-1, ifs->cur); return false; } /* Prints the three supported command-line forms to stderr; filename is shown as the program name. */ void printUsage(char* filename) { cerr << "Usage : " << filename << " -f [-d] [-i] VCF_FILE KEYSET_FILE" << endl; cerr << " Generate SQL insertion query files from VCF file" << endl; cerr << "Usage : " << filename << " -f [-d] [-i] VCF_FILE" << endl; cerr << " Generate SQL insertion query files from VCF file" << endl; cerr << " New keyset and encryption parameters will be generated" << endl; cerr << "Usage : " << filename << " -u KEYSET_FILE USER_NAME ACCESS_LEVEL" << endl; cerr << " Create new user, USER_NAME must be username of i2b2" << endl; } void 
generateRandomAlphaNumeric(char* target, int length) { /* despite the function name, the character set below also contains punctuation; the rest of this function lies beyond the end of this text */ const char charset[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789~!@#$^*()_+{}[]"; for (int i=0;i