diff --git a/shrine-webclient/src/main/tools/create_tables/createTables.go b/shrine-webclient/src/main/tools/create_tables/createTables.go index eb150135f..510c22edd 100644 --- a/shrine-webclient/src/main/tools/create_tables/createTables.go +++ b/shrine-webclient/src/main/tools/create_tables/createTables.go @@ -1,425 +1,415 @@ package main import ( "log" "os" "bufio" "strings" "encoding/json" "io/ioutil" + "sort" //"gopkg.in/dedis/crypto.v0/abstract" ) // NOTE: the name of the created .csv file is the same name that should be given to the postgres tables. // ---------------------- // SET THESE BEFORE RUNNING THE MAIN // set the path of the SHRINE_ONT_GENOMIC_ANNOTATIONS.csv file const fromFile = "/home/niccolo/Downloads/new_tables/SHRINE_ONT_GENOMIC_ANNOTATIONS.csv" // set the path in which we will store the new tables and files const topath = "/home/niccolo/Downloads/new_tables/tables/" // ---------------------- // make sure you have the names of the following needed fields +const chromosome string = "Chr" +const start_pos string = "Start Position" +const ref_allele string = "Reference Allele" +const tum_allele1 string = "Tumor Seq Allele1" +const tum_allele2 string = "Tumor Seq Allele2" +const end_position string = "End Position" +const hugo_symbol string = "Hugo Gene Symbol" +const protein_change = "Protein Change" //const chromosome string = "Chromosome" //const start_pos string = "Start_Position" //const ref_allele string = "Reference_Allele" //const tum_allele1 string = "Tumor_Seq_Allele1" //const tum_allele2 string = "Tumor_Seq_Allele2" //const end_position string = "End_Position" //const hugo_symbol string = "Hugo_Symbol" - -const chromosome string = "Chr" -const start_pos string = "Start Position" -const ref_allele string = "Reference Allele" -const tum_allele1 string = "Tumor Seq Allele1" -const tum_allele2 string = "Tumor Seq Allele2" -const end_position string = "End Position" -const hugo_symbol string = "Hugo Gene Symbol" +//const protein_change = "..." // ---------------------- -// to request variant ids with popups +// used to request variant ids with popups const toOntGenomicNew = "genomic_annotations.csv" -// to have autocompletion in gene name popup -const toGenes = "gene_values.csv" -// to store the json map annotation name -> list of possible values (this is not a database table) +// used to store the json map annotation name -> list of possible values (this is not a database table) const toAnnotationToValueMap = "annotationToValue.json" +// folder in which we will store a file for each annotation +const annotation_tables_folder = "annotation_tables/" // ---------------------- func main(){ // if it does not already exists, create the path in which we will store all the created tables if _, err := os.Stat(topath); os.IsNotExist(err) { os.Mkdir(topath, os.ModePerm) } // --- - // create and store the map (in a .json file) from the annotation name to the list of possible values to avoid - // recomputing it if needed again - - storeAnnotationToValuesMap(createAnnotationToValuesMap()) + // create the genomic_annotations.csv + createGenomicAnnotationsTable() // --- - // create the both the genomic_annotations and gene_values tables and store them in .csv - - loadAndCreate() + // create and store the map (in a .json file) from the annotation name to the list of possible values to avoid + // recomputing it if needed again. + // run this only if you have not already created the file with the map + storeAnnotationToValuesMap(createAnnotationToValuesMap()) // --- // create one table per annotation name with all its possible values and one table (annotation_names) with all // the annotation names. These tables are needed for the popups autocompletion. annotationToValues := loadAnnotationToValuesMap() - //println("loaded") createAnnotationsTables(annotationToValues) - //println("tables created") createAnnotationsNamesTable(annotationToValues) // --- // create the annotation-meta file (not needed anymore) //annotationToValues := loadAnnotationToValuesMap() //createAnnotationMetadataFile(annotationToValues) } -func loadAndCreate() error{ +func createGenomicAnnotationsTable() error{ // open all the files, I'll write in them as I read to avoid to load all the csv in go (just one line per time) from, err := os.Open(fromFile) defer from.Close() if err != nil { log.Fatal("Error while opening", fromFile) return nil } ontGenomicFile, err := os.Create(topath + toOntGenomicNew) defer ontGenomicFile.Close() if err != nil { log.Fatal("Error while opening", topath + toOntGenomicNew) return err } - genesFile, err := os.Create(topath + toGenes) - defer genesFile.Close() - if err != nil { - log.Fatal("Error while opening", topath + toGenes) - return err - } - // replacers for strange annotation values. repl2(repl1(str)) repl1 := strings.NewReplacer( ",;", " ", ";,", " ", ";;", " ", ",,", " ", ) repl2 := strings.NewReplacer( ",", " ", ";", " ", ) // just keep the list of Hugo_symbol values already written in the file (I don't want to repeat values) - var hugoValues = []string{"?"} // ? so that if some hugo symbol is missing in a row I don't write "?" in genesFile var annotationToValue = make(map[string]string) // scanner.Text() returns "-8772697417129649152","{""Hugo_Symbol"":""PI4KB"", ""Entrez_Gene_Id"":""0"", ""Center"":""broad.mit.edu"", ""NCBI_Build"":""GRCh37"", ""Chromosome"":""1"", ""Start_Position"":""151287991"", ""End_Position"":""151287991"", ""Strand"":""-1"", ""Variant_Classification"":""Intron"", ""Variant_Type"":""SNP"", ""Reference_Allele"":""T"", ""Tumor_Seq_Allele1"":""C"", ""Tumor_Seq_Allele2"":""C"", ""dbSNP_RS"":"""", ""dbSNP_Val_Status"":"""", ""Tumor_Sample_Barcode"":""ME002"", ""Matched_Norm_Sample_Barcode"":""ME002-Normal"", ""Match_Norm_Seq_Allele1"":""T"", ""Match_Norm_Seq_Allele2"":""T"", ""Tumor_Validation_Allele1"":"""", ""Tumor_Validation_Allele2"":"""", ""Match_Norm_Validation_Allele1"":"""", ""Match_Norm_Validation_Allele2"":"""", ""Verification_Status"":"""", ""Validation_Status"":"""", ""Mutation_Status"":"""", ""Sequencing_Phase"":"""", ""Sequence_Source"":"""", ""Validation_Method"":"""", ""Score"":"""", ""BAM_File"":"""", ""Sequencer"":""Illumina GAIIx"", ""MA:FImpact"":"""", ""MA:FIS"":"""", ""MA:protein.change"":"""", ""MA:link.MSA"":"""", ""MA:link.PDB"":"""", ""MA:link.var"":"""", ""Tumor_Sample_UUID"":"""", ""Matched_Norm_Sample_UUID"":"""", ""HGVSc"":""c.909+58A>G"", ""HGVSp"":"""", ""HGVSp_Short"":"""", ""Transcript_ID"":""ENST00000368873"", ""Exon_Number"":"""", ""t_depth"":""0"", ""t_ref_count"":""."", ""t_alt_count"":""."", ""n_depth"":""0"", ""n_ref_count"":""."", ""n_alt_count"":""."", ""all_effects"":""PI4KB,intron_variant,,ENST00000368875,NM_002651.2;PI4KB,intron_variant,,ENST00000368874,NM_001198774.1;PI4KB,intron_variant,,ENST00000271657,;PI4KB,intron_variant,,ENST00000368873,;PI4KB,intron_variant,,ENST00000368872,NM_001198773.1;PI4KB,intron_variant,,ENST00000529142,NM_001198775.1;PI4KB,intron_variant,,ENST00000430800,;PI4KB,intron_variant,,ENST00000489223,;PI4KB,downstream_gene_variant,,ENST00000438243,;"", ""Allele"":""C"", ""Gene"":""ENSG00000143393"", ""Feature"":""ENST00000368873"", ""Feature_type"":""Transcript"", ""Consequence"":""intron_variant"", ""cDNA_position"":""-/3340"", ""CDS_position"":""-/2451"", ""Protein_position"":""-/816"", ""Amino_acids"":"""", ""Codons"":"""", ""Existing_variation"":"""", ""ALLELE_NUM"":""1"", ""DISTANCE"":"""", ""SYMBOL"":""PI4KB"", ""SYMBOL_SOURCE"":""HGNC"", ""HGNC_ID"":""8984"", ""BIOTYPE"":""protein_coding"", ""CANONICAL"":"""", ""CCDS"":"""", ""ENSP"":""ENSP00000357867"", ""SWISSPROT"":""PI4KB_HUMAN"", ""TREMBL"":""F8W860_HUMAN,E9PL47_HUMAN,E9PIH4_HUMAN"", ""UNIPARC"":""UPI0000040634"", ""RefSeq"":"""", ""SIFT"":"""", ""PolyPhen"":"""", ""EXON"":"""", ""INTRON"":""02.nov"", ""DOMAINS"":"""", ""GMAF"":"""", ""AFR_MAF"":"""", ""AMR_MAF"":"""", ""ASN_MAF"":"""", ""EAS_MAF"":"""", ""EUR_MAF"":"""", ""SAS_MAF"":"""", ""AA_MAF"":"""", ""EA_MAF"":"""", ""CLIN_SIG"":"""", ""SOMATIC"":"""", ""PUBMED"":"""", ""MOTIF_NAME"":"""", ""MOTIF_POS"":"""", ""HIGH_INF_POS"":"""", ""MOTIF_SCORE_CHANGE"":"""", ""IMPACT"":""MODIFIER"", ""PICK"":"""", ""VARIANT_CLASS"":""SNV"", ""TSL"":"""", ""HGVS_OFFSET"":"""", ""PHENO"":"""", ""MINIMISED"":""1"", ""ExAC_AF"":"""", ""ExAC_AF_AFR"":"""", ""ExAC_AF_AMR"":"""", ""ExAC_AF_EAS"":"""", ""ExAC_AF_FIN"":"""", ""ExAC_AF_NFE"":"""", ""ExAC_AF_OTH"":"""", ""ExAC_AF_SAS"":"""", ""GENE_PHENO"":"""", ""FILTER"":"".""}" scanner := bufio.NewScanner(from) i := 0 // there are 232 211 lines to read for scanner.Scan() { i = i+1 if i % 10000 == 0{ println(i) //return nil // todo uncomment this to test if you don't want to scan the whole file } var variantNumber string var variantName string // split the row in variant identifier and list of genomic annotations row := strings.SplitN(scanner.Text(), ",", 2) variantNumber = row[0][1:len(row[0])-1] // divide the annotations (list of "annotationName"":""annotationValue") annotationsList := strings.Split(row[1][4:len(row[1])-4], "\"\", \"\"") // store the current row in map annotationToValue = map[string]string{} for _, annot := range annotationsList { temp := strings.Split(annot, "\"\":\"\"") annotationName := temp[0] annotationValue := temp[1] if annotationValue == "" { continue } if annotationValue == "-" { continue } if annotationValue == "Unknown" { continue } // just check this line don't a duplicated annotation (shouldn't happen) _, ok := annotationToValue[annotationName] if ok {println("Error! Variant", variantNumber, "contains duplicated", annotationName)} annotationToValue[annotationName] = repl2.Replace(repl1.Replace(annotationValue)) } - // once the whole line is loaded let's write it into the files - // first get the t_depth and build the variant name chr:pos:r>A - t_depth := "0" // todo if it not specified what value should I put? - // todo remove t_depth - d, ok := annotationToValue["t_depth"] - if ok{t_depth = d} - delete(annotationToValue, "t_depth") + // now create the row to be stored in the file + // COLUMN 1: variant number + ontGenomicLine := "\"" + variantNumber + "\"" + + // COLUMN 2: variant name (chr:pos:r>A) // if there is no value then put "?" chrom := "?" if a, ok := annotationToValue[chromosome]; ok {chrom = a} - startPos := "?" if a, ok := annotationToValue[start_pos]; ok {startPos = a} - refAllele := "?" if a, ok := annotationToValue[ref_allele]; ok {refAllele = a} // just get one of the two allele := "?" if a, ok := annotationToValue[tum_allele1]; ok { allele = a }else if a, ok := annotationToValue[tum_allele2]; ok { allele = a } variantName = chrom + ":" + - startPos + ":" + - refAllele + ">"+ - allele + startPos + ":" + + refAllele + ">"+ + allele // we don't need these entries anymore delete(annotationToValue, chromosome) delete(annotationToValue, start_pos) delete(annotationToValue, end_position) delete(annotationToValue, ref_allele) - - // GENOMIC ONTOLOGY - // first column: variant number - ontGenomicLine := "\"" + variantNumber + "\"" - - // second column: variant name ontGenomicLine += ",\"" + variantName + "\"" - // third column: Hugo_SymbolValue;(homozygous/heterozygous/missing);annotationName1=annotationValue1;annotationName2=annotationValue2;... + // COLUMN 3: hugo symbol hugoValue := "?" if a, ok := annotationToValue[hugo_symbol]; ok { hugoValue = a - }else{ - // todo lot have no Hugo_Symbol field - // println("Error! Variant", variantNumber, "contains no HugoSymbol") } + ontGenomicLine += ",\"" + hugoValue + "\"" + delete(annotationToValue, hugo_symbol) + + // COLUMN 4: protein change + proteinChange := "?" + if a, ok := annotationToValue[protein_change]; ok { + proteinChange = a + } + ontGenomicLine += ",\"" + proteinChange + "\"" + delete(annotationToValue, protein_change) + // COLUMN 5: (Homozygous/Heterozygous/Unknown);annotationName1=annotationValue1;annotationName2=annotationValue2;... // tum_allele1 tum_allele2 // nil nil Unknown // A nil Unknown // nil B Unknown // A B Heterozygous // A A Homozygous alleleType := "" a1, ok1 := annotationToValue[tum_allele1] a2, ok2 := annotationToValue[tum_allele2] if !ok1 || !ok2{ alleleType = "Unknown" }else if a1==a2{ alleleType = "Homozygous" }else{ alleleType = "Heterozygous" } - ontGenomicLine += ",\"" + hugoValue + ";" + alleleType + ontGenomicLine += ",\"" + alleleType - delete(annotationToValue, hugo_symbol) delete(annotationToValue, tum_allele1) delete(annotationToValue, tum_allele2) for anName, anValue := range annotationToValue { ontGenomicLine += ";" + anName + "=" + anValue } ontGenomicLine += "\"" - // fourth column: t_depth - ontGenomicLine += ",\"" + t_depth + "\"" + //// last column: t_depth + //t_depth := "0" // todo if it not specified what value should I put? + //// todo remove t_depth + //d, ok := annotationToValue["t_depth"] + //if ok{t_depth = d} + //delete(annotationToValue, "t_depth") + //ontGenomicLine += ",\"" + t_depth + "\"" - newline := "\n" // write the row in a new line + // write the new row to file + newline := "\n" if i == 1 { newline = "" } _, err = ontGenomicFile.WriteString(newline + ontGenomicLine) if err != nil { log.Fatal("Error in writing in the genomic file", err) return err } - - // GENES - if !containsOrAdd(&hugoValues, hugoValue) { - _, err = genesFile.WriteString(newline + "\"" + hugoValue + "\"") - if err != nil { - log.Fatal("Error in writing in the genomic file", err) - return err - } - } } if err := scanner.Err(); err != nil { log.Fatal(err) return err } return nil } func createAnnotationToValuesMap() map[string]*[]string{ fp, err := os.Open(fromFile) defer fp.Close() if err != nil { log.Fatal("Error while opening", fromFile) return nil } var annotationToValues = make(map[string]*[]string) // scanner.Text() returns "-8772697417129649152","{""Hugo_Symbol"":""PI4KB"", ""Entrez_Gene_Id"":""0"", ""Center"":""broad.mit.edu"", ""NCBI_Build"":""GRCh37"", ""Chromosome"":""1"", ""Start_Position"":""151287991"", ""End_Position"":""151287991"", ""Strand"":""-1"", ""Variant_Classification"":""Intron"", ""Variant_Type"":""SNP"", ""Reference_Allele"":""T"", ""Tumor_Seq_Allele1"":""C"", ""Tumor_Seq_Allele2"":""C"", ""dbSNP_RS"":"""", ""dbSNP_Val_Status"":"""", ""Tumor_Sample_Barcode"":""ME002"", ""Matched_Norm_Sample_Barcode"":""ME002-Normal"", ""Match_Norm_Seq_Allele1"":""T"", ""Match_Norm_Seq_Allele2"":""T"", ""Tumor_Validation_Allele1"":"""", ""Tumor_Validation_Allele2"":"""", ""Match_Norm_Validation_Allele1"":"""", ""Match_Norm_Validation_Allele2"":"""", ""Verification_Status"":"""", ""Validation_Status"":"""", ""Mutation_Status"":"""", ""Sequencing_Phase"":"""", ""Sequence_Source"":"""", ""Validation_Method"":"""", ""Score"":"""", ""BAM_File"":"""", ""Sequencer"":""Illumina GAIIx"", ""MA:FImpact"":"""", ""MA:FIS"":"""", ""MA:protein.change"":"""", ""MA:link.MSA"":"""", ""MA:link.PDB"":"""", ""MA:link.var"":"""", ""Tumor_Sample_UUID"":"""", ""Matched_Norm_Sample_UUID"":"""", ""HGVSc"":""c.909+58A>G"", ""HGVSp"":"""", ""HGVSp_Short"":"""", ""Transcript_ID"":""ENST00000368873"", ""Exon_Number"":"""", ""t_depth"":""0"", ""t_ref_count"":""."", ""t_alt_count"":""."", ""n_depth"":""0"", ""n_ref_count"":""."", ""n_alt_count"":""."", ""all_effects"":""PI4KB,intron_variant,,ENST00000368875,NM_002651.2;PI4KB,intron_variant,,ENST00000368874,NM_001198774.1;PI4KB,intron_variant,,ENST00000271657,;PI4KB,intron_variant,,ENST00000368873,;PI4KB,intron_variant,,ENST00000368872,NM_001198773.1;PI4KB,intron_variant,,ENST00000529142,NM_001198775.1;PI4KB,intron_variant,,ENST00000430800,;PI4KB,intron_variant,,ENST00000489223,;PI4KB,downstream_gene_variant,,ENST00000438243,;"", ""Allele"":""C"", ""Gene"":""ENSG00000143393"", ""Feature"":""ENST00000368873"", ""Feature_type"":""Transcript"", ""Consequence"":""intron_variant"", ""cDNA_position"":""-/3340"", ""CDS_position"":""-/2451"", ""Protein_position"":""-/816"", ""Amino_acids"":"""", ""Codons"":"""", ""Existing_variation"":"""", ""ALLELE_NUM"":""1"", ""DISTANCE"":"""", ""SYMBOL"":""PI4KB"", ""SYMBOL_SOURCE"":""HGNC"", ""HGNC_ID"":""8984"", ""BIOTYPE"":""protein_coding"", ""CANONICAL"":"""", ""CCDS"":"""", ""ENSP"":""ENSP00000357867"", ""SWISSPROT"":""PI4KB_HUMAN"", ""TREMBL"":""F8W860_HUMAN,E9PL47_HUMAN,E9PIH4_HUMAN"", ""UNIPARC"":""UPI0000040634"", ""RefSeq"":"""", ""SIFT"":"""", ""PolyPhen"":"""", ""EXON"":"""", ""INTRON"":""02.nov"", ""DOMAINS"":"""", ""GMAF"":"""", ""AFR_MAF"":"""", ""AMR_MAF"":"""", ""ASN_MAF"":"""", ""EAS_MAF"":"""", ""EUR_MAF"":"""", ""SAS_MAF"":"""", ""AA_MAF"":"""", ""EA_MAF"":"""", ""CLIN_SIG"":"""", ""SOMATIC"":"""", ""PUBMED"":"""", ""MOTIF_NAME"":"""", ""MOTIF_POS"":"""", ""HIGH_INF_POS"":"""", ""MOTIF_SCORE_CHANGE"":"""", ""IMPACT"":""MODIFIER"", ""PICK"":"""", ""VARIANT_CLASS"":""SNV"", ""TSL"":"""", ""HGVS_OFFSET"":"""", ""PHENO"":"""", ""MINIMISED"":""1"", ""ExAC_AF"":"""", ""ExAC_AF_AFR"":"""", ""ExAC_AF_AMR"":"""", ""ExAC_AF_EAS"":"""", ""ExAC_AF_FIN"":"""", ""ExAC_AF_NFE"":"""", ""ExAC_AF_OTH"":"""", ""ExAC_AF_SAS"":"""", ""GENE_PHENO"":"""", ""FILTER"":"".""}" scanner := bufio.NewScanner(fp) // todo remove i (get all rows) i := 0 // there are 232 211 lines to read for scanner.Scan() { i = i+1 if i % 1000 == 0{ //println(i) //return annotationToValues } // drop the mutation number annotationsString := strings.SplitN(scanner.Text(), ",", 2)[1] // divide the annotations (list of "annotationName"":""annotationValue") annotationsList := strings.Split(annotationsString[4:len(annotationsString)-4], "\"\", \"\"") for _, annot := range annotationsList { temp := strings.Split(annot, "\"\":\"\"") annotationName := temp[0] annotationValue := temp[1] if annotationValue == "" { continue } if annotationValue == "-" { continue } if annotationValue == "Unknown" { continue } _, ok := annotationToValues[annotationName] // it is the first time I see the annotations, then add to the map if !ok { annotationToValues[annotationName] = &[]string{} } // add the new value if not already in the list containsOrAdd(annotationToValues[annotationName], annotationValue) //// check if the value is already in the map //alreadypresent := false //for _, val := range annotationToValues[annotationName]{ // if val == annotationValue { // alreadypresent = true // break // } //} // //if !alreadypresent{ // annotationToValues[annotationName] = append(annotationToValues[annotationName], annotationValue) //} } } if err := scanner.Err(); err != nil { log.Fatal(err) } + + // sort all the values + for _, anValues := range annotationToValues{ + sort.Strings(*anValues) + } return annotationToValues } func storeAnnotationToValuesMap(annotationToValues map[string]*[]string){ jsonString, err := json.Marshal(annotationToValues) if err != nil {print("Error marshalling to json"); return} annofile, err := os.Create(topath + toAnnotationToValueMap) if err != nil {log.Fatal("Error while creating", topath + toAnnotationToValueMap); return} defer annofile.Close() _, err = annofile.Write(jsonString) if err != nil {log.Fatal("Error in writing in the genomic file", err)} } func loadAnnotationToValuesMap() (annotationToValues map[string]*[]string){ annofile, err := os.Open(topath + toAnnotationToValueMap) if err != nil { log.Fatal("Error while opening", topath + toAnnotationToValueMap); return } defer annofile.Close() //var bytes = make([]byte, 5) bytes, err := ioutil.ReadAll(annofile) //_, err = annofile.Read(bytes) if err != nil {log.Fatal("Error while reading", topath + toAnnotationToValueMap, err); return} err = json.Unmarshal(bytes, &annotationToValues) if err != nil {println("Error unmarshalling from json:", err.Error()); return} return } // given the map create one csv file per annotation storing its values func createAnnotationsTables(annotationToValues map[string]*[]string){ - var anno_path = topath + "annotation_tables/" + var anno_path = topath + annotation_tables_folder // create path if not existing if _, err := os.Stat(anno_path); os.IsNotExist(err) { os.Mkdir(anno_path, os.ModePerm) } for annotation, values := range annotationToValues { annotation = strings.Replace(strings.ToLower(annotation), " ", "_", -1) var anno_file = anno_path + annotation + ".csv" annofile, err := os.Create(anno_file) if err != nil {log.Fatal("Error while creating", anno_file); return} defer annofile.Close() valuesStr := "\"" + strings.Join(*values, "\"\n\"") + "\"" _, err = annofile.WriteString(valuesStr) if err != nil { log.Fatal("Error while writing file ", anno_file, err) } } } // given the map creates one csv file with all the annotation names func createAnnotationsNamesTable(annotationToValues map[string]*[]string){ - var anno_file = topath + "annotation_tables/annotation_names" + var anno_file = topath + "annotation_names.csv" annofile, err := os.Create(anno_file) if err != nil {log.Fatal("Error while creating", anno_file); return} defer annofile.Close() annotation_names := make([]string, 0, len(annotationToValues)) for name := range annotationToValues { annotation_names = append(annotation_names, name) } - + sort.Strings(annotation_names) valuesStr := "\"" + strings.Join(annotation_names, "\"\n\"") + "\"" _, err = annofile.WriteString(valuesStr) if err != nil { log.Fatal("Error while writing file ", anno_file, err) } } func containsOrAdd(array *[]string, str string) bool { for _, s := range *array { if s == str { return true } } *array = append(*array, str) return false } diff --git a/shrine-webclient/src/main/tools/create_tables/postgres_load_tables b/shrine-webclient/src/main/tools/create_tables/postgres_load_tables index 9e5e3c7cd..21ba23e95 100644 --- a/shrine-webclient/src/main/tools/create_tables/postgres_load_tables +++ b/shrine-webclient/src/main/tools/create_tables/postgres_load_tables @@ -1,57 +1,17 @@ create schema shrine_ont --- --- ANNOTATIONS METADATA --- -create table shrine_ont.genomic_annotations_metadata -( - annotation_id character varying(255) NOT NULL PRIMARY KEY, - annotation_values text NOT NULL -); - -ALTER TABLE shrine_ont.genomic_annotations_metadata OWNER TO shrine_ont; --- ALTER TABLE ONLY shrine_ont.genomic_annotations_metadata ADD CONSTRAINT annotation_id_pk PRIMARY KEY (annotation_id); - -truncate shrine_ont.genomic_annotations_metadata; -\copy shrine_ont.genomic_annotations_metadata FROM '/home/niccolo/Downloads/createAnnoMetadata/SHRINE_ONT_ANNOTATIONS_METADATA.csv' ESCAPE '"' DELIMITER ',' CSV; - - -- --- NEW ONTOLOGY GENOMIC --- -create table shrine_ont.genomic_annotations_new +create table shrine_ont.genomic_annotations ( variant_id character varying(255) NOT NULL PRIMARY KEY, variant_name character varying(255) NOT NULL, - annotations text NOT NULL, - t_depth numeric NOT NULL + hugo_gene_symbol character varying(255) NOT NULL, + protein_change character varying(255) NOT NULL, + annotations text NOT NULL + -- t_depth numeric NOT NULL ); -ALTER TABLE shrine_ont.genomic_annotations_new OWNER TO shrine_ont; --- ALTER TABLE ONLY shrine_ont.genomic_annotations_new ADD CONSTRAINT variant_id_pk PRIMARY KEY (variant_id); - -truncate shrine_ont.genomic_annotations_new; -\copy shrine_ont.genomic_annotations_new FROM '/home/niccolo/SHRINE/SHRINE_ONT_GENOMIC_ANNOTATIONS_NEW.csv' ESCAPE '"' DELIMITER ',' CSV; - - --- --- GENE VALUES --- -create table shrine_ont.gene_values -( - gene_value character varying(255) NOT NULL PRIMARY KEY -); - -ALTER TABLE shrine_ont.gene_values OWNER TO shrine_ont; --- ALTER TABLE ONLY shrine_ont.gene_values ADD CONSTRAINT variant_id_pk PRIMARY KEY (variant_id); - -truncate shrine_ont.gene_values; -\copy shrine_ont.gene_values FROM '/home/niccolo/SHRINE/SHRINE_GENES.csv' ESCAPE '"' DELIMITER ',' CSV; - - --- --- VARIANT NAMES --- -create table shrine_ont.variant_names -( - variant_name character varying(255) NOT NULL PRIMARY KEY -); - -ALTER TABLE shrine_ont.variant_names OWNER TO shrine_ont; --- ALTER TABLE ONLY shrine_ont.genomic_annotations_new ADD CONSTRAINT variant_id_pk PRIMARY KEY (variant_id); - -truncate shrine_ont.variant_names; -\copy shrine_ont.variant_names FROM '/home/niccolo/SHRINE/SHRINE_VARIANTS_NAMES.csv' ESCAPE '"' DELIMITER ',' CSV; +ALTER TABLE shrine_ont.genomic_annotations OWNER TO shrine_ont; +truncate shrine_ont.genomic_annotations; +\copy shrine_ont.genomic_annotations FROM '/home/niccolo/Downloads/new_tables/tables/genomic_annotations.csv' ESCAPE '"' DELIMITER ',' CSV;