#!/usr/bin/env bash
#
# Sequence-based classification of the 10x Genomics PBMC 5k peaks with
# bin/EMSequence, followed by the construction of the class models
# (bin/ProbToModel) and of their extended versions (bin/ReadModelExtender and
# bin/SequenceModelExtender).
#
# Usage: <this script> [seed_length]
#   seed_length  number of characters of the random seed handed to
#                bin/EMSequence (default: 15).
#
# All paths are relative: start the script from the directory that contains
# bin/ and data/.

# Stop at the first failing step (each step consumes the output of the
# previous one) and on any unset variable.
# 'pipefail' is deliberately NOT enabled: in the seed pipeline below, tr is
# normally terminated by SIGPIPE as soon as head has read enough characters.
set -eu

# paths
## dir
data_dir_p="data/10xgenomics_PBMC_5k_peaks"
data_dir="data/10xgenomics_PBMC_5k"
# the trailing slash is kept so that the PWM paths given to bin/EMSequence
# are exactly the ones used so far
pwm_dir="data/pwm/jaspar_2018_clustering/"
hg19_dir="data/genomes"
results_dir="results/10xgenomics_PBMC_5k_peaks_classification_7"

## matrix files
file_mat_open="${data_dir_p}/peaks_rmsk_openchromatin_1kb_read_atac.mat"
file_mat_nucl="${data_dir_p}/peaks_rmsk_nucleosomes_1kb_fragment_center.mat"
file_mat_seq="${data_dir_p}/peaks_rmsk_sequences_1kb.mat"

## file with seeds
file_seed="${results_dir}/peaks_rmsk_seed.txt"

## inputs of the model extension step (identical for every k)
file_bed="${data_dir}/atac_v1_pbmc_5k_peaks_rmsk.bed"
file_fasta="${hg19_dir}/hg19.fasta"
file_bam_open="${data_dir}/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam"
file_bai_open="${data_dir}/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai"
file_bam_nucl="${data_dir}/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam"
file_bai_nucl="${data_dir}/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai"

mkdir -p "$results_dir"
touch "$file_seed"

# EM param
n_iter='1'
n_shift='971'
n_core=8

# length of the random seed: first command line argument, 15 if not given
seed_len=${1:-15}

## PWM files
jun="$pwm_dir/cluster_3_node_23_20_motifs_prob.mat"
hif1a="$pwm_dir/cluster_4_node_31_3_motifs_prob.mat"
myc="$pwm_dir/cluster_4_node_22_4_motifs_prob.mat"
pu1="$pwm_dir/cluster_7_node_13_2_motifs_prob.mat"
cebpb="$pwm_dir/cluster_5_node_20_5_motifs_prob.mat"
irf4="$pwm_dir/cluster_31_node_4_5_motifs_prob.mat"
irf2="$pwm_dir/cluster_31_node_5_2_motifs_prob.mat"
lhx3="$pwm_dir/cluster_1_node_74_2_motifs_prob.mat"
foxh1="$pwm_dir/cluster_66_1_motifs_prob.mat"
sox3="$pwm_dir/cluster_33_node_1_2_motifs_prob.mat"
mef2c="$pwm_dir/cluster_20_4_motifs_prob.mat"
elf5="$pwm_dir/cluster_7_node_17_5_motifs_prob.mat"
stat6="$pwm_dir/cluster_32_node_STAT6_1_motifs_prob.mat"
nfe2="$pwm_dir/cluster_3_node_24_4_motifs_prob.mat"
ahr="$pwm_dir/cluster_4_node_30_2_motifs_prob.mat"
e2f2="$pwm_dir/cluster_39_node_1_2_motifs_prob.mat"
ctcf="$pwm_dir/cluster_48_node_ctcf_1_motifs_prob.mat"
klf="$pwm_dir/cluster_28_node_14_3_motifs_prob.mat"
nr4a1="$pwm_dir/cluster_2_node_12_4_motifs_prob.mat"
egr="$pwm_dir/cluster_28_node_13_4_motifs_prob.mat"
gata="$pwm_dir/cluster_21_node_5_6_motifs_prob.mat"
nfat="$pwm_dir/cluster_19_node_2_3_motifs_prob.mat"
runx="$pwm_dir/cluster_38_node_3_3_motifs_prob.mat"

# comma separated list given to --motifs; the order of the motifs is the one
# that has always been passed to bin/EMSequence
motifs=(
  "$jun" "$hif1a" "$myc" "$pu1" "$cebpb" "$irf4" "$irf2" "$lhx3"
  "$foxh1" "$sox3" "$mef2c" "$elf5" "$stat6" "$nfe2" "$ahr" "$e2f2"
  "$ctcf" "$klf" "$nr4a1" "$egr" "$gata" "$nfat" "$runx"
)
# "${motifs[*]}" joins the elements with the first character of IFS; IFS is
# only modified inside the command substitution subshell
motif_list=$(IFS=,; printf '%s' "${motifs[*]}")

# classify
for k in 23; do
  ## results files
  file_prob="${results_dir}/peaks_rmsk_sequences_1kb_${k}class_prob.mat4d"
  file_mod1="${results_dir}/peaks_rmsk_openchromatin_1kb_read_atac_${k}class_model.mat"
  file_mod2="${results_dir}/peaks_rmsk_nucleosomes_1kb_fragment_center_${k}class_model.mat"
  file_mod3="${results_dir}/peaks_rmsk_sequences_1kb_${k}class_model.mat"

  # random seed made of seed_len characters taken from [_A-Za-z0-9-]
  # NOTE(review): the alphabet contains '-', so a seed may start with a dash;
  # confirm that the option parser of bin/EMSequence accepts such a value.
  seed=$(tr -dc _A-Z-a-z-0-9 < /dev/urandom | head -c "$seed_len")

  # keep track of the seed used for this probability file (written before
  # the run so that it is recorded even if the classification fails)
  printf '%s %s\n' "$file_prob" "$seed" >> "$file_seed"

  bin/EMSequence \
    --seq "$file_mat_seq" \
    --class "$k" \
    --motifs "$motif_list" \
    --shift "$n_shift" \
    --flip \
    --iter "$n_iter" \
    --seed "$seed" \
    --thread "$n_core" \
    > "$file_prob"

  # models for each data type, from the same probability file
  bin/ProbToModel --read "$file_mat_open" --prob "$file_prob" \
    --thread "$n_core" > "$file_mod1"
  bin/ProbToModel --read "$file_mat_nucl" --prob "$file_prob" \
    --thread "$n_core" > "$file_mod2"
  bin/ProbToModel --seq "$file_mat_seq" --prob "$file_prob" \
    --thread "$n_core" > "$file_mod3"

  # extend models
  file_mod1_ext="${results_dir}/peaks_rmsk_openchromatin_1kb_read_atac_${k}class_model_extended.mat"
  file_mod2_ext="${results_dir}/peaks_rmsk_nucleosomes_1kb_fragment_center_${k}class_model_extended.mat"
  file_mod3_ext="${results_dir}/peaks_rmsk_sequences_1kb_${k}class_model_extended.mat"

  bin/ReadModelExtender \
    --bed "$file_bed" \
    --bam "$file_bam_open" \
    --bai "$file_bai_open" \
    --prob "$file_prob" \
    --from -500 --to 500 --ext 1000 \
    --binSize 1 \
    --method 'read_atac' \
    --thread "$n_core" \
    > "$file_mod1_ext"

  bin/ReadModelExtender \
    --bed "$file_bed" \
    --bam "$file_bam_nucl" \
    --bai "$file_bai_nucl" \
    --prob "$file_prob" \
    --from -500 --to 500 --ext 1000 \
    --binSize 1 \
    --method 'fragment_center' \
    --thread "$n_core" \
    > "$file_mod2_ext"

  bin/SequenceModelExtender \
    --bed "$file_bed" \
    --fasta "$file_fasta" \
    --prob "$file_prob" \
    --from -500 --to 500 --ext 1000 \
    --thread "$n_core" \
    > "$file_mod3_ext"
done