#!/usr/bin/env bash
#
# Pre-process the 10x Genomics PBMC 5k scATAC-seq BAM file:
#   1. tabulate the fragment length distribution,
#   2. split the fragments into length classes,
#   3. split the di-nucleosome fragments in two and re-sort the result,
#   4. index the BAM files used downstream.
#
# All paths are relative: run this from the directory that contains both
# data/ and scripts/. Requires samtools and python3.6 on PATH.

# Stop at the first failing step so that a broken intermediate file is never
# fed to (or moved over the result of) a later step.
set -eu
# pipefail is not available in every /bin/sh; enable it only where supported.
if (set -o pipefail) 2>/dev/null; then
  set -o pipefail
fi

readonly DATA_DIR='data/10xgenomics_PBMC_5k'
readonly PREFIX="${DATA_DIR}/atac_v1_pbmc_5k_possorted_filtered"
readonly INPUT_BAM="${PREFIX}.bam"

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Run split_by_length.py on the input BAM with `--length $1-$2`, writing
# "${PREFIX}_$1-$2bp.bam".
# Arguments: $1 - lower fragment length bound (bp)
#            $2 - upper fragment length bound (bp)
split_by_length() {
  python3.6 scripts/bam_tools/split_by_length.py \
    -i "${INPUT_BAM}" \
    -o "${PREFIX}_${1}-${2}bp.bam" \
    --length "${1}-${2}"
}

mkdir -p "${DATA_DIR}"
[ -f "${INPUT_BAM}" ] || die "input BAM not found: ${INPUT_BAM}"

# Get fragment lengths, adapted from
# https://dbrg77.wordpress.com/2017/02/10/atac-seq-insert-size-plotting/
# SAM column 9 is the template length; keeping only positive values counts
# each pair once. Output lines are "<count> <length>", ordered by length
# (sort -n before uniq -c already yields that order).
samtools view "${INPUT_BAM}" \
  | awk -F '\t' '$9 > 0 { print $9 }' \
  | sort -n \
  | uniq -c \
  | sed -e 's/^[[:blank:]]*//' \
  > "${PREFIX}_fragment_lengths.txt"

# Analyse the fragment lengths.
# NOTE(review): the 30-149bp BAM is neither indexed nor used again in this
# script; presumably it is consumed by a separate analysis - confirm.
split_by_length 30 149

# Based on the fragment length analysis, separate the fragments as follows:
#   30 -  84bp : open chromatin fragments
#  133 - 266bp : mono-nucleosome fragments
#  341 - 500bp : di-nucleosome fragments
split_by_length 30 84
split_by_length 133 266
split_by_length 341 500

# Split di-nucleosome fragments into mono-nucleosome fragments, then sort the
# result in place (via a temporary file) because samtools index needs a
# coordinate-sorted BAM.
readonly DINUC_BAM="${PREFIX}_341-500bp.bam"
readonly SPLIT_BAM="${PREFIX}_341-500bp_splitintwo.bam"
readonly SORTED_TMP="${PREFIX}_341-500bp_splitintwo_sort.bam"

python3.6 scripts/bam_tools/split_in_two.py -i "${DINUC_BAM}" -o "${SPLIT_BAM}"
samtools sort -o "${SORTED_TMP}" "${SPLIT_BAM}"
mv -- "${SORTED_TMP}" "${SPLIT_BAM}"

# Index all BAM files.
for length_range in 30-84 133-266 341-500; do
  samtools index "${PREFIX}_${length_range}bp.bam"
done
samtools index "${SPLIT_BAM}"