Page MenuHomec4science

Snakefile
No OneTemporary

File Metadata

Created
Mon, Feb 24, 00:04

Snakefile

from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider
HTTP = HTTPRemoteProvider()
SAMPLES = ['DATASET_NAME']
for smp in SAMPLES:
print("Sample " + smp + " will be processed")
rule final:
input:
directory('/tmp/MOUSE_GRCm38.p6'),
expand('/output/{sample}_R1_fastqc.html', sample=SAMPLES),
expand('/output/{sample}_R2_fastqc.html', sample=SAMPLES),
expand('/output/{sample}_merged.fq', sample=SAMPLES),
expand('/output/{sample}_merged_sorted.fq', sample=SAMPLES),
expand('/output/{sample}_merged_sorted_mRNA.fq', sample=SAMPLES),
expand('/output/{sample}_unmerged_R1.fq', sample=SAMPLES),
expand('/output/{sample}_unmerged_R2.fq', sample=SAMPLES),
expand('/output/{sample}_trimmed_R1.fq', sample=SAMPLES),
expand('/output/{sample}_trimmed_R2.fq', sample=SAMPLES),
expand('/output/{sample}_trimmed_R1_fastqc.zip', sample=SAMPLES),
expand('/output/{sample}_trimmed_R2_fastqc.zip', sample=SAMPLES),
expand('/output/{sample}.Aligned.sortedByCoord.out.bam',sample=SAMPLES),
expand('/output/{sample}.Aligned.sortedByCoord.out.bam.bai',sample=SAMPLES),
# expand('/output/{sample}.txt',sample=SAMPLES),
rule get_genome:
input:
HTTP.remote("cloud.s3it.uzh.ch:8080/v1/AUTH_576f87a2a18948bdb2da11fdcad29ae2/RNA-genome/GENOME.zip", keep_local=True)
output:
directory('/tmp/MOUSE_GRCm38.p6')
priority:1
run:
outputName = os.path.join('/tmp',os.path.basename(input[0]))
shell("mv {input} {outputName} && unzip {outputName} -d /tmp && rm {outputName}")
rule perform_qc:
input:
R1='/input/{sample}_R1.fq',
R2='/input/{sample}_R2.fq',
params:
out_dir = '/output/'
output:
'/output/{sample}_R1_fastqc.html',
'/output/{sample}_R1_fastqc.zip',
'/output/{sample}_R2_fastqc.html',
'/output/{sample}_R2_fastqc.zip',
shell:
r'''
fastqc -o {params.out_dir} -f fastq {input.R1} {input.R2}
'''
rule merging_files:
input:
R_1='/input/{sample}_R1.fq',
R_2='/input/{sample}_R2.fq',
output:
'/output/{sample}_merged.fq',
message: """---MERGING----"""
shell:
'''
/usr/share/sortmerna/scripts/merge-paired-reads.sh {input.R_1} {input.R_2} {output}
'''
rule sortmerna:
input: '/output/{sample}_merged.fq',
params:
out_prefix='/output/{sample}_merged_sorted',
out_prefix_mRNA='/output/{sample}_merged_sorted_mRNA',
output: merged_file='/output/{sample}_merged_sorted_mRNA.fq', merged_sorted='/output/{sample}_merged_sorted.fq',
message: """---SORTING---"""
shell:
'''
sortmerna --ref /usr/share/sortmerna/rRNA_databases/silva-bac-23s-id98.fasta,/usr/share/sortmerna/rRNA_databases/index/silva-bac-23s-id98: --reads {input} --paired_in -a 16 --log --fastx --aligned {params.out_prefix_mRNA} --other {params.out_prefix}
'''
rule unmerging_files:
input: '/output/{sample}_merged.fq',
output:
R1='/output/{sample}_unmerged_R1.fq',
R2='/output/{sample}_unmerged_R2.fq',
message: """---MERGING----"""
shell:
'''
/usr/share/sortmerna/scripts/unmerge-paired-reads.sh {input} {output.R1} {output.R2}
'''
rule trimmometic_run:
input: R1_read='/output/{sample}_unmerged_R1.fq', R2_read='/output/{sample}_unmerged_R2.fq',
output:
R1_trimmed='/output/{sample}_trimmed_R1.fq',
R2_trimmed='/output/{sample}_trimmed_R2.fq',
R1_unpaired='/output/{sample}_trimmed_upaired_R1.fq',
R2_unpaired='/output/{sample}_trimmed_upaired_R2.fq',
message: """---TRIMMOMATIC---"""
shell:
'''
java -jar /usr/share/java/trimmomatic-0.35.jar PE -phred33 {input.R1_read} {input.R2_read} {output.R1_trimmed} {output.R1_unpaired} {output.R2_trimmed} {output.R2_unpaired} ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:30
'''
rule perform_qc_timmed:
input:
R1_trimmed_qc='/output/{sample}_trimmed_R1.fq',
R2_trimmed_qc='/output/{sample}_trimmed_R2.fq',
params:
out_dir = '/output/'
output:
'/output/{sample}_trimmed_R1_fastqc.zip',
'/output/{sample}_trimmed_R2_fastqc.zip',
message: """---QC trimmed Data---"""
shell:
r'''
fastqc -outdir {params.out_dir} -f fastq {input.R1_trimmed_qc} {input.R2_trimmed_qc}
'''
rule star_alignment:
input:
R1_trimmed='/output/{sample}_trimmed_R1.fq',
R2_trimmed='/output/{sample}_trimmed_R2.fq',
# params:
# BAM_file_name ='STAR/',
output:
BAM_file='/output/{sample}.Aligned.sortedByCoord.out.bam',
message: """---STAR ALIGNMENT---"""
shell:
'''
STAR --runThreadN 8 --runMode alignReads --genomeDir /tmp/MOUSE_GRCm38.p6/star_indices_overhang100/ --readFilesIn {input.R1_trimmed} {input.R2_trimmed} --outFileNamePrefix /output/{wildcards.sample}. --outSAMtype BAM SortedByCoordinate
'''
# to index the sorted bam file
rule samtools_index:
input:
bam_2='/output/{sample}.Aligned.sortedByCoord.out.bam',
output:
bam_indexed='/output/{sample}.Aligned.sortedByCoord.out.bam.bai',
message: """---SAMTOOLS_BAM_INDEX----"""
shell:
'''
samtools index {input.bam_2}
'''
# to convert BAM to BIGWIG
rule bamTObigwig:
input:
'STAR/{sample}.Aligned.sortedByCoord.out.bam',
output:
'bigwig/{sample}_aligned_sorted.bw',
message: """---CONVERT_BAM_TO_BIGWIG---"""
shell:
'''
bamCoverage --bam {input} -o {output} --outFileFormat bigwig --binSize 10 --normalizeUsing RPGC --effectiveGenomeSize 2150570000
'''
# to count the read with HTSeq
rule HTSeq_count:
input:
'/output/{sample}.Aligned.sortedByCoord.out.bam',
output:
'/output/{sample}.txt',
message: """---HTSeq Count---"""
shell:
'''
htseq-count -s no -i gene_name {input} /tmp/MOUSE_GRCm38.p6/annotation/Mus_musculus.GRCm38.93.gtf > {output}
'''

Event Timeline