Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F102767618
Snakefile
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Feb 24, 00:04
Size
6 KB
Mime Type
text/x-python
Expires
Wed, Feb 26, 00:04 (2 d)
Engine
blob
Format
Raw Data
Handle
24418718
Attached To
R7855 grna
Snakefile
View Options
from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider
HTTP = HTTPRemoteProvider()
SAMPLES = ['DATASET_NAME']
for smp in SAMPLES:
print("Sample " + smp + " will be processed")
rule final:
input:
directory('/tmp/MOUSE_GRCm38.p6'),
expand('/output/{sample}_R1_fastqc.html', sample=SAMPLES),
expand('/output/{sample}_R2_fastqc.html', sample=SAMPLES),
expand('/output/{sample}_merged.fq', sample=SAMPLES),
expand('/output/{sample}_merged_sorted.fq', sample=SAMPLES),
expand('/output/{sample}_merged_sorted_mRNA.fq', sample=SAMPLES),
expand('/output/{sample}_unmerged_R1.fq', sample=SAMPLES),
expand('/output/{sample}_unmerged_R2.fq', sample=SAMPLES),
expand('/output/{sample}_trimmed_R1.fq', sample=SAMPLES),
expand('/output/{sample}_trimmed_R2.fq', sample=SAMPLES),
expand('/output/{sample}_trimmed_R1_fastqc.zip', sample=SAMPLES),
expand('/output/{sample}_trimmed_R2_fastqc.zip', sample=SAMPLES),
expand('/output/{sample}.Aligned.sortedByCoord.out.bam',sample=SAMPLES),
expand('/output/{sample}.Aligned.sortedByCoord.out.bam.bai',sample=SAMPLES),
# expand('/output/{sample}.txt',sample=SAMPLES),
rule get_genome:
input:
HTTP.remote("cloud.s3it.uzh.ch:8080/v1/AUTH_576f87a2a18948bdb2da11fdcad29ae2/RNA-genome/GENOME.zip", keep_local=True)
output:
directory('/tmp/MOUSE_GRCm38.p6')
priority:1
run:
outputName = os.path.join('/tmp',os.path.basename(input[0]))
shell("mv {input} {outputName} && unzip {outputName} -d /tmp && rm {outputName}")
rule perform_qc:
input:
R1='/input/{sample}_R1.fq',
R2='/input/{sample}_R2.fq',
params:
out_dir = '/output/'
output:
'/output/{sample}_R1_fastqc.html',
'/output/{sample}_R1_fastqc.zip',
'/output/{sample}_R2_fastqc.html',
'/output/{sample}_R2_fastqc.zip',
shell:
r'''
fastqc -o {params.out_dir} -f fastq {input.R1} {input.R2}
'''
rule merging_files:
input:
R_1='/input/{sample}_R1.fq',
R_2='/input/{sample}_R2.fq',
output:
'/output/{sample}_merged.fq',
message: """---MERGING----"""
shell:
'''
/usr/share/sortmerna/scripts/merge-paired-reads.sh {input.R_1} {input.R_2} {output}
'''
rule sortmerna:
input: '/output/{sample}_merged.fq',
params:
out_prefix='/output/{sample}_merged_sorted',
out_prefix_mRNA='/output/{sample}_merged_sorted_mRNA',
output: merged_file='/output/{sample}_merged_sorted_mRNA.fq', merged_sorted='/output/{sample}_merged_sorted.fq',
message: """---SORTING---"""
shell:
'''
sortmerna --ref /usr/share/sortmerna/rRNA_databases/silva-bac-23s-id98.fasta,/usr/share/sortmerna/rRNA_databases/index/silva-bac-23s-id98: --reads {input} --paired_in -a 16 --log --fastx --aligned {params.out_prefix_mRNA} --other {params.out_prefix}
'''
rule unmerging_files:
input: '/output/{sample}_merged.fq',
output:
R1='/output/{sample}_unmerged_R1.fq',
R2='/output/{sample}_unmerged_R2.fq',
message: """---MERGING----"""
shell:
'''
/usr/share/sortmerna/scripts/unmerge-paired-reads.sh {input} {output.R1} {output.R2}
'''
rule trimmometic_run:
input: R1_read='/output/{sample}_unmerged_R1.fq', R2_read='/output/{sample}_unmerged_R2.fq',
output:
R1_trimmed='/output/{sample}_trimmed_R1.fq',
R2_trimmed='/output/{sample}_trimmed_R2.fq',
R1_unpaired='/output/{sample}_trimmed_upaired_R1.fq',
R2_unpaired='/output/{sample}_trimmed_upaired_R2.fq',
message: """---TRIMMOMATIC---"""
shell:
'''
java -jar /usr/share/java/trimmomatic-0.35.jar PE -phred33 {input.R1_read} {input.R2_read} {output.R1_trimmed} {output.R1_unpaired} {output.R2_trimmed} {output.R2_unpaired} ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:30
'''
rule perform_qc_timmed:
input:
R1_trimmed_qc='/output/{sample}_trimmed_R1.fq',
R2_trimmed_qc='/output/{sample}_trimmed_R2.fq',
params:
out_dir = '/output/'
output:
'/output/{sample}_trimmed_R1_fastqc.zip',
'/output/{sample}_trimmed_R2_fastqc.zip',
message: """---QC trimmed Data---"""
shell:
r'''
fastqc -outdir {params.out_dir} -f fastq {input.R1_trimmed_qc} {input.R2_trimmed_qc}
'''
rule star_alignment:
input:
R1_trimmed='/output/{sample}_trimmed_R1.fq',
R2_trimmed='/output/{sample}_trimmed_R2.fq',
# params:
# BAM_file_name ='STAR/',
output:
BAM_file='/output/{sample}.Aligned.sortedByCoord.out.bam',
message: """---STAR ALIGNMENT---"""
shell:
'''
STAR --runThreadN 8 --runMode alignReads --genomeDir /tmp/MOUSE_GRCm38.p6/star_indices_overhang100/ --readFilesIn {input.R1_trimmed} {input.R2_trimmed} --outFileNamePrefix /output/{wildcards.sample}. --outSAMtype BAM SortedByCoordinate
'''
# to index the sorted bam file
rule samtools_index:
input:
bam_2='/output/{sample}.Aligned.sortedByCoord.out.bam',
output:
bam_indexed='/output/{sample}.Aligned.sortedByCoord.out.bam.bai',
message: """---SAMTOOLS_BAM_INDEX----"""
shell:
'''
samtools index {input.bam_2}
'''
# to convert BAM to BIGWIG
rule bamTObigwig:
input:
'STAR/{sample}.Aligned.sortedByCoord.out.bam',
output:
'bigwig/{sample}_aligned_sorted.bw',
message: """---CONVERT_BAM_TO_BIGWIG---"""
shell:
'''
bamCoverage --bam {input} -o {output} --outFileFormat bigwig --binSize 10 --normalizeUsing RPGC --effectiveGenomeSize 2150570000
'''
# to count the read with HTSeq
rule HTSeq_count:
input:
'/output/{sample}.Aligned.sortedByCoord.out.bam',
output:
'/output/{sample}.txt',
message: """---HTSeq Count---"""
shell:
'''
htseq-count -s no -i gene_name {input} /tmp/MOUSE_GRCm38.p6/annotation/Mus_musculus.GRCm38.93.gtf > {output}
'''
Event Timeline
Log In to Comment