diff --git a/get_tpm.R b/get_tpm.R new file mode 100644 index 0000000..0e9094c --- /dev/null +++ b/get_tpm.R @@ -0,0 +1,14 @@ +get_tpm <- function(x, org=c('hg19', 'mm9', 'rheMac8')) { ## Description: Compute TPM from a table of counts. + ## Para: -x: table with raw counts, ensembl id as rownames + ## -org: organism. + countTable = x + require(GenomicFeatures) org <- match.arg(org) ens <- makeTxDbFromUCSC(genome=org, tablename='ensGene', url = 'http://genome-euro.ucsc.edu/cgi-bin/', + goldenPath_url = 'http://hgdownload.cse.ucsc.edu/goldenPath') + + exonic <- exonsBy(ens, by='gene') + red.exonic <- reduce(exonic) + exon.lengths <- sum(width(red.exonic)) names(exon.lengths) <- unlist(lapply(strsplit(names(exon.lengths), '\\.'), function(x) x[[1]])) + exon.lengths.o <- exon.lengths[rownames(countTable)] length_norm <- countTable / exon.lengths.o + ans <- t(t(length_norm)/colSums(length_norm)) * 1e6 # see https://rpubs.com/bbolker/sweep_divide + ans +}