diff --git a/R_funcs.R b/R_funcs.R
index c590423..b266059 100644
--- a/R_funcs.R
+++ b/R_funcs.R
@@ -1,61 +1,110 @@
 corr_heatmap <- function(X) {
     ## Description: compute the pairwise correlation of matrix X columns 
     ## and plot it as a heatmap.
     ## PARA: matrix X
     library(gplots)
     corr.pair <- cor(X)
     palette <- colorRampPalette(colors = c("blue", "white", "red"))
     ans <- heatmap.2(corr.pair, Rowv = NULL, Colv = NULL, dendrogram = "none", 
               scale = "none", trace = "none",
               col = palette, key.title = "Pearson cor. coef.")
     ans
 }
 
 
 get_tpm <- function(x, org=c('hg19', 'mm9', 'rheMac8')) {
     ## Description: Compute TPM from a table of counts.
     ## Para: -x: table with raw counts, ensembl id as rownames
     ##       -org: organism.
     countTable = x
     require(GenomicFeatures)
     org <- match.arg(org)
     ens <- makeTxDbFromUCSC(genome=org, tablename='ensGene',
                             url = 'http://genome-euro.ucsc.edu/cgi-bin/',
                             goldenPath_url = 'http://hgdownload.cse.ucsc.edu/goldenPath')
     exonic <- exonsBy(ens, by='gene')
     red.exonic <- reduce(exonic)
     exon.lengths <- sum(width(red.exonic))
     names(exon.lengths) <- unlist(lapply(strsplit(names(exon.lengths), '\\.'), function(x) x[[1]]))
     exon.lengths.o <- exon.lengths[rownames(countTable)]
     length_norm <- countTable / exon.lengths.o
     ans <- t(t(length_norm)/colSums(length_norm)) * 1e6 # see https://rpubs.com/bbolker/sweep_divide
     ans
 }
 
 get_high_leverage_lm <- function(model, thresh){
     ## Description: identify indices of high leverage observations in linear regression model
     ## Para: -model: linear regression model trained with lm()
     ##      -thresh: high leverage threshold (mult. of expected lev. stat.)
 
     # expected leverage (p+1)/n, ref ISLR)
     n_coef = nrow(coef(summary(model)))
     n_obs = length(summary(model)$residuals)
 
     exp_lev = (n_coef + 1) / n_obs
     thresh_lev = thresh * exp_lev
 
     # identifying high leverage observations
     levs = hatvalues(model)
     high_levs = which(levs>thresh_lev)
     high_levs
 }
 
 get_aliased_preds_lm <- function(model) {
     ## Description: identify aliased predictors (causing singularities) in a linear
     ## regression model trained by lm()
     ## Para: -model: linear regression model trained with lm()
 
     aliases = alias(model)
     aliased_preds = row.names(aliases$Complete)
     aliased_preds
 }
+
+# this function is pure bullshit and is only there to keep a souvenir of my
+# previous misunderstanding of 1) high leverage points and 2) collinear predictors
+# in least square regression with or without regularization. In particular, 
+# high leverage points do NOT require calculating the model (of course, they only
+# depend on the predictor matrix). Thus I do NOT need to care whether there are
+# singularities in the model to get my leverage points. I fooled myself because of 
+# blind trust into the hatvalues() function (presented in ISLR), when I can instead
+# use hat().
+purge_aliases_and_high_leverage_lm <- function(f, reg_mat, thresh){
+    ## Description: identify aliased predictors (causing singularities), remove them,
+    ## identify high leverage observations, remove them, identify new aliased predictors,
+    ## remove them, identify high leverage observations, remove them until no 
+    ## aliases or high leverage observations are left. Returns a list for
+    ## high leverage points and another one for aliases.
+    ## Para: -f: formula to train lm()
+    ##       -reg_mat: dataframe containing observations and predictors as columns
+    ##       -thresh: high leverage threshold (mult. of expected lev. stat.)
+    
+    #initialization
+    i = 1
+    aliased_preds = c()
+    high_levs = c()
+    repeat{
+        # identification and removal of aliased predictors
+        cat("Training model nr", i, "\n")
+        model = lm(f, data = reg_mat, singular.ok = T)
+        new_aliased_preds = get_aliased_preds_lm(model) 
+        aliased_preds = append(aliased_preds, new_aliased_preds)
+        cat("New aliased preds: ", new_aliased_preds, "\n")
+        #updating reg_mat
+        reg_mat = reg_mat[, which(!names(reg_mat) %in% aliased_preds)]
+
+        #identification and removal of high leverage observations
+        model = lm(f, data = reg_mat, singular.ok = F)
+        new_high_levs = get_high_leverage_lm(model, thresh)
+        cat("New high leverage observations: ", names(new_high_levs), "\n")
+
+        reg_mat = reg_mat[-new_high_levs, ]
+        high_levs = append(high_levs, names(new_high_levs))
+
+        if(length(new_high_levs) == 0) {
+            break
+        }
+        i = i+1
+    }
+    ans = list(aliased_preds, high_levs)
+    ans 
+}