Page MenuHomec4science

project1.py
No OneTemporary

File Metadata

Created
Fri, Jul 4, 09:09

project1.py

# coding: utf-8
# In[4]:
#get_ipython().magic('matplotlib inline')
import numpy as np
import matplotlib.pyplot as plt
#get_ipython().magic('load_ext autoreload')
#get_ipython().magic('autoreload 2')
# In[5]:
from proj1_helpers import *
def _impute_missing_with_mean(x, missing=-999):
    """Replace every entry of ``x`` equal to ``missing`` by its column mean.

    The mean of each column is computed over the entries that are NOT equal
    to ``missing``. A column that contains no valid entry at all is filled
    with 0 instead of dividing 0 by 0 (which would turn the whole column
    into NaN).

    Args:
        x: 2-D array, one row per sample and one column per feature.
        missing: sentinel value marking a missing measurement.

    Returns:
        A new array of the same shape as ``x``; ``x`` itself is not modified.
    """
    present = x != missing
    valid_counts = present.sum(axis=0)
    valid_sums = np.where(present, x, 0.0).sum(axis=0)
    # np.maximum(..., 1) only changes the denominator of all-missing columns,
    # whose numerator is 0, so their mean becomes 0 rather than NaN.
    column_means = valid_sums / np.maximum(valid_counts, 1)
    return np.where(present, x, column_means)


def _add_bias_column(x):
    """Return ``x`` with a column of ones prepended (intercept term)."""
    return np.hstack([np.ones((x.shape[0], 1)), x])


print("loading data")
y_tr, x_tr, ids_tr = load_csv_data("train.csv")
y_te, x_te, ids_te = load_csv_data("test.csv")

# Data preprocessing: the value -999 marks a missing measurement; replace it
# by the mean of the valid entries of the same feature column.
# NOTE(review): the test set is imputed with its OWN column means, not the
# training means - kept as in the original; confirm this is intended.
x_tr = _impute_missing_with_mean(x_tr)
tx_tr = _add_bias_column(x_tr)
x_te = _impute_missing_with_mean(x_te)
tx_te = _add_bias_column(x_te)

# Affine rescaling of the labels; presumably maps -1/+1 to 0/1 for the
# logistic model - confirm against what load_csv_data returns.
y_tr = (y_tr + 1) / 2
y_te = (y_te + 1) / 2
print("Data loaded")
# In[22]:
# In[6]:
# ridge regression
from ridge_regression import *
def ridge_reg_test():
    """Sweep ridge-regression penalties and print train/test RMSE for each.

    Fits ``ridge_regression`` on the module-level training data
    (``y_tr``, ``tx_tr``) for 15 log-spaced penalties between 1e-5 and 1,
    evaluates each fit with ``compute_mse`` on both the training and the
    test data, and prints one report line per penalty.

    Returns:
        The weights fitted with the FIRST penalty of the sweep (the smallest
        one), independent of the measured errors.
    """
    penalties = np.logspace(-5, 0, 15)
    n_train, n_test = len(tx_tr), len(tx_te)
    # Share of training samples among all samples; only used in the report.
    train_share = n_train / (n_train + n_test)
    report = "proportion={p}, lambda={l:.3f}, Training RMSE={tr:.3f}, Testing RMSE={te:.3f}"

    train_rmse, test_rmse, fitted_weights = [], [], []
    for penalty in penalties:
        weights = ridge_regression(y_tr, tx_tr, penalty)
        train_mse = compute_mse(y_tr, tx_tr, weights)
        test_mse = compute_mse(y_te, tx_te, weights)
        # RMSE is taken as sqrt(2 * mse); assumes compute_mse returns half
        # the mean squared error - TODO confirm against its definition.
        train_rmse.append(np.sqrt(2 * train_mse))
        test_rmse.append(np.sqrt(2 * test_mse))
        fitted_weights.append(weights)
        print(report.format(
            p=train_share, l=penalty, tr=train_rmse[-1], te=test_rmse[-1]))

    # NOTE(review): always the first penalty's weights, not the best ones.
    return fitted_weights[0]
# In[12]:
from logistic_regression import *
from ridge_regression import *
from least_squares import *
def logistic_reg_test():
    """Fit logistic-regression weights on the module-level training data.

    Builds an all-zero initial weight column vector with one row per column
    of ``tx_tr`` and passes it, together with ``y_tr``, ``tx_tr`` and
    ``gamma``, to ``learning_by_gradient_descent``.

    Returns:
        The weights returned by ``learning_by_gradient_descent`` converted
        with ``np.array``; the returned loss is discarded.
    """
    gamma = 0.01  # presumably the gradient-descent step size - confirm
    w_initial = np.zeros((tx_tr.shape[1], 1))
    # Alternative starting points that were tried and disabled: the
    # least-squares solution (annotated "0.67753" in the original notes) and
    # the ridge-regression solution with lambda_ = 0.1.

    # NOTE(review): learning_by_gradient_descent is called exactly once. An
    # earlier explicit loop (up to 10000 iterations, stopping when two
    # consecutive losses differed by less than 1e-8) was disabled; confirm
    # that the helper iterates internally, otherwise this is a single step.
    w, _ = learning_by_gradient_descent(y_tr, tx_tr, w_initial, gamma)
    return np.array(w)
# In[8]:
# Sanity check: show the dimensions of the training labels and of the
# training feature matrix (with the column of ones), one per line.
print(y_tr.shape, tx_tr.shape, sep="\n")
# In[13]:
from proj1_helpers import *
# Driver: fit the logistic-regression weights, predict labels for the test
# features and write the predictions to a submission CSV file.
print("running reg")
w = logistic_reg_test()
print(w.shape)  # sanity check on the fitted weights' dimensions
prediction = predict_labels(w, tx_te)
print(prediction.shape)  # sanity check on the predictions' dimensions
# NOTE(review): the output file name spells "logisitic" (sic); left untouched
# because other tooling may already expect this exact name.
create_csv_submission(ids_te, prediction, "submission_logisitic_reg.csv")
# Earlier alternative output name, kept for reference:
#create_csv_submission(ids_te, prediction, "submission2")

Event Timeline