project1.py
File Metadata
Created: Fri, Jul 4, 09:09
Expires: Sun, Jul 6, 09:09 (2 d)
Size: 3 KB
Mime Type: text/x-python
Attached To: rTZUCT ML_Project1

# coding: utf-8

# In[4]:

#get_ipython().magic('matplotlib inline')
import numpy as np
import matplotlib.pyplot as plt
#get_ipython().magic('load_ext autoreload')
#get_ipython().magic('autoreload 2')

# In[5]:

from proj1_helpers import *

print("loading data")
y_tr, x_tr, ids_tr = load_csv_data("train.csv")
y_te, x_te, ids_te = load_csv_data("test.csv")
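# load_csv_data comes from proj1_helpers; as used here it returns the labels,
# the raw feature matrix and the event ids, and "train.csv"/"test.csv" are
# expected to be in the working directory.
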
# Data preprocessing: treat the -999 placeholder values as outliers/missing and
# replace them with the per-column mean of the valid entries, then prepend a
# column of ones (bias term).
x_tr_outlier_set_null = (x_tr != -999) * x_tr           # zero out placeholder entries
x_tr_mean_num = np.sum((x_tr != -999) * x_tr, axis=0)   # column sums over valid entries
x_tr_mean_den = np.sum((x_tr != -999), axis=0)          # number of valid entries per column
x_tr_mean = x_tr_mean_num / x_tr_mean_den
x_tr = x_tr_outlier_set_null + ((x_tr == -999) * x_tr_mean)
one_column_tr = np.ones((x_tr.shape[0], 1))
tx_tr = np.hstack([one_column_tr, x_tr])

# Same preprocessing for the test set.
x_te_outlier_set_null = (x_te != -999) * x_te
x_te_mean_num = np.sum((x_te != -999) * x_te, axis=0)
x_te_mean_den = np.sum((x_te != -999), axis=0)
x_te_mean = x_te_mean_num / x_te_mean_den
x_te = x_te_outlier_set_null + ((x_te == -999) * x_te_mean)
one_column_te = np.ones((x_te.shape[0], 1))
tx_te = np.hstack([one_column_te, x_te])

# Map the labels from {-1, +1} to {0, 1}.
y_tr = (y_tr + 1) / 2
y_te = (y_te + 1) / 2

print("Data loaded")
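
# For reference: the mask-and-add imputation above is equivalent to the more
# direct np.where formulation sketched below, where x stands for the raw x_tr
# or x_te before imputation (illustration only, kept commented out):
#   missing = (x == -999)
#   col_mean = np.sum(np.where(missing, 0, x), axis=0) / np.sum(~missing, axis=0)
#   x = np.where(missing, col_mean, x)
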
# In[22]:

# In[6]:

# ridge regression
from ridge_regression import *


def ridge_reg_test():
    lambdas = np.logspace(-5, 0, 15)
    ratio = len(tx_tr) / (len(tx_tr) + len(tx_te))
    rmse_tr = []
    rmse_te = []
    w_array = []
    for ind, lambda_ in enumerate(lambdas):
        # ridge regression with the given lambda
        w = ridge_regression(y_tr, tx_tr, lambda_)
        mse_tr = compute_mse(y_tr, tx_tr, w)
        mse_te = compute_mse(y_te, tx_te, w)
        rmse_tr.append(np.sqrt(2 * mse_tr))
        rmse_te.append(np.sqrt(2 * mse_te))
        w_array.append(w)
        print("proportion={p}, lambda={l:.3f}, Training RMSE={tr:.3f}, Testing RMSE={te:.3f}"
              .format(p=ratio, l=lambda_, tr=rmse_tr[ind], te=rmse_te[ind]))
    # return the weights obtained with the smallest lambda (lambdas[0])
    return w_array[0]
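
# Note: ridge_reg_test() is defined but never called in this script. A minimal
# usage sketch mirroring the submission code at the bottom of the file
# ("submission_ridge_reg.csv" is a hypothetical output name; kept commented out):
#   w_ridge = ridge_reg_test()
#   ridge_prediction = predict_labels(w_ridge, tx_te)
#   create_csv_submission(ids_te, ridge_prediction, "submission_ridge_reg.csv")
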
# In[12]:

from logistic_regression import *
from ridge_regression import *
from least_squares import *


def logistic_reg_test():
    max_iter = 10000
    threshold = 1e-8
    gamma = 0.01
    lambda_ = 0.1
    losses = []
    # build tx
    #tx = np.c_[np.ones((y.shape[0], 1)), x]
    w_initial = np.zeros((tx_tr.shape[1], 1))
    #w_initial, loss_initial = least_squares(y_tr, tx_tr) #0.67753
    #w_initial = ridge_regression(y_tr, tx_tr, lambda_)
    # a single gradient-descent step from the zero initialisation
    w, loss = learning_by_gradient_descent(y_tr, tx_tr, w_initial, gamma)
    return np.array(w)

    # Unreachable below: the full iteration loop is kept commented out.
    # start the logistic regression
    #for iter in range(max_iter):
        # get loss and update w.
        #w, loss = learning_by_gradient_descent(y_tr, x_tr, w.T, gamma)
        # log info
        #if iter % 1 == 0:
        #    print("Current iteration={i}, the loss={l}".format(i=iter, l=loss))
        # converge criterion
        #losses.append(loss)
        #if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
        #    break
    # visualization
    #visualization(y, x, mean_x, std_x, w, "classification_by_logistic_regression_newton_method")
    #print("loss={l}".format(l=calculate_loss(y_te, x_te, w)))
    #return w
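
# A minimal sketch of the full gradient-descent loop that the commented-out code
# inside logistic_reg_test aims at, using the same learning_by_gradient_descent
# call and the constants defined in that function (illustration only, kept
# commented out here):
#   w = np.zeros((tx_tr.shape[1], 1))
#   losses = []
#   for n_iter in range(10000):                                       # max_iter
#       w, loss = learning_by_gradient_descent(y_tr, tx_tr, w, 0.01)  # gamma
#       losses.append(loss)
#       if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < 1e-8:  # threshold
#           break
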
# In[8]:

print(y_tr.shape)
print(tx_tr.shape)

# In[13]:

from proj1_helpers import *

print("running reg")
w = logistic_reg_test()
print(w.shape)
prediction = predict_labels(w, tx_te)
print(prediction.shape)
create_csv_submission(ids_te, prediction, "submission_logisitic_reg.csv")
#create_csv_submission(ids_te, prediction, "submission2")