Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F102733090
run_sgd.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Feb 23, 15:18
Size
1 KB
Mime Type
text/x-python
Expires
Tue, Feb 25, 15:18 (2 d)
Engine
blob
Format
Raw Data
Handle
24409538
Attached To
rTZUCT ML_Project1
run_sgd.py
View Options
import
numpy
as
np
import
sklearn
as
sk
import
pickle
from
sklearn.preprocessing
import
StandardScaler
from
sklearn.linear_model
import
SGDClassifier
import
csv
def create_csv_submission(ids, y_pred, name):
    """
    Creates an output file in csv format for submission to kaggle

    Arguments: ids (event ids associated with each prediction)
               y_pred (predicted class labels)
               name (string name of .csv output file to be created)

    The file starts with an ``Id,Prediction`` header row; every following
    row holds one id and its prediction, both converted with ``int``.
    ``ids`` and ``y_pred`` are paired with ``zip``, so surplus items in the
    longer of the two are ignored.
    """
    # The csv module requires files it writes to be opened with newline=''.
    # Without it, platforms that translate newlines on write (Windows) turn
    # the writer's '\r\n' row terminator into '\r\r\n', which shows up as an
    # empty line after every row.
    with open(name, 'w', newline='') as csvfile:
        fieldnames = ['Id', 'Prediction']
        writer = csv.DictWriter(csvfile, delimiter=",", fieldnames=fieldnames)
        writer.writeheader()
        for r1, r2 in zip(ids, y_pred):
            writer.writerow({'Id': int(r1), 'Prediction': int(r2)})
def create_submission(y, name="new_submission.csv"):
    """
    Writes the predictions y to a csv submission file

    Arguments: y (predicted class labels, in the order they should be numbered)
               name (string name of .csv output file to be created;
                     defaults to "new_submission.csv")

    Ids are generated as 1, 2, ..., len(y) and paired with the predictions
    in that order.
    """
    # Ids are 1-based, hence the shifted range.
    ids = range(1, len(y) + 1)
    create_csv_submission(ids, y, name)
# ---- Load the pre-computed feature files ------------------------------
# NOTE(review): every .npy file is indexed with [0] right after loading --
# presumably the features were saved inside an extra leading wrapper;
# confirm against the script that produced these files.
neg_sentences = np.load("pre_train_neg_feature.npy")[0]
pos_sentences = np.load("pre_train_pos_feature.npy")[0]
x_te = np.load("pre_test_data_feature.npy")[0]

# ---- Assemble the training set: negatives first, then positives -------
x_tr = np.concatenate((neg_sentences, pos_sentences))
# Labels follow the same order: 0 for each negative, 1 for each positive.
y_neg = np.zeros(len(neg_sentences))
y_pos = np.ones(len(pos_sentences))
y_tr = np.concatenate((y_neg, y_pos))

# ---- Standardize the data ----------------------------------------------
# The scaler is fitted on the training features only and then applied to
# both the training and the test features.
scaler = StandardScaler().fit(x_tr)
x_tr = scaler.transform(x_tr)
x_te = scaler.transform(x_te)

# ---- Train the classifier (hinge loss, L2 penalty) and predict ---------
clf = SGDClassifier(penalty="l2", loss="hinge").fit(x_tr, y_tr)
y_te = clf.predict(x_te)

# Map the predicted 0/1 labels to -1/+1 before writing the submission.
y_final = 2 * y_te - 1
create_submission(y_final)
Event Timeline
Log In to Comment