Page MenuHomec4science

run_sgd.py
No OneTemporary

File Metadata

Created
Sun, Feb 23, 15:18

run_sgd.py

import numpy as np
import sklearn as sk
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
import csv
def create_csv_submission(ids, y_pred, name):
    """Write predictions to a CSV file in the Kaggle submission format.

    The file starts with the header row ``Id,Prediction`` and then holds one
    row per (id, prediction) pair. Both values are converted with ``int()``
    before being written.

    Args:
        ids: Iterable of event ids, one per prediction.
        y_pred: Iterable of predicted class labels. It is paired with
            ``ids`` through ``zip``, so surplus items in the longer of the
            two iterables are silently dropped.
        name: Path of the .csv output file to create; an existing file is
            overwritten.
    """
    # The csv module requires files to be opened with newline="". Without it,
    # platforms that translate "\n" into "\r\n" turn the writer's own "\r\n"
    # row terminator into "\r\r\n", which shows up as blank lines between rows.
    with open(name, "w", newline="") as csvfile:
        fieldnames = ["Id", "Prediction"]
        writer = csv.DictWriter(csvfile, delimiter=",", fieldnames=fieldnames)
        writer.writeheader()
        for r1, r2 in zip(ids, y_pred):
            writer.writerow({"Id": int(r1), "Prediction": int(r2)})
def create_submission(y, name="new_submission.csv"):
    """Write the predictions ``y`` to a submission CSV with ids 1..len(y).

    Args:
        y: Sized sequence of predicted labels; the i-th prediction
            (0-based) is written with id ``i + 1``.
        name: Path of the output file. Defaults to "new_submission.csv",
            the file name this function previously always used.
    """
    # Ids are 1-based and consecutive, following the order of ``y``.
    ids = range(1, len(y) + 1)
    create_csv_submission(ids, y, name)
# --- Load the precomputed feature arrays ---------------------------------
# Only element [0] of each stored array is used.
# NOTE(review): presumably each file wraps a single (n_samples, n_features)
# matrix in an outer axis -- confirm against the script that wrote them.
neg_sentences = np.load("pre_train_neg_feature.npy")[0]
pos_sentences = np.load("pre_train_pos_feature.npy")[0]
x_te = np.load("pre_test_data_feature.npy")[0]

# --- Assemble the training set --------------------------------------------
# Negative samples come first, then positive ones; the label vector follows
# the same order, with 0 for negative and 1 for positive.
y_neg = np.zeros(len(neg_sentences))
y_pos = np.ones(len(pos_sentences))
x_tr = np.concatenate((neg_sentences, pos_sentences))
y_tr = np.concatenate((y_neg, y_pos))

# --- Standardize the data -------------------------------------------------
# The scaler is fitted on the training data only and then applied to both
# the training and the test features.
scaler = StandardScaler().fit(x_tr)
x_tr = scaler.transform(x_tr)
x_te = scaler.transform(x_te)

# --- Train a linear classifier with SGD (hinge loss, L2 penalty) -----------
clf = SGDClassifier(loss="hinge", penalty="l2").fit(x_tr, y_tr)

# --- Predict and export -----------------------------------------------------
# Predictions use the training labels {0, 1}; 2 * y - 1 remaps them to
# {-1, 1} before they are written to the submission file.
y_te = clf.predict(x_te)
y_final = 2 * y_te - 1
create_submission(y_final)

Event Timeline