helpers.py
# -*- coding: utf-8 -*-
"""some helper functions."""
import numpy as np


def load_data(sub_sample=True, add_outlier=False):
    """Load data and convert it to the metric system."""
    path_dataset = "height_weight_genders.csv"
    data = np.genfromtxt(
        path_dataset, delimiter=",", skip_header=1, usecols=[1, 2])
    height = data[:, 0]
    weight = data[:, 1]
    gender = np.genfromtxt(
        path_dataset, delimiter=",", skip_header=1, usecols=[0],
        converters={0: lambda x: 0 if b"Male" in x else 1})
    # Convert to metric system
    height *= 0.025
    weight *= 0.454

    # sub-sample
    if sub_sample:
        height = height[::50]
        weight = weight[::50]

    if add_outlier:
        # outlier experiment
        height = np.concatenate([height, [1.1, 1.2]])
        weight = np.concatenate([weight, [51.5 / 0.454, 55.3 / 0.454]])

    return height, weight, gender
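
A minimal usage sketch (not part of the original file): the file name height_weight_genders.csv comes from the function above, and the column layout (gender, height, weight) is inferred from the usecols arguments.

    height, weight, gender = load_data(sub_sample=False)
    print(height.shape, weight.shape, gender.shape)  # three 1-D arrays of equal length
    print(height[:3], weight[:3], gender[:3])        # heights in metres, weights in kg, gender as 0/1
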
def standardize(x):
    """Standardize the original data set."""
    mean_x = np.mean(x)
    x = x - mean_x
    std_x = np.std(x)
    x = x / std_x
    return x, mean_x, std_x
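
A short sketch of typical use (hypothetical names, continuing the example above): the returned mean_x and std_x let you apply the same transform to data that standardize never saw, such as a new test point.

    std_height, mean_h, std_h = standardize(height)  # zero mean, unit variance
    new_height_std = (1.80 - mean_h) / std_h         # reuse the training statistics
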
def build_model_data(height, weight):
    """Form (y,tX) to get regression data in matrix form."""
    y = weight
    x = height
    num_samples = len(y)
    tx = np.c_[np.ones(num_samples), x]
    return y, tx
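
Continuing the sketch (assumed variable names, not part of this file): the standardized heights become the single feature and a column of ones is prepended for the intercept term.

    y, tx = build_model_data(std_height, weight)
    print(tx.shape)  # (num_samples, 2): first column all ones, second the feature
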
def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """
    Generate a minibatch iterator for a dataset.
    Takes as input two iterables (here the output desired values 'y' and the input data 'tx').
    Outputs an iterator which gives mini-batches of `batch_size` matching elements from `y` and `tx`.
    Data can be randomly shuffled to avoid ordering in the original data messing with the randomness of the minibatches.
    Example of use:
        for minibatch_y, minibatch_tx in batch_iter(y, tx, 32):
            <DO-SOMETHING>
    """
    data_size = len(y)

    if shuffle:
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_y = y[shuffle_indices]
        shuffled_tx = tx[shuffle_indices]
    else:
        shuffled_y = y
        shuffled_tx = tx
    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min((batch_num + 1) * batch_size, data_size)
        if start_index != end_index:
            yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]
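
A hedged example of batch_iter driving a stochastic gradient descent loop for least squares, using y and tx from the sketch above. The step size gamma, the number of epochs, and the gradient formula are illustrative textbook choices, not code from this repository.

    w = np.zeros(tx.shape[1])
    gamma = 0.1
    for epoch in range(50):
        for mb_y, mb_tx in batch_iter(y, tx, batch_size=32, num_batches=1):
            err = mb_y - mb_tx.dot(w)            # minibatch residuals
            grad = -mb_tx.T.dot(err) / len(err)  # least-squares gradient
            w = w - gamma * grad                 # SGD update
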