Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F102890321
helpers.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Feb 25, 06:09
Size
6 KB
Mime Type
text/x-python
Expires
Thu, Feb 27, 06:09 (1 d, 20 h)
Engine
blob
Format
Raw Data
Handle
24450239
Attached To
R12535 ME-390-2022
helpers.py
View Options
from
typing
import
Optional
,
Tuple
,
List
,
Dict
import
numpy
as
np
import
pandas
as
pd
import
matplotlib.pyplot
as
plt
import
plotly.graph_objects
as
go
def normalize(X, mean, std):
    """Standardize a dataset by shifting and rescaling it.

    Args:
        X (np.array): Dataset, expected shape (N, D).
        mean (np.array): Value subtracted from X, expected shape (D, ).
        std (np.array): Value the shifted data is divided by, expected
            shape (D, ).

    Returns:
        The result of ``(X - mean) / std``.
    """
    centered = X - mean
    return centered / std
def preprocess_data(
    df: pd.DataFrame,
    label: str,
    train_size: float = 0.6,
    seed: Optional[int] = None,
) -> Tuple[
    np.ndarray,
    np.ndarray,
    np.ndarray,
    np.ndarray,
    List[str],
    Dict[int, str],
]:
    """Shuffle a data frame and split it into numpy train/test arrays.

    The rows are sorted by the label column, the label column is converted
    to a pandas categorical, the rows are shuffled, and the first
    ``int(len(df) * train_size)`` shuffled rows become the training set.

    Args:
        df: Data to split.
        label: Name of the column holding the training label.
        train_size: Proportion of the rows used for training.
        seed: Random seed passed to the shuffle.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, feature_names,
        label_map)``: the training features, training label codes, test
        features, test label codes, names of the feature columns and a map
        from label code to label name.
    """
    # Sorting before shuffling fixes the row order the seeded permutation
    # is applied to.
    ordered = df.sort_values(by=label)
    ordered[label] = ordered[label].astype("category")
    shuffled = ordered.sample(frac=1, random_state=seed)

    cut = int(len(shuffled) * train_size)

    def _to_arrays(part):
        # Features are every column but the label; targets are the
        # integer category codes of the label column.
        return part.drop(columns=label).to_numpy(), part[label].cat.codes.to_numpy()

    X_train, y_train = _to_arrays(shuffled[:cut])
    X_test, y_test = _to_arrays(shuffled[cut:])

    label_map = {code: name for code, name in enumerate(shuffled[label].cat.categories)}
    feature_names = [column for column in shuffled.columns if column != label]

    return X_train, y_train, X_test, y_test, feature_names, label_map
def plot_boundaries(X, y, w, b, output_func, class_names, ax_titles=None, train=True):
    """Draw samples on top of a filled contour map of the model output.

    Args:
        X: 2-D array of samples; only columns 0 and 1 are plotted.
        y: Class index of every sample. It is squeezed before being
            compared with the class indices.
        w: Weights, forwarded unchanged to ``output_func``.
        b: Bias, forwarded unchanged to ``output_func``.
        output_func: Callable invoked as ``output_func(grid, w, b)`` on the
            flattened mesh grid; its result is reshaped to the grid shape
            and shown on a colour scale from 0 to 1 labelled ``P(y = 1)``.
        class_names: Legend names; entry ``i`` labels the samples whose
            label equals ``i``. Only as many classes as there are markers
            (two) are drawn.
        ax_titles: Optional pair ``(x label, y label)``.
        train: Selects the "Training set" title when true, otherwise the
            "Test set" title.
    """
    point_markers = ["o", "v"]
    point_colors = ([0, 0.5, 0], [0.25, 0.25, 1])
    # Small offset added to the grid's upper bounds (presumably so the
    # half-open range still reaches the padded maximum).
    eps = 1e-6

    # Plot when normalized
    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111)

    # Extent of the data along both plotted features, padded by 10% on
    # each side.
    x0_min, x0_max = X[:, 0].min(), X[:, 0].max()
    x1_min, x1_max = X[:, 1].min(), X[:, 1].max()
    x0_diff = x0_max - x0_min
    x1_diff = x1_max - x1_min
    x0_low, x0_high = x0_min - x0_diff / 10, x0_max + x0_diff / 10
    x1_low, x1_high = x1_min - x1_diff / 10, x1_max + x1_diff / 10

    # Decision boundary: evaluate the model on a regular mesh whose step
    # is 1/50 of the data range along each axis.
    xx, yy = np.mgrid[
        x0_low : x0_high + eps : x0_diff / 50,
        x1_low : x1_high + eps : x1_diff / 50,
    ]
    grid = np.c_[xx.ravel(), yy.ravel()]
    probs = output_func(grid, w, b).reshape(xx.shape)

    contour = ax.contourf(xx, yy, probs, 25, cmap="GnBu", vmin=0, vmax=1, alpha=1)
    colorbar = fig.colorbar(contour)
    colorbar.set_label("$P(y = 1)$")
    colorbar.set_ticks(np.arange(0, 1.01, 0.1))

    # Samples, one scatter call per class.
    flat_labels = y.squeeze()
    for class_idx, (class_name, marker, color) in enumerate(
        zip(class_names, point_markers, point_colors)
    ):
        members = X[flat_labels == class_idx]
        ax.scatter(
            x=members[:, 0],
            y=members[:, 1],
            color=color,
            marker=marker,
            label=class_name,
            alpha=1,
            s=100,
            edgecolors="#FFFFFF",
            linewidths=1,
        )

    ax.set_aspect(1)
    ax.set_xlim([x0_low, x0_high])
    ax.set_ylim([x1_low, x1_high])

    title_template = "Training set ({} examples)" if train else "Test set ({} examples)"
    plt.title(title_template.format(len(X)), fontsize=16)

    if ax_titles is not None:
        plt.xlabel(ax_titles[0], fontsize=14)
        plt.ylabel(ax_titles[1], fontsize=14)

    plt.legend(prop={"size": 14}, loc="best")
    plt.show()
def interactive_boundaries(
    X_train,
    y_train,
    X_test,
    y_test,
    w_list,
    b_list,
    output_func,
    class_names,
    ax_titles=None,
    total_steps=50,
):
    """Plot interactive boundaries in a binary logistic regression setting using Plotly.

    One contour trace is built per selected entry of ``w_list`` / ``b_list``
    and a slider toggles which contour is visible. The train and test
    samples are always shown on top.

    Args:
        X_train: 2-D array of training samples; columns 0 and 1 are plotted.
        y_train: Training labels (0 or 1). Flattened before use, so both
            shape (N,) and (N, 1) are accepted, matching ``plot_boundaries``.
        X_test: 2-D array of test samples; columns 0 and 1 are plotted.
        y_test: Test labels (0 or 1), flattened like ``y_train``.
        w_list: Sequence of weights, one entry per recorded step.
        b_list: Sequence of biases, indexed like ``w_list``.
        output_func: Callable invoked as ``output_func(grid, w, b)`` on the
            flattened mesh grid; its result is reshaped to the grid shape.
        class_names: Names indexed by label (0 and 1), used in the legend.
        ax_titles: Optional pair ``(x axis title, y axis title)``.
        total_steps: Maximum number of slider steps; capped at
            ``len(w_list)``.
    """
    # Small offset added to the upper bound of each axis range.
    eps = 1e-6
    total_steps = min(total_steps, len(w_list))
    colors = ["rgb(0,127,0)", "rgb(64,64,255)"]

    # Create a mesh grid on which we will run our model. It spans the
    # train and test samples together, padded by 10% of the data range.
    X_all = np.concatenate((X_train, X_test))
    x0_min, x0_max = X_all[:, 0].min(), X_all[:, 0].max()
    x1_min, x1_max = X_all[:, 1].min(), X_all[:, 1].max()
    x0_diff = x0_max - x0_min
    x1_diff = x1_max - x1_min
    x0_range = np.arange(x0_min - x0_diff / 10, x0_max + x0_diff / 10 + eps, x0_diff / 50)
    x1_range = np.arange(x1_min - x1_diff / 10, x1_max + x1_diff / 10 + eps, x1_diff / 50)
    xx, yy = np.meshgrid(x0_range, x1_range)
    grid = np.c_[xx.ravel(), yy.ravel()]

    # Initialize Plotly figure
    fig = go.Figure()

    # Evenly spaced indices into w_list / b_list, one per slider step.
    linspace = np.linspace(0, len(w_list) - 1, num=total_steps, dtype=int)

    # Plot decision boundaries; every contour starts hidden and is
    # switched on by its slider step.
    for i in linspace:
        probs = output_func(grid, w_list[i], b_list[i]).reshape(xx.shape)
        fig.add_trace(
            go.Contour(
                x=x0_range,
                y=x1_range,
                z=probs,
                showscale=True,
                colorscale="GnBu",
                opacity=0.8,
                hoverinfo="skip",
                visible=False,
            )
        )

    # Plot points. Labels are flattened so that column vectors of shape
    # (N, 1) produce a 1-D boolean mask, as plot_boundaries does with
    # squeeze(); a 2-D mask would not match the row axis of the samples.
    train_labels = np.ravel(y_train)
    test_labels = np.ravel(y_test)
    trace_specs = [
        (X_train, train_labels, 0, "Train", "circle", colors[0]),
        (X_train, train_labels, 1, "Train", "triangle-down", colors[1]),
        (X_test, test_labels, 0, "Test", "circle-dot", colors[0]),
        (X_test, test_labels, 1, "Test", "triangle-down-dot", colors[1]),
    ]
    for samples, labels, label, split, marker, color in trace_specs:
        selected = labels == label
        fig.add_trace(
            go.Scatter(
                x=samples[selected, 0],
                y=samples[selected, 1],
                name=f"{class_names[label]}, {split}",
                mode="markers",
                marker=dict(
                    size=12,
                    symbol=marker,
                    color=color,
                    line=dict(width=1, color="White"),
                ),
            )
        )

    # Add slider: step k shows only the k-th contour; the last 4 traces
    # are the points and are always shown.
    n_contours = len(linspace)
    steps = []
    for position, iteration in enumerate(linspace):
        visible = [False] * n_contours + [True] * 4
        visible[position] = True  # Toggle this step's contour to "visible"
        steps.append(
            dict(
                method="update",
                label=f"{iteration}",
                args=[{"visible": visible}],
            )
        )

    # NOTE(review): `active` is one past the last step index and every
    # contour starts hidden, so no contour is visible before the slider is
    # used. Kept as in the original; confirm whether the last step
    # (n_contours - 1) was intended.
    sliders = [dict(active=n_contours, currentvalue={"prefix": "iteration: "}, steps=steps)]

    # Customize layout
    fig.update_layout(
        sliders=sliders,
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
    )
    if ax_titles is not None:
        fig.update_layout(xaxis_title=ax_titles[0], yaxis_title=ax_titles[1])

    fig["layout"].update(autosize=False, width=600, height=600)
    fig.show()
Event Timeline
Log In to Comment