DBSCAN_hyperparams.py
Attached to repository R10679 hybrid_potential
import pandas as pd
import numpy as np
import xarray as xr
import geopandas as gpd
from sklearn.cluster import DBSCAN
import os
import time
import calendar
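# Note: xarray, geopandas and calendar are imported above but not used
# anywhere below in this script.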
coords = ['x', 'y']
scratch_fp = '/scratch/walch/cluster_hubs'

# Load data
data = pd.read_csv(os.path.join(scratch_fp, 'BLD_data.csv'))

# Keep buildings with positive annual demand and a peak demand below 1 MW;
# .copy() avoids pandas' SettingWithCopyWarning when clusterID is assigned
# in the loop below
demand = data[data.annual_demand_kWh > 0]
cluster_data = demand[demand.peak_demand_W < 1e6].copy()

print(cluster_data.head())
print(len(cluster_data))
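# DBSCAN below uses the euclidean metric on the raw x/y columns, so eps is
# expressed in coordinate units and x/y should come from a projected (metric)
# CRS. A minimal sketch of how such columns could be produced with the
# geopandas import above -- the 'lon'/'lat' column names and the EPSG:2056
# target CRS are illustrative assumptions, not taken from this script:
def project_lonlat_to_xy(df, lon_col='lon', lat_col='lat', epsg_out=2056):
    """Project WGS84 lon/lat columns to a metric CRS and return x/y arrays."""
    gdf = gpd.GeoDataFrame(
        df, geometry=gpd.points_from_xy(df[lon_col], df[lat_col]),
        crs='EPSG:4326')
    gdf = gdf.to_crs(epsg=epsg_out)
    return gdf.geometry.x.values, gdf.geometry.y.values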
# Hyperparameter grids: eps is in the units of the x/y coordinates
# (presumably metres); min_samples is DBSCAN's core-point threshold
EPS_vec = np.arange(50, 750, 50)
min_samples_vec = np.arange(5, 105, 5)
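# Sanity check on the sweep size (informational only): 14 eps values x 20
# min_samples values = 280 DBSCAN fits in the loop below.
print('Hyperparameter sweep: %d combinations' % (len(EPS_vec) * len(min_samples_vec)))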
stats = []
for EPS in EPS_vec:
    for MIN_SAMPLES in min_samples_vec:
        print('EPS = %d; min_samples = %d' % (EPS, MIN_SAMPLES))

        # scikit-learn DBSCAN; n_jobs=-1 uses all available cores
        model = DBSCAN(eps=EPS, min_samples=MIN_SAMPLES, n_jobs=-1)

        # Fit on the (x, y) coordinates and time the run; fit_predict
        # returns one cluster label per building, with -1 meaning noise
        tt = time.time()
        cluster_data['clusterID'] = model.fit_predict(cluster_data[coords])
        cluster_time = time.time() - tt
        print('Fit and predict in %.2f s' % (cluster_time))
        # Prepare pd Series with stats: group buildings by cluster,
        # excluding noise points (clusterID == -1)
        cluster_groups = cluster_data[cluster_data.clusterID > -1].groupby('clusterID')['peak_demand_W']

        # Total peak demand per cluster, in kW
        peak_demand_kW = cluster_groups.sum() / 1e3

        # Clusters whose total peak demand exceeds 1 MW
        large_clusters = peak_demand_kW[peak_demand_kW > 1000]

        # Summary statistics (mean, std, min, quartiles, max);
        # iloc[1:] drops the leading 'count' entry of describe()
        count_stats = cluster_groups.count().describe().iloc[1:]
        kWp_stats = peak_demand_kW.describe().iloc[1:]

        # Prefix the statistic names so the two sets of columns stay distinct
        count_stats.index = ['N_BLD_%s' % name for name in count_stats.index]
        kWp_stats.index = ['kWp_%s' % name for name in kWp_stats.index]
        other_info = pd.Series({
            'EPS': EPS,
            'min_samples': MIN_SAMPLES,
            'exec_time': cluster_time,
            # noise (-1) was filtered out above, so every group is a cluster
            'N_clusters': len(peak_demand_kW),
            'N_clust_>1MWp': len(large_clusters),
            # share of buildings labelled as noise, in percent
            'perc_outliers': 100 * len(cluster_data[cluster_data.clusterID == -1]) / len(cluster_data),
            # total peak demand (MW) of clusters above 1 MWp, i.e. roughly how
            # many extra 1-MWp clusters they could be split into
            'aprx_add_clst': large_clusters.sum() / 1e3
        })

        stats.append(pd.concat([other_info, kWp_stats, count_stats]))
# Collect one row per (EPS, min_samples) combination and save
stats_all = pd.DataFrame(stats)
print(stats_all.head())
stats_all.to_csv('/scratch/walch/scratch/DBSCAN_hyperparams_GWRonly.csv', index=False)
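# Sketch of a possible follow-up (not part of the original run): reload the
# sweep results and rank the hyperparameter combinations. The column names
# match the script above; the ranking criterion itself is an illustrative
# choice, e.g. favouring few outliers, then many clusters.
results = pd.read_csv('/scratch/walch/scratch/DBSCAN_hyperparams_GWRonly.csv')
ranked = results.sort_values(['perc_outliers', 'N_clusters'], ascending=[True, False])
print(ranked[['EPS', 'min_samples', 'N_clusters', 'perc_outliers']].head())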