Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F120467284
cluster_pv.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Jul 4, 14:42
Size
1 KB
Mime Type
text/x-python
Expires
Sun, Jul 6, 14:42 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
27164848
Attached To
R8797 solarPV
cluster_pv.py
View Options
import
numpy
as
np
import
pandas
as
pd
import
os
import
sys
import
time
from
sklearn.cluster
import
DBSCAN
#### Variable definitions
# Positional command-line interface:
#   cluster_pv.py INFILE OUTFILE [MIN_SIZE [EPS [MIN_SAMPLE]]]

## INPUTS
# Exit with a readable usage message instead of a bare IndexError when the
# two mandatory paths are missing.
if len(sys.argv) < 3:
    sys.exit('Usage: %s INFILE OUTFILE [MIN_SIZE [EPS [MIN_SAMPLE]]]'
             % (sys.argv[0] if sys.argv else 'cluster_pv.py'))

INFILE = sys.argv[1]   # path of the CSV file holding the pixel data
OUTFILE = sys.argv[2]  # path the filtered, clustered pixels are written to

# Optional inputs
# Minimum size of installations (in m2)
MIN_SIZE = float(sys.argv[3]) if len(sys.argv) > 3 else 2.5

# DBSCAN Hyperparameters: see https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html
# Each hyperparameter is parsed on its own, so an EPS supplied without a
# MIN_SAMPLE is honoured rather than silently replaced by the default.
# Default EPS: Accept 2 missing pixels in x, y, or diagonal direction
EPS = float(sys.argv[4]) if len(sys.argv) > 4 else 0.72
# Default MIN_SAMPLE: Minimum 10 samples for "core points"
MIN_SAMPLE = int(sys.argv[5]) if len(sys.argv) > 5 else 10

# Constants:
# Columns of the input table handed to DBSCAN as point coordinates
COORDS = ['x', 'y']
# Echo the run configuration (one message per line) before any work starts.
print(
    '\nLoad pixel data from %s' % INFILE,
    'Minimum installation size: %.1f m2' % MIN_SIZE,
    'DBSCAN parameters: EPS = %.2f, MIN_SAMPLE = %d' % (EPS, MIN_SAMPLE),
    'Save output to %s\n' % OUTFILE,
    sep='\n',
)
#### Load data
# The input is parsed as CSV; the columns listed in COORDS are used for
# clustering below.
all_pixels = pd.read_csv(INFILE)

#### Perform clustering
# perf_counter() is a monotonic clock meant for measuring intervals; the wall
# clock returned by time.time() can jump if the system time is adjusted.
tt = time.perf_counter()

# fit_predict() returns one label per row; DBSCAN marks noise points with -1.
# n_jobs=-1 asks scikit-learn to use all available processors.
all_pixels['cluster_ID'] = DBSCAN(
    eps=EPS,
    min_samples=MIN_SAMPLE,
    n_jobs=-1,
).fit_predict(all_pixels[COORDS])

print('Clustered data in %.2fs' % (time.perf_counter() - tt))
# Exclude all those without an assigned cluster
PV_clusters = all_pixels[all_pixels['cluster_ID'] >= 0]

# Get those clusters with a valid size
# Select pixel_area *before* aggregating: summing every column only to read
# one is wasteful, and it misbehaves on non-numeric columns in recent pandas.
PV_cluster_area = PV_clusters.groupby('cluster_ID')['pixel_area'].sum()
valid_clusters = PV_cluster_area[PV_cluster_area >= MIN_SIZE]

# Keep only the pixels whose cluster reached the minimum total area; the
# unassigned rows never appear in valid_clusters and so are dropped too.
clustered_pixels = all_pixels[all_pixels['cluster_ID'].isin(valid_clusters.index)]
print(clustered_pixels.head())

# Save output
clustered_pixels.to_csv(OUTFILE, index=False)
print('Saved %s - DONE' % OUTFILE)
Event Timeline
Log In to Comment