spatial_join_pv_new.py (F111424822)
File Metadata

File Info
Created: Thu, May 1, 13:36
Size: 11 KB
Mime Type: text/x-python
Expires: Sat, May 3, 13:36 (2 d)

Storage
Engine: blob
Format: Raw Data
Handle: 25914267

Attached To
R8797 solarPV

import numpy as np
import pandas as pd
import geopandas as gpd
import os
import sys
import time
import shutil

#### Function definitions

def coords_to_point(df, coord_names=['x', 'y']):
    xy = df[coord_names].values
    return gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(xy[:, 0], xy[:, 1]))

def load_file(fp, columns=None, rows=None, crs=None, bbox=None, **kwargs):
    if columns is not None:
        all_cols = gpd.read_file(fp, rows=0, **kwargs).columns
        ignore_cols = list(all_cols[np.logical_not(all_cols.isin(columns))])
    else:
        ignore_cols = None
    return gpd.read_file(fp, ignore_fields=ignore_cols, rows=rows, bbox=bbox, **kwargs)

def select_by_coords(df, gdf, coord_names=['x', 'y']):
    bounds = gdf.bounds
    X0 = bounds.minx.min()
    X1 = bounds.maxx.max()
    Y0 = bounds.miny.min()
    Y1 = bounds.maxy.max()
    return df[(df[coord_names[0]] >= X0) & (df[coord_names[0]] <= X1) &
              (df[coord_names[1]] >= Y0) & (df[coord_names[1]] <= Y1)]
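
# Usage sketch for the two point helpers above (the coordinate values are illustrative and
# 'mask_gdf' is a placeholder for any GeoDataFrame in the same coordinate system):
#   pts = coords_to_point(pd.DataFrame({'x': [2500000.0], 'y': [1120000.0]}))
#   subset = select_by_coords(pts, mask_gdf)   # keep only points inside mask_gdf's bounding box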

#### Variable definitions

## INPUTS
PV_FP = sys.argv[1]    # e.g. '/work/hyenergy/output/CNN_pv_predictions/pv_predictions_geneva.csv'
MASK_FP = sys.argv[2]  # geospatial limitation (geodataframe) of the perimeter of analysis
OUT_FP = sys.argv[3]   # e.g. '/scratch/walch/pv_linking/PV_detected.csv'

# Optional inputs
if len(sys.argv) > 4:
    BLD_BUFFER = float(sys.argv[4])  # Distance by which to buffer BLD footprint shapes
else:
    BLD_BUFFER = 3

if len(sys.argv) > 6:
    DEFAULT_TILT = int(sys.argv[5])
    DEFAULT_ASPECT = int(sys.argv[6])
else:
    DEFAULT_TILT = 0       # Default tilt angle assumed on roofs without rooftop info
    DEFAULT_ASPECT = -180  # Default roof aspect assumed on roofs without rooftop info

print('\nDefault tilt for roofs without rooftop info: %d' % DEFAULT_TILT)
print('Default aspect for roofs without rooftop info: %d' % DEFAULT_ASPECT)
print('BLD buffer width: %d m' % BLD_BUFFER)
print('Load PV pixels from %s' % PV_FP)
print('Load mask from %s' % MASK_FP)
print('Save output to %s\n' % OUT_FP)
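
# Example invocation (a sketch: the input CSV and output path are the examples from the
# comments above, 'mask_geneva.geojson' is a hypothetical mask file, and the trailing
# arguments set BLD_BUFFER=3, DEFAULT_TILT=0, DEFAULT_ASPECT=-180):
#   python spatial_join_pv_new.py \
#       /work/hyenergy/output/CNN_pv_predictions/pv_predictions_geneva.csv \
#       mask_geneva.geojson \
#       /scratch/walch/pv_linking/PV_detected.csv \
#       3 0 -180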

## Constants
BATCH_SIZE = 100000  # size of the batches in which the spatial join is processed (executed in a for-loop)
PIXEL_RES = 0.25
COORDS = ['x', 'y']

# CONSTANT FILES (national files)
# TLM_PATH = '/work/hyenergy/raw/Building_data/swissTLM3D/Update swissTLM3D 2017/LV95/swissTLM3D_LV95_LN02.gdb'
# TLM_LAYR = 'TLM_GEBAEUDE_FOOTPRINT'
# TLM_COLS = ['UUID', 'geometry']
BLD_PATH = '/work/hyenergy/output/buildings/buildings_CH/BLD_LV95_CH'
ROOF_PATH = '/work/hyenergy/raw/Sonnendach/Sonnendach_all_roofs/Sonnendach_CH.gdb'
ROOF_LAYR = 'SOKAT_CH_DACH'
# Roof information to inherit ('FLAECHE', 'NEIGUNG', 'AUSRICHTUNG' are compulsory)
ROOF_COLS = ['geometry', 'DF_UID', 'FLAECHE', 'NEIGUNG', 'AUSRICHTUNG']
roof_col_dict = {'FLAECHE': 'ROOF_AREA', 'NEIGUNG': 'ROOF_TILT', 'AUSRICHTUNG': 'ROOF_ASPECT'}

# Files that link to BLD_ID
ROOF_LINK = '/work/hyenergy/output/buildings/buildings_CH/SOLKAT_DACH_CH_BLD_ID.csv'
# TLM_LINK = '/work/hyenergy/output/buildings/buildings_CH/TLM_CH_BLD_ID.csv'

TMP_DIR = os.path.join(os.path.split(OUT_FP)[0], 'tmp')
try:
    os.mkdir(TMP_DIR)
except FileExistsError:
    print('%s exists' % TMP_DIR)

#### Load data

## Load mask
MASK = gpd.read_file(MASK_FP)

## Load roofs
tt = time.time()
# load roof polygons & link information to BLD
ROOFS = load_file(ROOF_PATH, columns=ROOF_COLS, layer=ROOF_LAYR, mask=MASK)
roof_link = pd.read_csv(ROOF_LINK).set_index('DF_UID').drop(columns=['batchID'])
print('\nLoaded roof data in %.2f s' % (time.time() - tt))

# Merge BLD_ID to roofs
ROOFS = ROOFS.merge(roof_link, on='DF_UID', how='left')
# rename columns
# NOTE: roof aspect defined as clockwise degrees from south (flat roofs are assigned a north aspect)
ROOFS = ROOFS.rename(roof_col_dict, axis=1)
print(ROOFS.head())

## Buffered BLD data
tt = time.time()
# Load BLD data
BLD = load_file(BLD_PATH, mask=MASK).drop(columns=['batchID'])
print('\nLoaded BLD data in %.2f s' % (time.time() - tt))

# Buffer by BLD_BUFFER meters
BLD_BUF = BLD.copy()
BLD_BUF['geometry'] = BLD.buffer(BLD_BUFFER, join_style=2)
BLD_BUF['ROOF_AREA'] = BLD.geometry.area  # non-buffered area
BLD_BUF['ROOF_TILT'] = DEFAULT_TILT
BLD_BUF['ROOF_ASPECT'] = DEFAULT_ASPECT
print(BLD_BUF.head())

## PV pixels
tt = time.time()
PV_data = pd.read_csv(PV_FP, header=None, names=COORDS)
PV_data['pixel_ID'] = list(range(len(PV_data)))
print('\nLoaded pixels in %.2f s' % (time.time() - tt))

# Convert pixel coordinates to LV95 coordinate system
PV_data[COORDS[0]] = PV_data[COORDS[0]] + 2e6
PV_data[COORDS[1]] = PV_data[COORDS[1]] + 1e6
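# Note: the +2e6 / +1e6 shift corresponds to the constant false-easting/false-northing offset
# between the Swiss LV03 and LV95 (EPSG:2056) frames; it does not correct the small local
# distortions between the two reference frames.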

# Select only those pixels within the bounding box of mask
PV_data = select_by_coords(PV_data, MASK)

# assign the pixel area
PV_data['pixel_area'] = PIXEL_RES ** 2
print(PV_data.head())

#### Spatial join pixels to roofs/buildings
N_BATCHES = int(np.ceil(len(PV_data) / BATCH_SIZE))
print('\nAssigning pixels to roofs/buildings in %d batches' % N_BATCHES)

all_pixel_roof = []
all_pixel_bld = []

for batch in range(N_BATCHES):
    t0 = time.time()
    start = batch * BATCH_SIZE
    end = (batch + 1) * BATCH_SIZE

    # Convert pixel data to point data
    tt = time.time()
    PV_pixels = coords_to_point(PV_data[start:end].copy(), coord_names=COORDS)
    print('Converted coords to point data in %.2f s' % (time.time() - tt))

    # Add the coordinate system of PV_pixels (LV95)
    PV_pixels.crs = 'epsg:2056'

    if PV_pixels.crs != ROOFS.crs:
        tt = time.time()
        PV_pixels = PV_pixels.to_crs(ROOFS.crs)
        print('Reprojected data in %.2f s' % (time.time() - tt))

    # spatial join pixels and roofs (keep only intersecting pixels)
    tt = time.time()
    pxl_per_roof = gpd.sjoin(PV_pixels, ROOFS, op='within').drop(columns=['index_right', 'geometry'])
    print('Spatial joined pixels with roofs in %.2f s' % (time.time() - tt))
    if pxl_per_roof.duplicated(subset=COORDS).any():
        print('WARNING: SOME PIXELS ARE DUPLICATED')
    all_pixel_roof.append(pxl_per_roof)

    ### Handle remaining pixels (those not assigned to any roof)
    remaining_pixels = PV_pixels[np.logical_not(PV_pixels.pixel_ID.isin(pxl_per_roof.pixel_ID.values))]
    if len(remaining_pixels) == 0:
        print('No remaining pixels')
        continue

    if PV_pixels.crs != BLD_BUF.crs:
        print('Reprojecting data')
        PV_pixels = PV_pixels.to_crs(BLD_BUF.crs)

    # spatial join pixels and buffered buildings (keep only intersecting pixels)
    tt = time.time()
    pxl_per_bld = gpd.sjoin(remaining_pixels, BLD_BUF, op='within').drop(columns=['index_right', 'geometry'])
    print('Spatial joined pixels with buffered buildings in %.2f s' % (time.time() - tt))
    print('Duplicates:', pxl_per_bld.duplicated(subset=COORDS).any())

    # Handle duplicates (pixels that fall into several buffered footprints)
    if pxl_per_bld.duplicated(subset=COORDS).any():
        # Extract the duplicates
        duplicates = pxl_per_bld[pxl_per_bld.duplicated(subset=COORDS, keep=False)].copy()
        px_count = pxl_per_bld.groupby('BLD_ID').pixel_ID.count()
        # For each duplicated pixel, keep the building with the most pixels overall
        duplicates['px_count'] = duplicates.BLD_ID.map(px_count)
        bld_w_highest_pxCount = (duplicates
                                 .sort_values('px_count', ascending=False)
                                 .drop_duplicates(subset=COORDS, keep='first')
                                 .drop(columns=['px_count']))
        # Add the selected rows back to pxl_per_bld
        pxl_per_bld = pd.concat([pxl_per_bld.drop_duplicates(subset=COORDS, keep=False),
                                 bld_w_highest_pxCount])
        if pxl_per_bld.duplicated(subset=COORDS).any():
            print('WARNING: SOME PIXELS ARE DUPLICATED')

    ## Try to assign the remaining pixels to rooftops by shortest distance
    # Convert remaining pixels on buildings to point data and set pixel_ID as index for correct assignment below
    px_BLD = coords_to_point(pxl_per_bld.copy(), coord_names=COORDS)
    px_BLD.index = px_BLD.pixel_ID

    BLD_IDs = px_BLD.BLD_ID.unique()
    roof_BID = ROOFS.BLD_ID.unique()

    px_assigned_DF_UID = []
    tt = time.time()
    for i, ID in enumerate(BLD_IDs):
        # Skip buildings without roof information
        if ID not in roof_BID:
            continue

        # Select the roofs and pixels to assign
        curr_roofs = ROOFS[ROOFS.BLD_ID == ID]
        curr_px = px_BLD[px_BLD.BLD_ID == ID]

        # Check which roof is the closest out of those in question (iterate over roofs):
        dist = {}
        for idx, roof in curr_roofs.iterrows():
            dist[roof.DF_UID] = curr_px.distance(roof.geometry)
        dist = pd.DataFrame(dist)

        # Extract the DF_UID of the closest roof within at most 3 m of the pixel and add it to the list
        dist_max3 = dist[dist.min(axis=1) <= 3]
        px_assigned_DF_UID.append(dist_max3.idxmin(axis=1))

    print('Assignment of buffer zones in %.2f s' % (time.time() - tt))

    if len(px_assigned_DF_UID) == 0:
        # if no pixels have been assigned to DF_UIDs: proceed as before
        px_per_roof_all = pxl_per_roof
        px_no_roof = pxl_per_bld
    else:
        px_assigned_DF_UID = pd.concat(px_assigned_DF_UID)

        # Get those pixels which now have a roof
        px_with_roof = px_BLD.loc[px_assigned_DF_UID.index, PV_pixels.columns].drop(columns=['geometry'])
        px_with_roof['DF_UID'] = px_with_roof.index.map(px_assigned_DF_UID)

        # Attach data
        px_with_roof_data = px_with_roof.merge(ROOFS.drop(columns=['geometry']), on='DF_UID')
        px_with_roof_data.index = np.ones(len(px_with_roof_data))

        all_pixel_roof.append(px_with_roof_data)
        px_per_roof_all = pd.concat([pxl_per_roof, px_with_roof_data])

        # Get those pixels which still don't have a roof
        px_no_roof = pxl_per_bld[~pxl_per_bld.pixel_ID.isin(px_assigned_DF_UID.index)]

    all_pixel_bld.append(px_no_roof)

    print(px_per_roof_all.head())
    print(px_no_roof.head())

    # Save intermediate data
    px_per_roof_all.to_csv(os.path.join(TMP_DIR, 'pxl_per_roof_%d.csv' % batch), index=False)
    px_no_roof.to_csv(os.path.join(TMP_DIR, 'pxl_per_bld_%d.csv' % batch), index=False)

    print('Finished iteration %d in %.2f s\n' % (batch, time.time() - t0))

# Merge all batches
all_pixel_roof = pd.concat(all_pixel_roof)
print(all_pixel_roof.head())
all_pixel_bld = pd.concat(all_pixel_bld)
print(all_pixel_bld.head())

# Merge building and roof data
all_pixels = pd.concat([all_pixel_roof, all_pixel_bld], sort=False)

# Compute the tilted PV area
all_pixels['PV_area'] = all_pixels['pixel_area'] / np.cos(np.deg2rad(all_pixels['ROOF_TILT']))
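# Note: pixel_area is the horizontal (map-projected) footprint of a detection pixel, so dividing
# by cos(ROOF_TILT) projects it onto the inclined roof plane; for the default tilt of 0 degrees,
# PV_area simply equals pixel_area.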
print(all_pixels.head())
print('\nFiltered pixels: %d of %d\n' % (len(all_pixels), len(PV_data)))

# Convert back to the LV03 coordinate system
all_pixels[COORDS[0]] = all_pixels[COORDS[0]] - 2e6
all_pixels[COORDS[1]] = all_pixels[COORDS[1]] - 1e6

tt = time.time()
all_pixels.to_csv(OUT_FP, index=False)
print('Saved %s - DONE in %.2f s' % (OUT_FP, time.time() - tt))

## Try to remove the temporary directory; if removal fails, print an error on screen using try...except
try:
    shutil.rmtree(TMP_DIR)
except OSError as e:
    print("Error: %s - %s." % (e.filename, e.strerror))