Page MenuHomec4science

spatial_join_pv_new.py
No OneTemporary

File Metadata

Created
Thu, May 1, 13:36

spatial_join_pv_new.py

import numpy as np
import pandas as pd
import geopandas as gpd
import os
import sys
import time
import shutil
#### Function definitions
def coords_to_point( df, coord_names = ['x', 'y'] ):
    """Promote a plain DataFrame to a GeoDataFrame with a point geometry.

    The geometry is built from the two coordinate columns named in
    ``coord_names`` (x first, y second); all other columns are kept.
    """
    xs = df[coord_names[0]].values
    ys = df[coord_names[1]].values
    return gpd.GeoDataFrame(df, geometry = gpd.points_from_xy(xs, ys))
def load_file(fp, columns = None, rows = None, crs = None, bbox = None, **kwargs):
    """Read a geospatial file, optionally restricted to selected columns.

    When ``columns`` is given, a zero-row probe read is used to discover the
    file's full column set; every column NOT in ``columns`` is passed to
    ``ignore_fields`` so only the requested fields are loaded.
    ``rows`` and ``bbox`` are forwarded to ``gpd.read_file``; ``crs`` is
    accepted for interface compatibility but not used here.
    """
    ignore_cols = None
    if columns is not None:
        header = gpd.read_file(fp, rows = 0, **kwargs).columns
        ignore_cols = [col for col in header if col not in columns]
    return gpd.read_file(fp, ignore_fields = ignore_cols, rows = rows, bbox = bbox, **kwargs)
def select_by_coords(df, gdf, coord_names = ['x', 'y']):
    """Keep only the rows of ``df`` whose coordinates fall inside the overall
    bounding box of the geometries in ``gdf`` (bounds are inclusive).
    """
    box = gdf.bounds
    x_col, y_col = coord_names[0], coord_names[1]
    # pandas .between is inclusive on both ends, matching >= / <= tests
    inside_x = df[x_col].between(box.minx.min(), box.maxx.max())
    inside_y = df[y_col].between(box.miny.min(), box.maxy.max())
    return df[inside_x & inside_y]
#### Variable definitions
## INPUTS (positional command-line arguments)
PV_FP = sys.argv[1] # CSV of detected PV pixels, e.g. '/work/hyenergy/output/CNN_pv_predictions/pv_predictions_geneva.csv'
MASK_FP = sys.argv[2] # geospatial limitation (geodataframe) if perimeter of analysis
OUT_FP = sys.argv[3] # output CSV path, e.g. '/scratch/walch/pv_linking/PV_detected.csv'
# Optional inputs
if len(sys.argv) > 4:
    BLD_BUFFER = float(sys.argv[4]) # Distance by which to buffer BLD footprint shapes
else:
    BLD_BUFFER = 3
# NOTE(review): tilt/aspect are only read when BOTH argv[5] and argv[6] are
# given; passing argv[5] alone is silently ignored -- confirm this is intended.
if len(sys.argv) > 6:
    DEFAULT_TILT = int(sys.argv[5])
    DEFAULT_ASPECT = int(sys.argv[6])
else:
    DEFAULT_TILT = 0 # Default tilt angle assumed on roofs without rooftop info
    DEFAULT_ASPECT = -180 # Default roof aspect assumed on roofs without rooftop info
# Echo the effective configuration for the run log
print('\nDefault tilt for roofs without rooftop info: %d' %DEFAULT_TILT)
print('Default aspect for roofs without rooftop info: %d' %DEFAULT_ASPECT)
print('BLD buffer width: %dm' %BLD_BUFFER)
print('Load PV pixels from %s' %PV_FP)
print('Load mask from %s' %MASK_FP)
print('Save output to %s\n' %OUT_FP)
## Constants
BATCH_SIZE = 100000 ## 100000 # size of batches to process spatial join (executed in for-loop)
PIXEL_RES = 0.25 # pixel side length (presumably metres in LV95 -- see pixel_area below)
COORDS = ['x', 'y'] # coordinate column names used throughout
# CONSTANT FILES (national files)
# TLM_PATH = '/work/hyenergy/raw/Building_data/swissTLM3D/Update swissTLM3D 2017/LV95/swissTLM3D_LV95_LN02.gdb'
# TLM_LAYR = 'TLM_GEBAEUDE_FOOTPRINT'
# TLM_COLS = ['UUID', 'geometry']
BLD_PATH = '/work/hyenergy/output/buildings/buildings_CH/BLD_LV95_CH' # building footprints
ROOF_PATH = '/work/hyenergy/raw/Sonnendach/Sonnendach_all_roofs/Sonnendach_CH.gdb' # roof polygons (Sonnendach)
ROOF_LAYR = 'SOKAT_CH_DACH'
# Roof information to inherit ('FLAECHE', 'NEIGUNG', 'AUSRICHTUNG' are compulsory)
ROOF_COLS = ['geometry', 'DF_UID', 'FLAECHE', 'NEIGUNG', 'AUSRICHTUNG']
# German -> English column renaming (area, tilt, aspect)
roof_col_dict = {'FLAECHE' : 'ROOF_AREA', 'NEIGUNG' : 'ROOF_TILT', 'AUSRICHTUNG' : 'ROOF_ASPECT'}
# Files that link to BLD_ID
ROOF_LINK = '/work/hyenergy/output/buildings/buildings_CH/SOLKAT_DACH_CH_BLD_ID.csv'
# TLM_LINK = '/work/hyenergy/output/buildings/buildings_CH/TLM_CH_BLD_ID.csv'
# Temporary directory for per-batch intermediate CSVs, created next to OUT_FP.
TMP_DIR = os.path.join( os.path.split(OUT_FP)[0], 'tmp' )
try:
    os.mkdir( TMP_DIR )
except FileExistsError:
    # Directory left over from an earlier run -- reuse it. Any other OSError
    # (missing parent, permission denied) now propagates instead of being
    # silently misreported as "exists" by the previous bare `except:`.
    print('%s exists' %TMP_DIR)
#### Load data
## Load mask (perimeter of analysis; used to spatially filter all other reads)
MASK = gpd.read_file( MASK_FP )
## Load roofs
tt = time.time()
# load roof polygons & link information to BLD
ROOFS = load_file( ROOF_PATH , columns = ROOF_COLS, layer = ROOF_LAYR, mask = MASK )
# Roof -> building linkage table, indexed by roof ID (DF_UID)
roof_link = pd.read_csv(ROOF_LINK).set_index('DF_UID').drop(columns = ['batchID'])
print('\nLoaded roof data in %.2fs' %(time.time()-tt))
# Merge BLD_ID to roofs
# NOTE(review): roof_link carries DF_UID as its index; merging with
# on='DF_UID' relies on pandas' column/index-level merge (>= 0.23) -- verify
# against the deployed pandas version.
ROOFS = ROOFS.merge(roof_link, on = 'DF_UID', how = 'left')
# rename columns
# NOTE: roof aspect defined as clockwise degrees from south (flat roofs are assigned a north aspect)
ROOFS = ROOFS.rename( roof_col_dict, axis = 1 )
print(ROOFS.head())
## Buffered BLD data
tt = time.time()
# Load BLD data (building footprints within the mask)
BLD = load_file( BLD_PATH, mask = MASK ).drop(columns = ['batchID'])
print('\nLoaded BLD data in %.2fs' %(time.time()-tt))
# Buffer by BLD_BUFFER meters, so pixels just outside a footprint still match.
# join_style = 2 is shapely's mitred joins (keeps square corners).
BLD_BUF = BLD.copy()
BLD_BUF['geometry'] = BLD.buffer( BLD_BUFFER, join_style = 2 )
# Give buffered buildings the same attribute columns as roofs, so both join
# results can later be concatenated: area from the UNbuffered footprint,
# tilt/aspect from the configured defaults.
BLD_BUF['ROOF_AREA'] = BLD.geometry.area # non-buffered area
BLD_BUF['ROOF_TILT'] = DEFAULT_TILT
BLD_BUF['ROOF_ASPECT'] = DEFAULT_ASPECT
print(BLD_BUF.head())
## PV pixels
tt = time.time()
# Headerless two-column CSV: x, y coordinates of each detected PV pixel
PV_data = pd.read_csv( PV_FP, header = None, names = COORDS )
# Stable per-pixel identifier (row order) used to track assignment below
PV_data['pixel_ID'] = list(range(len(PV_data)))
print('\nLoaded pixels in %.2fs' %(time.time()-tt))
# Convert pixel coordinates to LV95 coordinate system
# (input is LV03; LV95 = LV03 + 2'000'000 east / + 1'000'000 north --
#  the inverse shift is applied before saving, see end of script)
PV_data[COORDS[0]] = PV_data[COORDS[0]] + 2e6
PV_data[COORDS[1]] = PV_data[COORDS[1]] + 1e6
# Select only those pixels within the bounding box of mask
PV_data = select_by_coords(PV_data, MASK)
# assign the pixel area (projected, per-pixel; PIXEL_RES is the side length)
PV_data['pixel_area'] = PIXEL_RES**2
print(PV_data.head())
#### Spatial join pixels to roofs/buildings
# Pixels are processed in batches of BATCH_SIZE. Per batch:
#   1. join pixels to roof polygons ('within'),
#   2. join the leftover pixels to buffered building footprints,
#   3. resolve pixels matching several buildings by total pixel count,
#   4. re-assign building-only pixels to the nearest roof within 3 m.
N_BATCHES = int(np.ceil( len(PV_data)/ BATCH_SIZE ))
print('\nAssigning pixels to roofs/buildings in %d batches' %N_BATCHES)
all_pixel_roof = []  # per-batch frames of pixels assigned to a roof (DF_UID)
all_pixel_bld = []   # per-batch frames of pixels assigned only to a building (BLD_ID)
for batch in range( N_BATCHES ):
    t0 = time.time()
    start = batch * BATCH_SIZE
    end = (batch + 1) * BATCH_SIZE  # slicing past the end is safe in pandas
    # Convert pixel data to point data
    tt = time.time()
    PV_pixels = coords_to_point(PV_data[ start:end ].copy(), coord_names = COORDS )
    print('Converted coords to point data in %.2fs' %(time.time()-tt))
    # Add the coordinate system of PV_pixels (LV95)
    PV_pixels.crs = 'epsg:2056'
    PV_pixels.crs  # NOTE(review): no-op expression; looks like leftover debug code
    if PV_pixels.crs != ROOFS.crs:
        tt = time.time()
        PV_pixels = PV_pixels.to_crs( ROOFS.crs )
        print('Reprojected data in %.2fs' %(time.time()-tt))
    # spatial join pixels and roofs (keep only intersecting pixels)
    tt = time.time()
    pxl_per_roof = gpd.sjoin( PV_pixels, ROOFS, op = 'within' ).drop( columns = ['index_right', 'geometry'] )
    print('Spatial joined pixels with roofs in %.2fs' %(time.time()-tt))
    # A pixel inside overlapping roof polygons appears twice -- only warned, not fixed
    if pxl_per_roof.duplicated( subset = COORDS ).any():
        print('WARNING: SOME PIXELS ARE DUPLICATED')
    all_pixel_roof.append( pxl_per_roof )
    ### Handle remaining roofs
    # Pixels that did not fall within any roof polygon
    remaining_pixels = PV_pixels[ np.logical_not(PV_pixels.pixel_ID.isin( pxl_per_roof.pixel_ID.values )) ]
    if len(remaining_pixels) == 0:
        print('No remaining pixels')
        continue
    if PV_pixels.crs != BLD_BUF.crs:
        print('Reprojecting data')
        PV_pixels = PV_pixels.to_crs( BLD_BUF.crs )
    # spatial join pixels and buffered buildings (keep only intersecting pixels)
    tt = time.time()
    pxl_per_bld = gpd.sjoin( remaining_pixels, BLD_BUF, op = 'within' ).drop( columns = ['index_right', 'geometry'] )
    print('Spatial joined pixels with buffered buildings in %.2fs' %(time.time()-tt))
    print('Duplicates:', pxl_per_bld.duplicated( subset = COORDS ).any())
    # Handle duplicates (a pixel inside several overlapping buffered footprints)
    if pxl_per_bld.duplicated( subset = COORDS ).any():
        # Extract the duplicates
        duplicates = pxl_per_bld[pxl_per_bld.duplicated( subset = COORDS, keep = False )].copy()
        # Total matched-pixel count per building -- the tie-break criterion
        px_count = pxl_per_bld.groupby('BLD_ID').pixel_ID.count()
        # For each duplicate, choose the one with most pixels
        duplicates['px_count'] = duplicates.BLD_ID.map( px_count )
        bld_w_highest_pxCount = ( duplicates.sort_values('px_count', ascending = False)
            .drop_duplicates( subset = COORDS, keep = 'first' ).drop( columns = ['px_count'] ) )
        # Add selected values to the pxl_per_bld
        pxl_per_bld = pd.concat([ pxl_per_bld.drop_duplicates( subset=COORDS, keep=False ), bld_w_highest_pxCount ])
        if pxl_per_bld.duplicated( subset = COORDS ).any():
            print('WARNING: SOME PIXELS ARE DUPLICATED')
    ## Try to assign remaining pixels to rooftops by shortest distance
    # Convert remaining pixels on buildings to point data and set pixel_ID to index for correct assignment below
    px_BLD = coords_to_point( pxl_per_bld.copy(), coord_names = COORDS )
    px_BLD.index = px_BLD.pixel_ID
    BLD_IDs = px_BLD.BLD_ID.unique()
    roof_BID = ROOFS.BLD_ID.unique()
    px_assigned_DF_UID = []  # one Series per building: pixel_ID -> closest DF_UID
    tt = time.time()
    for i,ID in enumerate(BLD_IDs):
        # Skip buildings without roof
        if ID not in roof_BID:
            continue
        # Select roofs and pixels to assign
        curr_roofs = ROOFS[ROOFS.BLD_ID == ID]
        curr_px = px_BLD[px_BLD.BLD_ID == ID]
        # Check which roof is the closest out of those in question (iterate over roofs):
        dist = {}
        for idx, roof in curr_roofs.iterrows():
            dist[roof.DF_UID] = curr_px.distance(roof.geometry)
        # rows: pixel_ID, columns: DF_UID, values: pixel-to-roof distance
        dist = pd.DataFrame(dist)
        # Extract the DF_UID for the roof with at max. 3m distance to roof and add to list
        dist_max3 = dist[dist.min(axis = 1) <= 3]
        px_assigned_DF_UID.append( dist_max3.idxmin(axis = 1) )
    print('Assignment of buffer zones in %.2fs' %(time.time()-tt))
    if len(px_assigned_DF_UID) == 0: # if no pixels have been assigned to DF_UIDs: proceed as before
        px_per_roof_all = pxl_per_roof
        px_no_roof = pxl_per_bld
    else:
        px_assigned_DF_UID = pd.concat(px_assigned_DF_UID)
        # Get those pixels which now have a roof
        px_with_roof = px_BLD.loc[ px_assigned_DF_UID.index, PV_pixels.columns ].drop( columns = ['geometry'] )
        px_with_roof['DF_UID'] = px_with_roof.index.map(px_assigned_DF_UID)
        # Attach data (inherit roof attributes for the assigned DF_UIDs)
        px_with_roof_data = px_with_roof.merge(ROOFS.drop( columns = ['geometry'] ), on = 'DF_UID')
        px_with_roof_data.index = np.ones(len(px_with_roof_data))
        all_pixel_roof.append( px_with_roof_data )
        px_per_roof_all = pd.concat([pxl_per_roof, px_with_roof_data])
        # Get those pixels which still don't have a roof
        px_no_roof = pxl_per_bld[ ~pxl_per_bld.pixel_ID.isin(px_assigned_DF_UID.index) ]
    all_pixel_bld.append( px_no_roof )
    print(px_per_roof_all.head())
    print(px_no_roof.head())
    # Save intermediate data (per-batch checkpoints in TMP_DIR; removed at the end)
    px_per_roof_all.to_csv( os.path.join( TMP_DIR, 'pxl_per_roof_%d.csv' %batch ), index = False )
    px_no_roof.to_csv( os.path.join( TMP_DIR, 'pxl_per_bld_%d.csv' %batch ), index = False )
    print('Finished iteration %d in %.2fs\n' %(batch, time.time()-t0) )
# Merge all batches
# NOTE(review): pd.concat raises ValueError on an empty list -- this fails if
# every batch was skipped via `continue` (no roof pixels) or no batch appended
# to all_pixel_bld; confirm whether that case can occur in practice.
all_pixel_roof = pd.concat(all_pixel_roof)
print(all_pixel_roof.head())
all_pixel_bld = pd.concat(all_pixel_bld )
print(all_pixel_bld.head())
# Merge building and roof data
all_pixels = pd.concat([all_pixel_roof, all_pixel_bld], sort = False)
# Computed the tilted PV area: projected pixel_area / cos(tilt) gives the
# area in the (inclined) roof plane; tilt 0 leaves pixel_area unchanged
all_pixels['PV_area'] = all_pixels['pixel_area'] / np.cos( np.deg2rad( all_pixels['ROOF_TILT'] ) )
print(all_pixels.head())
print('\nFiltered pixels: %d of %d\n' %(len(all_pixels), len(PV_data)))
# Convert back to LV03 coordinate system (inverse of the +2e6/+1e6 shift above)
all_pixels[COORDS[0]] = all_pixels[COORDS[0]] - 2e6
all_pixels[COORDS[1]] = all_pixels[COORDS[1]] - 1e6
tt = time.time()
all_pixels.to_csv( OUT_FP, index = False )
print('Saved %s - DONE in %.2fs' %(OUT_FP, time.time()-tt))
# Best-effort cleanup: delete the per-batch checkpoint directory now that the
# final output is written; report (but do not re-raise) any OS-level failure.
try:
    shutil.rmtree(TMP_DIR)
except OSError as err:
    print("Error: %s - %s." % (err.filename, err.strerror))

Event Timeline