create_building_polygons.py

# coding: utf-8
import numpy as np
import pandas as pd
import geopandas as gpd
import os
import time
import sys
from geo_util import *
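# ------------------------------------------------------------------------------------
# geo_util is a local helper module that is not shown here; only load_file and
# dissolve_overlapping_polygons are used below. The guarded definitions that follow are
# an editor's sketch of their assumed behaviour, kept as documentation/fallback only;
# they are skipped whenever geo_util already provides the real implementations.
# ------------------------------------------------------------------------------------
if 'load_file' not in globals():
    def load_file(path, columns=None):
        # Assumed behaviour: read a vector file into a GeoDataFrame, optionally keeping
        # only the requested columns.
        gdf = gpd.read_file(path)
        return gdf[columns] if columns is not None else gdf

if 'dissolve_overlapping_polygons' not in globals():
    def dissolve_overlapping_polygons(gdf):
        # Assumed behaviour: merge touching/overlapping polygons into single shapes and
        # return them with a plain integer 'index' column (as expected further below).
        merged = gdf.geometry.unary_union
        geoms = list(merged.geoms) if hasattr(merged, 'geoms') else [merged]
        return gpd.GeoDataFrame(geometry=geoms, crs=gdf.crs).reset_index()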
# # Load data
TLM_PATH = sys.argv[1] # '/work/hyenergy/raw/Building_data/swissTLM3D/TLM_FOOTPRINT/TLM_FOOTPRINT_GVA.shp'
ROOF_PATH = sys.argv[2] # '/work/hyenergy/output/solar_potential/geographic_potential/available_area/panel_fitting/SOLKAT_DACH_GVA'
OUTNAME = sys.argv[3]
OUT_FOLDER = '/scratch/walch/pv_linking/'
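# Example invocation (paths are illustrative, shortened from the comments above):
#   python create_building_polygons.py TLM_FOOTPRINT_GVA.shp SOLKAT_DACH_GVA GVA
# OUTNAME is only used to label the files written to OUT_FOLDER.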
# ## TLM data
TLM_COLS = ['OBJECTID', 'UUID', 'OBJEKTART', 'SHAPE_Area', 'geometry']
tt = time.time()
TLM = load_file( TLM_PATH, columns = TLM_COLS )
print('Loaded TLM geometries in %.2fs' %(time.time()-tt))
# ## Roof data
ROOF_COLS = ['geometry', 'DF_UID', 'SB_UUID', 'GWR_EGID', 'FLAECHE', 'NEIGUNG', 'AUSRICHTUN']
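# Roof attribute columns (German): FLAECHE = area, NEIGUNG = tilt, AUSRICHTUN = orientation;
# SB_UUID links each roof to its building footprint (matched against the TLM UUID below).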
tt = time.time()
ROOFS = load_file( ROOF_PATH, columns = ROOF_COLS )
print('Loaded roof geometries in %.2fs' %(time.time()-tt))
# ### Geographic projection:
tt = time.time()
ROOFS = ROOFS.to_crs( TLM.crs )
print('Transformed roof coordinates in %.2fs' %(time.time()-tt))
# # Dissolve and split polygons
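# Merge touching/overlapping TLM footprints into single building polygons and number
# them with a unique BLD_ID.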
TLM_BLD = dissolve_overlapping_polygons(TLM[['geometry']])
TLM_BLD = TLM_BLD.drop(columns = 'index').reset_index().rename({'index' : 'BLD_ID'}, axis = 1)
tt = time.time()
TLM_BLD.to_file( os.path.join(OUT_FOLDER, 'TLM_%s_dissolved' %OUTNAME) )
print('Saved %s in %.2fs' %(os.path.join(OUT_FOLDER, 'TLM_%s_dissolved' %OUTNAME), time.time()-tt))
# # Link SB_UUID to BLD_ID
# TLM_BLD = gpd.read_file( os.path.join(OUT_FOLDER, 'TLM_%s_dissolved' %OUTNAME) )
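# Intersect the original TLM footprints with the dissolved buildings so that each
# TLM UUID can be mapped to the BLD_ID of the dissolved polygon it belongs to.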
tt = time.time()
TLM_w_ID = gpd.overlay( TLM, TLM_BLD, how = 'intersection' )
print('Overlaid dissolved TLM with original TLM in %.2fs' %(time.time()-tt))
# Clean duplicates: if a footprint intersects several dissolved buildings, keep only
# the intersection with the largest area for each UUID
TLM_w_ID['area'] = TLM_w_ID.geometry.area
TLM_w_ID = TLM_w_ID.sort_values('area', ascending = False)
TLM_w_ID = TLM_w_ID.drop_duplicates(subset = 'UUID', keep = 'first').drop(columns = ['area'])
print('Lengths of TLM subsets: TLM_w_ID, TLM, TLM_w_ID.dropna()')
print(len(TLM_w_ID))
print(len(TLM))
print(len(TLM_w_ID.dropna()))
TLM_ID_info = pd.DataFrame( TLM_w_ID.drop(columns = ['geometry' ]) )
TLM_ID_info.to_csv( os.path.join(OUT_FOLDER, 'TLM_BLD_ID_%s_info.csv' %OUTNAME) , index = False)
print('Saved %s' %os.path.join(OUT_FOLDER, 'TLM_BLD_ID_%s_info.csv' %OUTNAME))
# # Intersect with roofs
# ### Merge UUIDs
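# Each roof references its building footprint via SB_UUID, which corresponds to the TLM
# UUID; an inner merge therefore attaches a BLD_ID to every roof with a matching footprint.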
tt = time.time()
ROOFS_w_BLD = ROOFS.merge( TLM_w_ID[['UUID', 'BLD_ID']].rename({'UUID' : 'SB_UUID'}, axis = 1).dropna(),
on = 'SB_UUID', how = 'inner')
print('Merged rooftop ID to BLD in %.2fs' %(time.time()-tt))
print('Lengths of ROOF subsets: ROOFS_w_BLD, ROOFS, ROOFS_w_BLD.SB_UUID.unique(), ROOFS.SB_UUID.unique()')
print(len(ROOFS_w_BLD))
print(len(ROOFS))
print(len(ROOFS_w_BLD.SB_UUID.unique()))
print(len(ROOFS.SB_UUID.unique()))
# ### Check roofs that are not found
remaining_roofs = ROOFS[~ROOFS.SB_UUID.isin( ROOFS_w_BLD.SB_UUID.values )].copy()
invalid_geometry = remaining_roofs[~remaining_roofs.geometry.is_valid]
# Repair invalid polygons with the buffer(0) trick before dissolving them
remaining_roofs.loc[invalid_geometry.index, 'geometry'] = invalid_geometry.geometry.buffer(0)
# ### For roofs that are not found, merge and intersect with TLM_BLD
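# Fallback for roofs whose SB_UUID found no match in the attribute join: dissolve them
# per SB_UUID and assign a BLD_ID by spatial intersection with the dissolved buildings.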
tt = time.time()
SB_UUIDs = remaining_roofs[['geometry', 'SB_UUID']].dissolve(by = 'SB_UUID').reset_index()
print('Dissolved remaining roofs in %.2fs' %(time.time()-tt))
tt = time.time()
SB_UUID_w_ID = gpd.overlay( SB_UUIDs, TLM_BLD, how = 'intersection' )
print('Overlaid SB_UUID with TLM_BLD in %.2fs' %(time.time()-tt))
# Clean duplicates: keep, for each SB_UUID, only the intersection with the largest area
SB_UUID_w_ID['area'] = SB_UUID_w_ID.geometry.area
SB_UUID_w_ID = SB_UUID_w_ID.sort_values('area', ascending = False)
SB_UUID_w_ID = SB_UUID_w_ID.drop_duplicates(subset = 'SB_UUID', keep = 'first').drop(columns = ['area'])
# ### Merge information with mapped roofs
ROOFS_w_BLD = pd.concat([ ROOFS_w_BLD, SB_UUID_w_ID[['SB_UUID', 'BLD_ID']].merge(ROOFS, on = 'SB_UUID') ],
sort = True)
print('Lengths of ROOF subsets after cleaning: ROOFS_w_BLD, ROOFS, ROOFS_w_BLD.SB_UUID.unique(), ROOFS.SB_UUID.unique()')
print(len(ROOFS_w_BLD))
print(len(ROOFS))
print(len(ROOFS_w_BLD.SB_UUID.unique()))
print(len(ROOFS.SB_UUID.unique()))
# ### Get yet remaining roofs
remaining_SB = SB_UUIDs[~SB_UUIDs.SB_UUID.isin(ROOFS_w_BLD.SB_UUID.values)].copy()
# remaining_SB['geometry'] = remaining_SB.geometry.centroid
# ### Assign new BLD_ID to yet remaining SB_UUIDs (numbered after the current maximum BLD_ID)
remaining_SB['BLD_ID'] = np.arange( TLM_BLD.BLD_ID.max() + 1, TLM_BLD.BLD_ID.max() + len(remaining_SB) + 1 )
print('Added new BLD_IDs for unmatched geometries')
print(remaining_SB)
# ### Merge with roof info
ROOFS_w_BLD = pd.concat([ ROOFS_w_BLD, ( remaining_SB.drop(columns = ['geometry']).merge(ROOFS, on = 'SB_UUID') )],
sort = True )
print('Lengths of remaining ROOF subsets: ROOFS_w_BLD, ROOFS_w_BLD.dropna(), ROOFS, ROOFS_w_BLD.SB_UUID.unique(), ROOFS.SB_UUID.unique()')
print(len(ROOFS_w_BLD))
print(len(ROOFS_w_BLD.dropna()))
print(len(ROOFS))
print(len(ROOFS_w_BLD.SB_UUID.unique()))
print(len(ROOFS.SB_UUID.unique()))
tt = time.time()
ROOFS_w_BLD.to_file( os.path.join(OUT_FOLDER, 'SOLKAT_DACH_%s_BLD_ID' %OUTNAME) )
print('Saved %s in %.2fs' %(os.path.join(OUT_FOLDER, 'SOLKAT_DACH_%s_BLD_ID' %OUTNAME), time.time()-tt))
# # Add additional shapes to TLM_BLD
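# Append the dissolved roof shapes that received new BLD_IDs, so every SB_UUID ends up
# with a corresponding polygon in the final building layer.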
TLM_BLD = pd.concat([TLM_BLD, remaining_SB[TLM_BLD.columns]])
TLM_BLD.crs = TLM.crs
tt = time.time()
TLM_BLD.to_file( os.path.join(OUT_FOLDER, 'TLM_BLD_ID_%s' %OUTNAME) )
print('Saved %s in %.2fs' %(os.path.join(OUT_FOLDER, 'TLM_BLD_ID_%s' %OUTNAME), time.time()-tt))
"""
# # Buffer BLD
TLM_BLD_buff3 = TLM_BLD.copy()
TLM_BLD_buff3['geometry'] = TLM_BLD.geometry.buffer(3, join_style = 2)
TLM_BLD_buff3.to_file( os.path.join(OUT_FOLDER, 'TLM_%s_buffer_3m' %OUTNAME) )
TLM_BLD_buff5 = TLM_BLD.copy()
TLM_BLD_buff5['geometry'] = TLM_BLD.geometry.buffer(5, join_style = 2)
TLM_BLD_buff5.to_file( os.path.join(OUT_FOLDER, 'TLM_%s_buffer_5m' %OUTNAME) )
"""
