Page Menu · Home · c4science

cluster_pv.py
No One · Temporary

File Metadata

Created
Fri, Jul 4, 14:42

cluster_pv.py

import numpy as np
import pandas as pd
import os
import sys
import time
from sklearn.cluster import DBSCAN
#### Variable definitions
## INPUTS (positional command-line arguments)
#   argv[1]: input CSV of detected PV pixels (must contain 'x', 'y', 'pixel_area' columns)
#   argv[2]: output CSV path for the clustered, size-filtered pixels
INFILE = sys.argv[1]
OUTFILE = sys.argv[2]

# Optional input: minimum total area of an installation (in m2).
if len(sys.argv) > 3:
    MIN_SIZE = float(sys.argv[3])
else:
    MIN_SIZE = 2.5

# DBSCAN hyperparameters: see https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html
# Both must be given together (argv[4] and argv[5]) to override the defaults.
if len(sys.argv) > 5:
    EPS = float(sys.argv[4])
    MIN_SAMPLE = int(sys.argv[5])
else:
    EPS = 0.72       # Accept 2 missing pixels in x, y, or diagonal direction
    MIN_SAMPLE = 10  # Minimum 10 samples for "core points"

# Constants: names of the coordinate columns used for clustering
COORDS = ['x', 'y']

print(f'\nLoad pixel data from {INFILE}')
print(f'Minimum installation size: {MIN_SIZE:.1f} m2')
print(f'DBSCAN parameters: EPS = {EPS:.2f}, MIN_SAMPLE = {MIN_SAMPLE}')
print(f'Save output to {OUTFILE}\n')

#### Load data
all_pixels = pd.read_csv(INFILE)

#### Perform clustering
tt = time.time()
# DBSCAN labels each pixel with a cluster ID; noise points get label -1.
all_pixels['cluster_ID'] = DBSCAN(eps=EPS, min_samples=MIN_SAMPLE, n_jobs=-1).fit_predict(all_pixels[COORDS])
print(f'Clustered data in {time.time() - tt:.2f}s')

# Exclude all pixels without an assigned cluster (DBSCAN noise label is -1)
PV_clusters = all_pixels[all_pixels.cluster_ID >= 0]

# Keep only clusters whose summed pixel area reaches the minimum size.
# Sum only the 'pixel_area' column — summing the whole frame is wasteful and
# fails on non-numeric columns under modern pandas.
PV_cluster_area = PV_clusters.groupby('cluster_ID')['pixel_area'].sum()
valid_clusters = PV_cluster_area[PV_cluster_area >= MIN_SIZE]
clustered_pixels = all_pixels[all_pixels.cluster_ID.isin(valid_clusters.index.values)]
print(clustered_pixels.head())

# Save output
clustered_pixels.to_csv(OUTFILE, index=False)
print(f'Saved {OUTFILE} - DONE')

Event Timeline