File Metadata

Created: Fri, Jul 4, 14:26

r_to_pd_v2.py
View Options

	import xarray as xr
	import pandas as pd
	import numpy as np
	from rpy2.robjects import r

	import os
	import sys
	import time

	######## USER INPUT ##############

	# Both directories are required positional arguments; fail with a usage
	# message instead of a bare IndexError when one is missing.
	if len(sys.argv) < 3:
	    sys.exit('Usage: %s SOURCE_DIR TARGET_DIR' %sys.argv[0])

	SOURCE_DIR = sys.argv[1] # path to a folder that contains RData
	TARGET_DIR = sys.argv[2] # target directory for output csv data

	print( 'Source directory: %s' %SOURCE_DIR )
	print( 'Target directory: %s' %TARGET_DIR )

	### == Constants == ###

	# Intermediate results are written out whenever the file counter is a
	# multiple of SKIP.
	SKIP = 50

	# The model chain is the last component of SOURCE_DIR. Normalising first
	# drops a trailing separator, which would otherwise yield an empty name
	# (os.path.split('a/b/')[1] == '').
	modelchain = os.path.basename(os.path.normpath(SOURCE_DIR))
	print('Model chain: %s' %modelchain)

	##################################

	def load_r_to_array( filename ):
	    """Load the first object stored in an RData file.

	    The object must provide ``data.time`` and ``data.series`` elements and
	    carry a ``station`` attribute. ``data.time`` values are interpreted as
	    day offsets from 1970-01-01.

	    Args:
	        filename: Path of the RData file to load.

	    Returns:
	        A tuple ``(values, dates, station)``: the series as an ``(n, 1)``
	        numpy array, a list of pandas timestamps (one per ``data.time``
	        entry), and the first element of the ``station`` attribute.
	    """
	    # load() returns the names of the restored objects; only the first is used
	    names = r.load( filename , verbose = True)
	    obj_name = names[0]

	    # Look the object up by name and call attributes() on it directly rather
	    # than pasting the name into R source text, which fails for object names
	    # that are not syntactically valid R identifiers.
	    df = r[obj_name]
	    attributes = r['attributes'](df)

	    # Convert data into dictionaries for further use
	    df_dict = dict(zip(df.names, list(df)))
	    attr_dict = dict(zip(attributes.names, list(attributes)))

	    # convert day offsets to timestamps; the epoch is loop-invariant
	    epoch = pd.to_datetime(0)
	    dates = [ epoch + pd.Timedelta(val, unit='D') for val in df_dict['data.time'] ]

	    # convert data to a vertical numpy array
	    df_vect = np.asarray(df_dict['data.series']).reshape(-1,1)

	    return df_vect, dates, attr_dict['station'][0]

	def save_files( dict, target_dir = None, chain = None ):
	    """Write every populated data frame of the mapping to a CSV file.

	    Files are named ``<key>_<chain>.csv`` inside the target directory.
	    Entries whose value is ``None`` are skipped. A failure on one entry is
	    reported on stdout and does not stop the remaining entries.

	    Args:
	        dict: Mapping of variable name to a pandas DataFrame or ``None``.
	            (The name shadows the builtin; it is kept so existing callers
	            are unaffected.)
	        target_dir: Output directory; defaults to the module-level
	            ``TARGET_DIR``.
	        chain: File-name suffix; defaults to the module-level
	            ``modelchain``.
	    """
	    if target_dir is None:
	        target_dir = TARGET_DIR
	    if chain is None:
	        chain = modelchain

	    for key, df in dict.items():
	        # nothing to write for this key; avoids a misleading failure message
	        if df is None:
	            continue

	        out_path = os.path.join(target_dir, '%s_%s.csv' %(key, chain))
	        try:
	            # created inside the try so a directory error is reported like
	            # any other write failure instead of propagating to the caller
	            os.makedirs(target_dir, exist_ok=True)
	            df.to_csv( out_path )
	            print("Wrote file %s" %out_path)
	        except Exception as e:
	            print('Failed to convert and save %s: %s' %(key, e))

	###################################

	# One entry per known variable; each holds a DataFrame with one column per
	# station once the first file for that variable has been read.
	var_dict = {
	    'pr' : None ,
	    'tas' : None ,
	    'rsds': None ,
	}

	# Sorted listing: os.listdir() order is arbitrary, and the order in which
	# files are read decides the column order of the output CSVs.
	for i, file in enumerate(sorted(os.listdir( SOURCE_DIR )), start=1):
	    fp = os.path.join( SOURCE_DIR, file )

	    timer = time.time()

	    # verify that the file has the right extension; splitext copes with
	    # names that have no dot or several dots, and the comparison ignores
	    # case so both '.Rdata' and '.RData' are accepted
	    stem, ext = os.path.splitext(file)
	    if ext.lower() != '.rdata':
	        print('Skipped file %s' %file)
	        continue

	    # the variable name is the part of the file name before the first '_'
	    var = stem.split('_')[0]
	    if var not in var_dict:
	        print('Skipped file %s: unknown variable %s' %(file, var))
	        continue

	    try:
	        # load the file and add it as a new station column for its variable
	        data, dates, loc = load_r_to_array( fp )

	        # the first file of a variable defines the date index of its frame
	        if var_dict[ var ] is None:
	            var_dict[ var ] = pd.DataFrame( index = dates )

	        var_dict[ var ][loc] = data

	        # periodically save intermediate results
	        if i % SKIP == 0:
	            save_files( var_dict )

	        print("Added %s" %fp)

	    except Exception as e:
	        # best effort: report the file and carry on with the next one
	        print('Failed to add %s: %s' %(fp, e))

	    print('Loop iteration time (iteration %d): %f' %(i, time.time()-timer))

	save_files( var_dict )

r_to_pd_v2.py
No OneTemporary
Actions

File Metadata

r_to_pd_v2.py
View Options

Event Timeline

r_to_pd_v2.pyNo OneTemporaryActions

File Metadata

r_to_pd_v2.pyView Options

Event Timeline

r_to_pd_v2.py
No OneTemporary
Actions

r_to_pd_v2.py
View Options