Page MenuHomec4science

r_to_pd_v2.py
No OneTemporary

File Metadata

Created
Wed, May 14, 19:13

r_to_pd_v2.py

import xarray as xr
import pandas as pd
import numpy as np
from rpy2.robjects import r
import os
import sys
import time
######## USER INPUT ##############
# Usage: python r_to_pd_v2.py SOURCE_DIR TARGET_DIR
# Fail with a readable usage line instead of a bare IndexError when an
# argument is missing.
if len(sys.argv) < 3:
    sys.exit('Usage: %s SOURCE_DIR TARGET_DIR' % sys.argv[0])
SOURCE_DIR = sys.argv[1]  # path to a folder that contains RData
TARGET_DIR = sys.argv[2]  # target directory for output csv data
print('Source directory: %s' % SOURCE_DIR)
print('Target directory: %s' % TARGET_DIR)
### == Constants == ###
# Intermediate CSV files are written whenever the running entry counter of
# the main loop is a multiple of SKIP.
SKIP = 50
# The model chain name is the last path component of SOURCE_DIR and is used
# as the suffix of every output file name. normpath() removes a trailing
# separator first; without it os.path.split('dir/chain/') yields an empty
# name and the output files would be called e.g. 'pr_.csv'.
modelchain = os.path.basename(os.path.normpath(SOURCE_DIR))
print('Model chain: %s' % modelchain)
##################################
def load_r_to_array(filename):
    """Load one RData file and return its series, time stamps and station.

    The file is loaded into the embedded R session. The first element of
    the value returned by R's ``load`` is used as the name of the object to
    read, both for the object itself and for its R attributes.

    Returns a tuple ``(values, timestamps, station)``:
      * values     -- the object's 'data.series' entry as an (n, 1) numpy array
      * timestamps -- list with one pandas timestamp per 'data.time' entry,
                      each entry being interpreted as a day offset from
                      ``pd.to_datetime(0)``
      * station    -- first element of the object's 'station' attribute
    """
    # Load the file and look up the first loaded object plus its attributes
    loaded_names = r.load(filename, verbose=True)
    object_name = loaded_names[0]
    r_object = r(object_name)
    r_attributes = r('attributes(%s)' % (object_name))

    # Name -> value lookups for the object's fields and for its attributes
    fields = {name: value for name, value in zip(r_object.names, r_object)}
    attrs = {name: value for name, value in zip(r_attributes.names, r_attributes)}

    # Turn the numeric day offsets into time stamps
    epoch = pd.to_datetime(0)
    timestamps = [
        epoch + pd.Timedelta(day_offset, unit='d')
        for day_offset in fields['data.time']
    ]

    # Column vector of the series values
    values = np.asarray(fields['data.series'])
    values = values.reshape(-1, 1)

    station = attrs['station'][0]
    return values, timestamps, station
def save_files( dict ):
    """Write every collected DataFrame to a CSV file inside TARGET_DIR.

    Each entry is written to ``<key>_<modelchain>.csv``. Saving is best
    effort: a failure for one entry is reported on stdout and the remaining
    entries are still processed.

    Parameters
    ----------
    dict : mapping of variable name -> pandas.DataFrame or None
        ``None`` marks a variable for which no data has been collected yet;
        such entries are skipped. The parameter name shadows the builtin
        ``dict``; it is kept unchanged so existing callers keep working.
    """
    for key, df in dict.items():
        if df is None:
            # Nothing has been loaded for this variable. Say so plainly
            # rather than failing on None.to_csv and reporting a
            # misleading "Failed to convert and save" error.
            print('No data for %s yet, nothing written' % (key,))
            continue
        try:
            # Build the path once so the file written and the path logged
            # cannot diverge.
            target = os.path.join(TARGET_DIR, '%s_%s.csv' % (key, modelchain))
            df.to_csv(target)
        except Exception as e:
            # Deliberately broad: one unwritable file must not abort the run.
            print('Failed to convert and save %s: %s' % (key, e))
        else:
            print("Wrote file %s" % target)
###################################
# create a dictionary with entries for all variables to correctly assign files to variables
# (None means "no file for this variable has been loaded yet")
var_dict = {
    'pr'  : None,
    'tas' : None,
    'rsds': None,
}
# i counts every directory entry, including the ones that get skipped
i = 0
# os.listdir returns entries in arbitrary order; sorting makes the processing
# order, and therefore the column order of the CSV files, reproducible.
for file in sorted(os.listdir( SOURCE_DIR )):
    fp = os.path.join( SOURCE_DIR, file )
    timer = time.time()
    i += 1
    try:
        # verify that the file has the right extension. splitext copes with
        # names that contain no dot or several dots; the comparison ignores
        # case so that '.RData' and '.Rdata' are both accepted.
        file_stem, extension = os.path.splitext( file )
        if extension.lower() != '.rdata':
            print('Skipped file %s' %file)
            continue
        # get the variable name of the current file: the part of the file
        # name before the first underscore
        var = file_stem.split('_')[0]
        if var not in var_dict:
            # Report unknown variables explicitly instead of letting the
            # KeyError surface as an unclear "Failed to add" message.
            print('Skipped file %s: unknown variable %s' %(file, var))
            continue
        # load the file into pandas dataframe and append to variable
        data, dates, loc = load_r_to_array( fp )
        if var_dict[ var ] is None:
            # The first file of a variable defines the time index of its frame
            var_dict[ var ] = pd.DataFrame( data = [], index = dates )
        # one column per station
        var_dict[ var ][loc] = data
        # save intermediate data
        if i % SKIP == 0:
            save_files( var_dict )
        print("Added %s" %fp)
    except Exception as e:
        # Deliberately broad: a single unreadable file must not stop the run
        print('Failed to add %s: %s' %(fp, e))
    print('Loop iteration time (iteration %d): %f' %(i, time.time()-timer))
# final save of everything that was collected
save_files( var_dict )

Event Timeline