Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F113079712
r_to_pd_v2.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, May 14, 19:13
Size
2 KB
Mime Type
text/x-python
Expires
Fri, May 16, 19:13 (2 d)
Engine
blob
Format
Raw Data
Handle
26175752
Attached To
R8800 solar_potential
r_to_pd_v2.py
View Options
import
xarray
as
xr
import
pandas
as
pd
import
numpy
as
np
from
rpy2.robjects
import
r
import
os
import
sys
import
time
######## USER INPUT ##############
# Both directories are taken from the command line. Stop with a usage hint
# instead of a bare IndexError when one of them is missing.
if len(sys.argv) < 3:
    sys.exit('Usage: %s SOURCE_DIR TARGET_DIR' % sys.argv[0])

SOURCE_DIR = sys.argv[1]  # path to a folder that contains RData
TARGET_DIR = sys.argv[2]  # target directory for output csv data

print('Source directory: %s' % SOURCE_DIR)
print('Target directory: %s' % TARGET_DIR)

### == Constants == ###
# Intermediate results are saved whenever the loop counter is a multiple of SKIP.
SKIP = 50

# Name of the source folder; it becomes part of every output file name.
# normpath strips a trailing path separator first, otherwise a SOURCE_DIR
# such as "data/chain/" would produce an empty model chain name.
modelchain = os.path.basename(os.path.normpath(SOURCE_DIR))
print('Model chain: %s' % modelchain)

##################################
def load_r_to_array(filename):
    """Load one RData file and unpack its series, time axis and station name.

    The file is loaded into the embedded R session. The first element of the
    value returned by R's load() is then evaluated as an R expression to
    fetch the object itself and, via attributes(...), its R attributes.

    Returns a tuple (values, dates, station):
      values  -- the 'data.series' field as a numpy array reshaped to one column
      dates   -- list with one entry per 'data.time' value, each computed as
                 pd.to_datetime(0) plus that many days
      station -- first element of the object's 'station' attribute
    """
    # Load the file and look up the object and its meta-data by name
    loaded = r.load(filename, verbose = True)
    obj_name = loaded[0]
    r_obj = r(obj_name)
    r_attrs = r('attributes(%s)' %(obj_name))

    # name -> value lookups for the object's fields and for its attributes
    fields = dict(zip(r_obj.names, list(r_obj)))
    attrs = dict(zip(r_attrs.names, list(r_attrs)))

    # turn the numeric day offsets into timestamps
    epoch = pd.to_datetime(0)
    dates = []
    for day_offset in fields['data.time']:
        dates.append(epoch + pd.Timedelta(day_offset, unit = 'd'))

    # the series becomes an (n, 1) column vector
    column = np.asarray(fields['data.series'])
    column = column.reshape(-1, 1)

    station = attrs['station'][0]
    return column, dates, station
def save_files(dict):
    """Write every populated entry of the mapping to its own CSV file.

    Each value is written with its to_csv method to
    os.path.join(TARGET_DIR, '<key>_<modelchain>.csv').

    Entries whose value is None are skipped: they are placeholders for
    variables that have no data yet, and calling to_csv on them would only
    produce a misleading failure message.

    A failing write is reported on stdout and does not prevent the remaining
    entries from being written.

    dict -- mapping of variable name to an object with a to_csv method
            (or None). The parameter name shadows the builtin but is kept
            so existing callers are unaffected.
    """
    # save intermediate data
    for key, df in dict.items():
        if df is None:
            # nothing collected for this variable so far
            continue
        out_path = os.path.join(TARGET_DIR, '%s_%s.csv' % (key, modelchain))
        try:
            df.to_csv(out_path)
            print("Wrote file %s" % out_path)
        except Exception as e:
            print('Failed to convert and save %s: %s' % (key, e))
###################################
# create a dictionary with entries for all variables to correctly assign files to variables
var_dict = {'pr': None, 'tas': None, 'rsds': None}

# Process every entry of SOURCE_DIR; i counts all entries, including skipped ones.
for i, file in enumerate(os.listdir(SOURCE_DIR), start=1):
    fp = os.path.join(SOURCE_DIR, file)
    timer = time.time()
    try:
        # verify that the file has the right extension. splitext only looks
        # at the last dot, so names without a dot are skipped cleanly and
        # names with additional dots are still recognised.
        file_stem, file_ext = os.path.splitext(file)
        if file_ext != '.Rdata':
            print('Skipped file %s' % file)
            continue

        # get the variable name of the current file (text before the first '_')
        var = file_stem.split('_')[0]

        # load the file and store its series as a column named after the
        # station; the DataFrame of a variable is created from the dates of
        # the first file seen for it. An unknown variable name raises
        # KeyError here and is reported by the handler below.
        data, dates, loc = load_r_to_array(fp)
        if var_dict[var] is None:
            var_dict[var] = pd.DataFrame(data=[], index=dates)
        var_dict[var][loc] = data

        # periodically save intermediate results
        if i % SKIP == 0:
            save_files(var_dict)
        print("Added %s" % fp)
    except Exception as e:
        # one bad file must not abort the whole conversion
        print('Failed to add %s: %s' % (fp, e))
    print('Loop iteration time (iteration %d): %f' % (i, time.time() - timer))

# final save once all files have been processed
save_files(var_dict)
Event Timeline
Log In to Comment