Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F111376726
Radiation_Reader.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, May 1, 04:53
Size
21 KB
Mime Type
text/x-python
Expires
Sat, May 3, 04:53 (2 d)
Engine
blob
Format
Raw Data
Handle
25895598
Attached To
R8800 solar_potential
Radiation_Reader.py
View Options
import
numpy
as
np
import
scipy
as
sc
from
tqdm
import
tqdm
import
time
import
pandas
as
pd
import
xarray
as
xr
import
os
import
sys
import
util
class Radiation_Reader:
    # sunrise = [6, 6, 5, 4, 3, 3, 3, 4, 4, 5, 6, 6] # actual sunrise hours (in GMT)
    # sunset = [16, 17, 17, 18, 19, 19, 19, 18, 18, 17, 16, 15] # actual sunset hours (in GMT)

    def __init__(self, data_path, start_time=3, end_time=19, variables=None,
                 data_format='H', split_raw=True):
        '''
        Initialisation settings:
        - path: root of raw data, with subfolder 'global', 'direct', 'albedo'
        - start_time, end_time: define mask for time selection (min 0, max 23)
        - variables: list of any combination of ['SIS', 'SISDIR', 'ALB', 'KI', 'SISCF', 'SISDIRCF']
          (defaults to ['SIS', 'SISDIR'])
        - data_format: 'H': raw data (hourly values); 'MMH': monthly-mean-hourly; 'WMH': weekly-mean-hourly
        - split_raw: Only for 'H' - True for performing split into hour and date
        '''
        # BUG FIX: the default was the mutable list ['SIS', 'SISDIR'], shared
        # across all instances; a None sentinel gives the same effective default.
        if variables is None:
            variables = ['SIS', 'SISDIR']
        self.path = data_path
        # centre (offset) and half-width (rnge) of the hour-of-day window,
        # used for dropping of irrelevant data in the raw reader
        self.offset = abs((start_time + end_time) / 2)
        self.rnge = abs((end_time - start_time) / 2)
        self.variables = variables
        self.format = data_format
        self.split_raw = split_raw
        # dimension for concatenation of new data
        if not split_raw and data_format == 'H':
            self.split_dim = 'time'
        else:
            self.split_dim = 'date'
        # location mask; expects 'locations_mask.nc' at the data root with a 'CH' variable
        self.loc_mask = xr.open_dataset(os.path.join(data_path, 'locations_mask.nc'))
        # INITIALISATIONS
        self.init_data = True     # True until add_data() has stored a first dataset
        self.init_reading = True  # True until read_data() has stored a first dataset
        self.sample = False       # True once a location sampling mask is active
def
date_range
(
self
,
start_date
,
end_date
):
if
self
.
format
==
'H'
:
return
pd
.
date_range
(
start_date
,
end_date
,
freq
=
'D'
)
elif
self
.
format
==
'MMH'
:
return
pd
.
date_range
(
start_date
,
end_date
,
freq
=
'MS'
)
elif
self
.
format
==
'WMH'
:
return
pd
.
date_range
(
start_date
,
end_date
,
freq
=
'W-MON'
)
########################################### READ DATA #############################################
def
read_data
(
self
,
start_date
,
end_date
,
reset
=
False
,
print_log
=
True
):
if
reset
:
self
.
init_reading
=
True
# VEEEEEERY CRUDE JUST TO MAKE OLD AND NEW WORK TOGETHER - SHOULD BE RE-WRITTEN
elif
not
init_reading
:
old_data
=
self
.
data
month_start
=
pd
.
date_range
(
start_date
,
end_date
,
freq
=
'MS'
)
month_end
=
pd
.
date_range
(
start_date
,
end_date
,
freq
=
'M'
)
init
=
True
for
curr_month
,
curr_month_end
in
zip
(
month_start
,
month_end
):
monthfile
=
curr_month
.
strftime
(
'%Y-%m'
)
+
'.nc'
print
(
monthfile
)
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
self
.
path
,
monthfile
)):
print
(
"file
%s
does not exist - creating file from raw data"
%
monthfile
)
self
.
make_monthly
(
curr_month
,
curr_month_end
)
try
:
new_data
=
xr
.
open_dataset
(
os
.
path
.
join
(
self
.
path
,
monthfile
))
if
init
:
dataset
=
new_data
init
=
False
else
:
dataset
=
xr
.
concat
([
dataset
,
new_data
],
self
.
split_dim
)
except
:
print
(
"failed to read or concatenate data in
%s
"
%
monthfile
)
if
self
.
init_reading
:
self
.
data
=
dataset
self
.
init_reading
=
False
else
:
self
.
data
=
xr
.
merge
([
old_data
,
dataset
])
self
.
start
=
start_date
self
.
end
=
month_end
[
-
1
]
.
strftime
(
"%Y%m
%d
"
)
########################################## MONTHLY MEAN HOURLY ##########################################
def
read_monthly_mean_hourly
(
self
,
filename
,
data_path
,
print_log
=
False
):
month_start
=
pd
.
date_range
(
self
.
start
,
self
.
end
,
freq
=
'MS'
)
# create list of first day in month
month_end
=
pd
.
date_range
(
self
.
start
,
self
.
end
,
freq
=
'M'
)
# create list in last day of month
self
.
start
=
month_start
[
0
]
# homogenise start and end
self
.
end
=
month_end
[
-
1
]
if
print_log
:
print
(
"Reading all data from
%s
to
%s
...."
%
(
pd
.
to_datetime
(
self
.
start
)
.
strftime
(
"%Y-%m-
%d
"
),
pd
.
to_datetime
(
self
.
end
)
.
strftime
(
"%Y-%m-
%d
"
)))
timer
=
time
.
clock
()
init
=
True
for
(
curr_month
,
curr_month_end
)
in
zip
(
month_start
,
month_end
):
new_data
=
self
.
read_data
(
curr_month
,
curr_month_end
,
filename
,
data_path
,
split
=
False
)
hourly_mean
=
new_data
.
groupby
(
'time.hour'
)
.
mean
(
dim
=
'time'
)
# create monthly mean hourly for the current month
hourly_mean
.
coords
[
'date'
]
=
curr_month
# label current data - get only month number with: curr_month.month
if
init
:
# for first iteration
data_out
=
hourly_mean
init
=
False
else
:
data_out
=
xr
.
concat
([
data_out
,
hourly_mean
],
'date'
)
# add new slice
timer
=
time
.
clock
()
-
timer
if
print_log
:
print
(
"Finished reading in
%.2f
seconds"
%
(
timer
))
return
data_out
########################################## WEEKLY MEAN HOURLY #######################################
def
read_weekly_mean_hourly
(
self
,
filename
,
data_path
,
print_log
=
False
):
week_start
=
pd
.
date_range
(
self
.
start
,
self
.
end
,
freq
=
'W-MON'
)
# create list of first day in month
week_end
=
pd
.
date_range
(
week_start
[
0
],
self
.
end
,
freq
=
'W-SUN'
)
# create list in last day of month
self
.
start
=
week_start
[
0
]
# homogenise start and end
self
.
end
=
week_end
[
-
1
]
if
print_log
:
print
(
"Reading all data from
%s
to
%s
...."
%
(
pd
.
to_datetime
(
self
.
start
)
.
strftime
(
"%Y-%m-
%d
"
),
pd
.
to_datetime
(
self
.
end
)
.
strftime
(
"%Y-%m-
%d
"
)))
timer
=
time
.
clock
()
init
=
True
for
(
curr_week
,
curr_week_end
)
in
zip
(
week_start
,
week_end
):
new_data
=
self
.
read_data
(
curr_week
,
curr_week_end
,
filename
,
data_path
,
split
=
False
)
hourly_mean
=
new_data
.
groupby
(
'time.hour'
)
.
mean
(
dim
=
'time'
)
# create monthly mean hourly for the current month
hourly_mean
.
coords
[
'date'
]
=
curr_week
# label current data
if
init
:
# for first iteration
data_out
=
hourly_mean
init
=
False
else
:
data_out
=
xr
.
concat
([
data_out
,
hourly_mean
],
'date'
)
# add new slice
timer
=
time
.
clock
()
-
timer
if
print_log
:
print
(
"Finished reading in
%.2f
seconds"
%
(
timer
))
return
data_out
########################################### SUBSETS ###################################################
def
read_sample
(
self
,
matrix_in
=
None
,
filename
=
None
,
sample_raw
=
True
):
# read input matrix for sampling locations
if
filename
is
not
None
:
matrix_in
=
np
.
loadtxt
(
filename
)
self
.
loc_mask
[
'sample'
]
=
xr
.
DataArray
(
matrix_in
,
coords
=
self
.
loc_mask
.
coords
)
if
sample_raw
:
self
.
sample
=
True
idx
=
np
.
argwhere
(
self
.
loc_mask
.
sample
.
where
(
self
.
loc_mask
[
'CH'
]
==
1
,
drop
=
True
)
.
values
==
1
)
return
idx
def
create_regular_sample
(
self
,
downsampling_ratio
,
filename
=
None
,
sample_raw
=
True
):
n_lat
=
len
(
self
.
loc_mask
.
lat
.
values
)
n_lon
=
len
(
self
.
loc_mask
.
lon
.
values
)
smpl_int
=
int
(
np
.
floor
(
np
.
sqrt
(
downsampling_ratio
)))
# CREATE MATRIX OF THE SAME SIZE AS LOC_MASK
matrix_reg
=
np
.
zeros
((
n_lat
,
n_lon
))
matrix_reg
[::
smpl_int
,::
smpl_int
]
=
1
matrix_reg
=
matrix_reg
*
self
.
loc_mask
.
CH
.
values
.
T
self
.
loc_mask
[
'sample'
]
=
xr
.
DataArray
(
matrix_reg
,
coords
=
self
.
loc_mask
.
coords
)
if
sample_raw
:
self
.
sample
=
True
if
filename
is
not
None
:
np
.
savetxt
(
filename
,
matrix_reg
)
idx
=
np
.
argwhere
(
self
.
loc_mask
.
sample
.
where
(
self
.
loc_mask
[
'CH'
]
==
1
,
drop
=
True
)
.
values
==
1
)
return
idx
def
create_random_sample
(
self
,
n_sample
,
filename
=
None
,
sample_raw
=
True
):
n_lat
=
len
(
self
.
loc_mask
.
lat
.
values
)
n_lon
=
len
(
self
.
loc_mask
.
lon
.
values
)
# CREATE MATRIX OF THE SAME SIZE AS LOC_MASK
loc_mask_local
=
self
.
loc_mask
.
where
(
self
.
loc_mask
[
'CH'
]
==
1
)
indices_CH
=
loc_mask_local
.
to_dataframe
()
.
reset_index
()
.
dropna
()
.
index
# obtain list of indices in original dataset that are in CH
indices_random
=
np
.
random
.
permutation
(
indices_CH
)[:
n_sample
]
# obtain list of random indices
matrix_rand
=
np
.
zeros
((
n_lon
*
n_lat
,
1
))
matrix_rand
[
indices_random
]
=
1
# get matrix with random values set to 1
matrix_rand
=
matrix_rand
.
reshape
((
n_lat
,
n_lon
))
self
.
loc_mask
[
'sample'
]
=
xr
.
DataArray
(
matrix_rand
,
coords
=
self
.
loc_mask
.
coords
)
if
sample_raw
:
self
.
sample
=
True
if
filename
is
not
None
:
np
.
savetxt
(
filename
,
matrix_rand
)
idx
=
np
.
argwhere
(
self
.
loc_mask
.
sample
.
where
(
self
.
loc_mask
[
'CH'
]
==
1
,
drop
=
True
)
.
values
==
1
)
return
idx
,
matrix_rand
def
get_subset
(
self
,
lon_idx
,
lat_idx
,
print_log
=
True
):
loc_mask_local
=
self
.
loc_mask
.
where
(
self
.
loc_mask
[
'CH'
]
==
1
,
drop
=
True
)
timer
=
time
.
clock
()
init
=
True
for
lon
,
lat
in
tqdm
(
zip
(
lon_idx
,
lat_idx
)):
loc_mask_local
.
CH
.
values
[
lon
,
lat
]
=
2
new_slice
,
_
,
_
,
_
=
self
.
get_locslice
(
lat_idx
=
lat
,
lon_idx
=
lon
,
random
=
False
)
new_tbl
=
self
.
make_table
(
new_slice
,
print_log
=
False
)
if
init
:
subset
=
new_tbl
init
=
False
else
:
subset
=
pd
.
concat
([
subset
,
new_tbl
])
if
print_log
:
print
(
"Created subset in
%.2f
seconds"
%
(
time
.
clock
()
-
timer
))
return
subset
,
loc_mask_local
########################################### CREATE TABLES ############################################
def
make_table
(
self
,
indata
=
None
,
print_log
=
True
):
if
indata
is
None
:
indata
=
self
.
data
timer
=
time
.
clock
()
coords
=
self
.
loc_mask
.
to_dataframe
()
.
dropna
()
.
reset_index
()
table
=
indata
.
to_dataframe
()
.
dropna
()
.
reset_index
()
table
[
'month'
]
=
pd
.
to_datetime
(
table
[
'date'
]
.
values
)
.
month
table
[
'day'
]
=
pd
.
to_datetime
(
table
[
'date'
]
.
values
)
.
dayofyear
table
[
'timestamp'
]
=
util
.
to_timestamp
(
table
[
'date'
]
.
values
,
table
[
'hour'
]
.
values
)
table
=
pd
.
merge
(
left
=
table
,
right
=
coords
,
how
=
"left"
,
left_on
=
[
"lon"
,
"lat"
],
right_on
=
[
"lon"
,
"lat"
])
if
print_log
:
print
(
"Created table in
%.2f
seconds"
%
(
time
.
clock
()
-
timer
))
return
table
########################################## DATA SLICES ################################################
def
get_timeslice
(
self
,
hour
=
-
1
,
date
=
-
1
,
hour_idx
=
-
1
,
date_idx
=
-
1
):
"""
DESCRIPTION
Inputs:
hour / date: input to select data
hour_idx/ date_idx: input to select data by index
Outputs:
"""
try
:
if
self
.
split_dim
==
'date'
:
# IF DATA IS SPLIT
if
hour
==
-
1
and
hour_idx
==
-
1
:
# select random time
print
(
"Random selection of hour"
)
hour_idx
=
np
.
random
.
randint
(
self
.
data
.
sizes
[
'hour'
])
if
date
==
-
1
and
date_idx
==
-
1
:
# select random date
print
(
"Random selection of date"
)
date_idx
=
np
.
random
.
randint
(
self
.
data
.
sizes
[
'date'
])
if
hour
==
-
1
:
hour
=
self
.
data
.
hour
.
values
[
hour_idx
]
if
date
==
-
1
:
# if time was chosen by index
date
=
self
.
data
.
date
.
values
[
date_idx
]
timeslice
=
self
.
data
.
sel
(
date
=
pd
.
to_datetime
(
date
),
hour
=
hour
)
else
:
# IF DATA IS NOT SPLIT YET
if
hour
==
-
1
or
date
==
-
1
:
print
(
"Error: Please specify date and time"
)
return
timeslice
=
self
.
split_dataset
(
dates
=
date
)
.
sel
(
hour
=
hour
)
except
:
print
(
"Unexpected error:"
,
sys
.
exc_info
())
print
(
"Verify that date (
%s
) and hour (
%d
) are in the data set!"
%
(
date
,
hour
))
return
return
timeslice
,
date
,
hour
def
get_locslice
(
self
,
lat_idx
=
-
1
,
lon_idx
=
-
1
,
start_date
=
-
1
,
end_date
=
-
1
,
random
=
True
):
"""
DESCRIPTION
Inputs:
Outputs:
"""
if
random
==
True
:
# select random time
CH
=
False
while
CH
==
False
:
lat_idx
=
np
.
random
.
randint
(
self
.
data
.
sizes
[
'lat'
])
lon_idx
=
np
.
random
.
randint
(
self
.
data
.
sizes
[
'lon'
])
if
self
.
loc_mask
.
isel
(
lon
=
lon_idx
,
lat
=
lat_idx
)
.
CH
.
values
==
1
:
CH
=
True
print
(
"find random location in Switzerland ..."
)
loc_mask_local
=
self
.
loc_mask
.
where
(
self
.
loc_mask
[
'CH'
]
==
1
,
drop
=
True
)
loc_mask_local
.
CH
.
values
[
lon_idx
,
lat_idx
]
=
2
if
start_date
==
-
1
:
start_date
=
self
.
start
if
end_date
==
-
1
:
end_date
=
self
.
end
dates
=
self
.
date_range
(
start_date
,
end_date
)
.
strftime
(
"%Y%m
%d
"
)
if
self
.
split_dim
==
'time'
:
locslice
=
self
.
split_dataset
(
dates
=
dates
)
else
:
locslice
=
self
.
data
.
sel
(
date
=
pd
.
to_datetime
(
dates
))
try
:
locslice
=
locslice
.
isel
(
lon
=
lon_idx
,
lat
=
lat_idx
)
lon
=
self
.
data
.
lon
.
item
(
lon_idx
)
lat
=
self
.
data
.
lat
.
item
(
lat_idx
)
except
:
print
(
"Unexpected error:"
,
sys
.
exc_info
()[
0
])
print
(
"Verify that longitude (idx
%d
) and latitude (idx
%d
) are in the data set!"
%
(
lon_idx
,
lat_idx
))
return
return
locslice
,
lon
,
lat
,
loc_mask_local
##########################################################################################################
########################################## RAW DATA READING ############################################
##########################################################################################################
########################################## KEY FUNCTIONS #############################################
def
make_monthly
(
self
,
start_date
,
end_date
):
month_start
=
pd
.
date_range
(
start_date
,
end_date
,
freq
=
'MS'
)
month_end
=
pd
.
date_range
(
start_date
,
end_date
,
freq
=
'M'
)
for
curr_month
,
curr_month_end
in
zip
(
month_start
,
month_end
):
try
:
self
.
add_data
(
curr_month
,
curr_month_end
,
reset
=
True
)
file
=
curr_month
.
strftime
(
"%Y-%m"
)
+
'.nc'
self
.
data
.
to_netcdf
(
os
.
path
.
join
(
self
.
path
,
file
))
except
:
print
(
"Could not process month
%s
"
%
curr_month
.
strftime
(
"%Y-%m"
))
self
.
start
=
start_date
self
.
end
=
month_end
[
-
1
]
.
strftime
(
"%Y%m
%d
"
)
def
add_data
(
self
,
start_date
,
end_date
,
reset
=
False
,
print_log
=
True
):
self
.
start
=
start_date
self
.
end
=
end_date
if
reset
:
self
.
init_data
=
True
init_merge
=
True
for
ds
in
self
.
variables
:
if
ds
==
'SIS'
:
if
print_log
:
print
(
'Adding global ... '
)
new_data
=
self
.
read_global
(
print_log
)
elif
ds
==
'SISDIR'
:
if
print_log
:
print
(
'Adding direct ... '
)
new_data
=
self
.
read_direct
(
print_log
)
elif
ds
==
'ALB'
:
if
print_log
:
print
(
'Adding albedo ... '
)
new_data
=
self
.
read_albedo
(
print_log
)
elif
ds
==
'KI'
:
if
print_log
:
print
(
'Adding clearness index ... '
)
new_data
=
self
.
read_clearness
(
print_log
)
elif
ds
==
'SISCF'
:
if
print_log
:
print
(
'Adding global clear sky ... '
)
new_data
=
self
.
read_clearsky_global
(
print_log
)
elif
ds
==
'SISDIRCF'
:
if
print_log
:
print
(
'Adding direct clear sky ... '
)
new_data
=
self
.
read_clearsky_direct
(
print_log
)
else
:
print
(
"ERROR: VARIABLE NOT FOUND"
)
return
if
init_merge
:
new_dataset
=
new_data
init_merge
=
False
else
:
print
(
'Merging data ... '
)
new_dataset
=
xr
.
merge
([
new_dataset
,
new_data
])
if
self
.
init_data
:
self
.
data
=
new_dataset
self
.
init_data
=
False
else
:
print
(
'Adding dataset ... '
)
self
.
data
=
xr
.
concat
([
self
.
data
,
new_dataset
],
self
.
split_dim
)
if
print_log
:
print
(
"Reading completed"
)
######################################### DATA SELECTION ###############################################
def
choose_format
(
self
,
filename
,
data_path
,
print_log
=
True
):
if
self
.
format
==
'H'
:
if
print_log
:
print
(
'Adding raw data ... '
)
data_out
=
self
.
read_data
(
self
.
start
,
self
.
end
,
filename
,
data_path
,
split
=
self
.
split_raw
)
elif
self
.
format
==
'MMH'
:
if
print_log
:
print
(
'Adding monthly mean hourly data ... '
)
data_out
=
self
.
read_monthly_mean_hourly
(
filename
,
data_path
)
elif
self
.
format
==
'WMH'
:
if
print_log
:
print
(
'Adding weekly mean hourly data ... '
)
data_out
=
self
.
read_weekly_mean_hourly
(
filename
,
data_path
)
else
:
print
(
"ERROR: DATA FORMAT NOT FOUND"
)
return
return
data_out
################################# INITIALIZE DATA TYPE READERS #########################################
def
read_global
(
self
,
print_log
=
True
):
data_path
=
os
.
path
.
join
(
self
.
path
,
'global'
)
filename
=
lambda
date
:
'msg.SIS.H_ch02.lonlat_'
+
date
+
'000000.nc'
# create data set:
if
print_log
:
print
(
"Reading global radiation data from
%s
to
%s
..."
%
(
pd
.
to_datetime
(
self
.
start
)
.
strftime
(
"%Y-%m-
%d
"
),
pd
.
to_datetime
(
self
.
end
)
.
strftime
(
"%Y-%m-
%d
"
)))
timer
=
time
.
clock
()
global_data
=
self
.
choose_format
(
filename
,
data_path
,
print_log
)
timer
=
time
.
clock
()
-
timer
if
print_log
:
print
(
"Finished reading in
%.2f
seconds
\n
"
%
(
timer
))
return
global_data
def
read_direct
(
self
,
print_log
=
True
):
data_path
=
os
.
path
.
join
(
self
.
path
,
'direct'
)
filename
=
lambda
date
:
'msg.SISDIR.H_ch02.lonlat_'
+
date
+
'000000.nc'
# create data set:
if
print_log
:
print
(
"Reading direct radiation data from
%s
to
%s
...."
%
(
pd
.
to_datetime
(
self
.
start
)
.
strftime
(
"%Y-%m-
%d
"
),
pd
.
to_datetime
(
self
.
start
)
.
strftime
(
"%Y-%m-
%d
"
)))
timer
=
time
.
clock
()
direct_data
=
self
.
choose_format
(
filename
,
data_path
,
print_log
)
timer
=
time
.
clock
()
-
timer
if
print_log
:
print
(
"Finished reading in
%.2f
seconds
\n
"
%
(
timer
))
return
direct_data
def
read_albedo
(
self
,
print_log
=
True
):
data_path
=
os
.
path
.
join
(
self
.
path
,
'albedo'
)
filename
=
lambda
date
:
'msg.ALB.H_ch02.lonlat_'
+
date
+
'000000.nc'
# create data set:
if
print_log
:
print
(
"Reading surface albedo data from
%s
to
%s
...."
%
(
pd
.
to_datetime
(
self
.
start
)
.
strftime
(
"%Y-%m-
%d
"
),
pd
.
to_datetime
(
self
.
end
)
.
strftime
(
"%Y-%m-
%d
"
)))
timer
=
time
.
clock
()
albedo_data
=
self
.
choose_format
(
filename
,
data_path
,
print_log
)
timer
=
time
.
clock
()
-
timer
if
print_log
:
print
(
"Finished reading in
%.2f
seconds
\n
"
%
(
timer
))
return
albedo_data
def
read_clearness
(
self
,
print_log
=
True
):
data_path
=
os
.
path
.
join
(
self
.
path
,
'clearness'
)
filename
=
lambda
date
:
'msg.KI.H_ch02.lonlat_'
+
date
+
'000000.nc'
# create data set:
if
print_log
:
print
(
"Reading clearness index data from
%s
to
%s
...."
%
(
pd
.
to_datetime
(
self
.
start
)
.
strftime
(
"%Y-%m-
%d
"
),
pd
.
to_datetime
(
self
.
end
)
.
strftime
(
"%Y-%m-
%d
"
)))
timer
=
time
.
clock
()
clearness_data
=
self
.
choose_format
(
filename
,
data_path
,
print_log
)
timer
=
time
.
clock
()
-
timer
if
print_log
:
print
(
"Finished reading in
%.2f
seconds
\n
"
%
(
timer
))
return
clearness_data
def
read_clearsky_global
(
self
,
print_log
=
True
):
data_path
=
os
.
path
.
join
(
self
.
path
,
'clearsky-global'
)
filename
=
lambda
date
:
'msg.SISCF.H_ch02.lonlat_'
+
date
+
'000000.nc'
# create data set:
if
print_log
:
print
(
"Reading global clear sky radiation data from
%s
to
%s
...."
%
(
pd
.
to_datetime
(
self
.
start
)
.
strftime
(
"%Y-%m-
%d
"
),
pd
.
to_datetime
(
self
.
end
)
.
strftime
(
"%Y-%m-
%d
"
)))
timer
=
time
.
clock
()
clearness_data
=
self
.
choose_format
(
filename
,
data_path
,
print_log
)
timer
=
time
.
clock
()
-
timer
if
print_log
:
print
(
"Finished reading in
%.2f
seconds
\n
"
%
(
timer
))
return
clearness_data
def
read_clearsky_direct
(
self
,
print_log
=
True
):
data_path
=
os
.
path
.
join
(
self
.
path
,
'clearsky-direct'
)
filename
=
lambda
date
:
'msg.SISDIRCF.H_ch02.lonlat_'
+
date
+
'000000.nc'
# create data set:
if
print_log
:
print
(
"Reading direct clear sky radiation data from
%s
to
%s
...."
%
(
pd
.
to_datetime
(
self
.
start
)
.
strftime
(
"%Y-%m-
%d
"
),
pd
.
to_datetime
(
self
.
end
)
.
strftime
(
"%Y-%m-
%d
"
)))
timer
=
time
.
clock
()
clearness_data
=
self
.
choose_format
(
filename
,
data_path
,
print_log
)
timer
=
time
.
clock
()
-
timer
if
print_log
:
print
(
"Finished reading in
%.2f
seconds
\n
"
%
(
timer
))
return
clearness_data
##################################### READING RAW DATA ###############################################
def
read_data
(
self
,
start_date
,
end_date
,
filename
,
data_path
,
split
):
if
split
:
split_dim
=
'date'
else
:
split_dim
=
'time'
dates
=
pd
.
date_range
(
start_date
,
end_date
)
init
=
True
for
date
in
tqdm
(
dates
):
file
=
filename
(
date
.
strftime
(
"%Y%m
%d
"
))
try
:
new_data
=
xr
.
open_dataset
(
os
.
path
.
join
(
data_path
,
file
))
if
init
:
# for first iteration
self
.
loc_mask
=
self
.
loc_mask
.
reindex_like
(
new_data
,
'nearest'
)
new_data
=
new_data
.
where
(
abs
(
new_data
[
'time.hour'
]
-
self
.
offset
)
<=
self
.
rnge
,
drop
=
True
)
# only keep relevant hours
new_data
=
new_data
.
where
(
self
.
loc_mask
[
'CH'
]
==
1
,
drop
=
True
)
.
drop
(
'geographic_projection'
)
# only keep relevant locations & data
if
self
.
sample
:
new_data
=
new_data
.
where
(
self
.
loc_mask
[
'sample'
]
==
1
)
# remove all samples that are not of interest, but do not reduce overall size
if
split
:
new_data
=
new_data
.
groupby
(
'time.hour'
)
.
mean
(
dim
=
'time'
)
# create monthly mean hourly for the current month
new_data
.
coords
[
'date'
]
=
date
if
init
:
# for first iteration
data_out
=
new_data
# drop geographic projection (cut out unnecessary data)
init
=
False
else
:
data_out
=
xr
.
concat
([
data_out
,
new_data
],
split_dim
)
except
:
print
(
"Unexpected error:"
,
sys
.
exc_info
()[
0
])
print
(
"Check file in
%s
"
%
(
os
.
path
.
join
(
data_path
,
file
)))
return
data_out
####################################### SPLITTING DATASET #######################################
def
split_dataset
(
self
,
dates
=
[],
print_log
=
True
):
try
:
if
len
(
dates
)
==
0
:
# if nothing was passed to the funcion
dates
=
self
.
date_range
(
self
.
start
,
self
.
end
)
date_range
=
pd
.
to_datetime
(
dates
)
.
strftime
(
"%Y%m
%d
"
)
if
print_log
:
print
(
"Splitting all data from
%s
to
%s
...
\n
This is faster if split_raw is selected in initialisation of reader!"
%
(
pd
.
to_datetime
(
dates
[
0
])
.
strftime
(
"%Y-%m-
%d
"
),
pd
.
to_datetime
(
dates
[
-
1
])
.
strftime
(
"%Y-%m-
%d
"
)))
timer
=
time
.
clock
()
init
=
True
for
day
in
date_range
:
day_data
=
self
.
data
.
sel
(
time
=
slice
(
day
))
data
=
day_data
.
groupby
(
'time.hour'
)
.
mean
(
dim
=
'time'
)
data
.
coords
[
'date'
]
=
pd
.
to_datetime
(
day
)
if
init
:
splitted_data
=
data
init
=
False
else
:
splitted_data
=
xr
.
concat
([
splitted_data
,
data
],
'date'
)
# add new slice
timer
=
time
.
clock
()
-
timer
if
print_log
:
print
(
"Finished splitting data in
%.2f
seconds"
%
(
timer
))
except
:
print
(
"Error: Verify that date has right format; set ['datestring']"
)
return
return
splitted_data
Event Timeline
Log In to Comment