Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F102822505
joinmds
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Feb 24, 13:31
Size
12 KB
Mime Type
text/x-perl
Expires
Wed, Feb 26, 13:31 (2 d)
Engine
blob
Format
Raw Data
Handle
24433774
Attached To
R2795 mitgcm_lac_leman_abirani
joinmds
View Options
#!/usr/local/bin/perl -w
# MITgcmUV DataSet joining utility.
# Tested with perl 4.0 and newer.
# Tested on Linux 2.0.27/I486, Irix 6.2/{IP22,IP25}
# Zhangfan XING, xing@pacific.jpl.nasa.gov
# Adapted to work with MDS I/O format by adcroft@mit.edu, 5/7/1999
#
# LOGS:
# 980707, version 0.0.1, basically works
# 980721, version 0.2.0, proper handling of data file's header and terminator
# for diff bytesex.
# 990507, HACK'd by AJA. Needs to be properly merged with the original joinds
#------
# usage
#------
sub
usage
{
print
STDERR
"\nUsage:$0 [-Ddir0 -Ddir1 ...] "
.
"prefix suffix [(little-endian|big-endian)]\n"
;
print
STDERR
"\nMITgcmUV DataSet joining utility, version 0.3.0\n"
;
print
STDERR
"Check http://escher.jpl.nasa.gov:2000/tools/ for newer version.\n"
;
print
STDERR
"Report problem to xing\@pacific.jpl.nasa.gov\n\n"
;
exit
1
;
}
#------------------------------
# product of a list of integers
#------------------------------
sub
listprod
{
local
(
$product
)
=
1
;
local
(
$x
);
foreach
$x
(
@_
)
{
$product
*=
$x
;
}
$product
;
}
#----------------
# @list1 + @list2
#----------------
sub
lists_add
{
local
(
*
l1
,
*
l2
)
=
@_
;
(
$#l1
==
$#l2
)
||
return
undef
;
local
(
@l
);
for
(
local
(
$i
)
=
0
;
$i
<=
$#l1
;
$i
++
)
{
$l
[
$i
]
=
$l1
[
$i
]
+
$l2
[
$i
];
}
@l
;
}
#-------------
# pos to index
# 0-based.
#-------------
sub
pos2index
{
local
(
$pos
,
@dim
)
=
@_
;
local
(
$rightmost
)
=
pop
(
@dim
);
local
(
@index
,
$d
);
foreach
$d
(
@dim
)
{
push
(
@index
,
$pos%$d
);
$pos
=
int
(
$pos
/
$d
);
}
# self-guarding
unless
(
$rightmost
>
$pos
)
{
return
undef
;
}
push
(
@index
,
$pos
);
@index
;
}
#-------------
# index to pos
# 0-based.
#-------------
sub
index2pos
{
local
(
*
index
,
*
dim
)
=
@_
;
return
undef
unless
(
$#index
==
$#dim
);
local
(
$pos
)
=
$index
[
$#index
];
for
(
local
(
$i
)
=
$#dim
;
$i
>
0
;
$i
--
)
{
$pos
=
$pos
*
$dim
[
$i
-
1
]
+
$index
[
$i
-
1
];
}
$pos
;
}
#-------------------------
# check machine's bytesex.
# returns "little-endian" or "big-endian"
# or dies if unable to figure out
#-------------------------
sub
mach_bytesex
{
local
(
$foo
)
=
pack
(
"s2"
,
1
,
2
);
if
(
$foo
eq
"\1\0\2\0"
)
{
return
"little-endian"
;
}
elsif
(
$foo
eq
"\0\1\0\2"
)
{
return
"big-endian"
;
}
else
{
die
"Your machine has a strange bytesex.\n"
.
"Email your platform info to xing\@pacific.jpl.nasa.gov\n"
;
}
}
#--------------------------------------------------
# check bytesex of a fortran unformatted data file
# current machine's bytesex is used as a reference.
# returns: one of "little-endian", "big-endian", "undecidable" and "unknown"
#--------------------------------------------------
sub
file_bytesex
{
# only if this platform's bytesex is either big- or little-endian
# otherwise dies. Hope this won't happen.
local
(
$mach_bytesex
)
=
&
mach_bytesex
();
local
(
$file
)
=
shift
;
local
(
*
FILE
);
open
(
FILE
,
$file
)
||
die
"$file: $!\n"
;
local
(
@fstat
)
=
stat
(
FILE
);
local
(
$size
)
=
$fstat
[
7
]
-
8
;
# total data size in bytes
local
(
$hdr
,
$tmr
)
=
(
""
,
""
);
read
(
FILE
,
$hdr
,
4
);
seek
(
FILE
,
-
4
,
2
);
read
(
FILE
,
$tmr
,
4
);
close
(
FILE
);
# this part checks for self-consistency of Fortran unformatted file
(
$hdr
eq
$tmr
)
||
die
"$file: not a Fortran unformatted data file.\n"
;
local
(
$ori
)
=
unpack
(
"I"
,
$hdr
);
local
(
$rev
)
=
unpack
(
"I"
,
join
(
""
,
reverse
(
split
(
//
,
$hdr
))));
(
$ori
!=
$size
&&
$rev
!=
$size
)
&&
return
"unknown"
;
(
$ori
==
$size
&&
$rev
==
$size
)
&&
return
"undecidable"
;
local
(
$opposite
)
=
(
$mach_bytesex
eq
"little-endian"
)
?
"big-endian"
:
"little-endian"
;
return
(
$ori
==
$size
)
?
$mach_bytesex
:
$opposite
;
}
#--------------------------------
# check meta info for one dataset
#--------------------------------
sub
check_meta
{
local
(
$ds
,
$dir
)
=
@_
;
local
(
$fmeta
)
=
"$dir/$ds.meta"
;
#~~~~~~~~~~~~~~~~
# check meta info
#~~~~~~~~~~~~~~~~
undef
$/
;
# read to the end of file
open
(
MFILE
,
"<$fmeta"
)
||
die
"$fmeta: $!\n"
;
$_
=
<MFILE>
;
close
(
MFILE
);
$/
=
"\n"
;
# never mess up
s/\([^)]*\)//g
;
#rm (.*)
s/\/\/[^\n]*\n//g
;
#rm comment lines
s/\/\*.*\*\///g
;
#rm inline comments
s/\s+//g
;
#rm white spaces
/nDims=\[(.+)\];dimList=\[(.+)\];format=\['(.+)'\];nrecords=\[(.+)\];timeStepNumber=\[(.+)\];/
||
die
"$fmeta: meta file format error\n"
;
local
(
$nDims_
,
$dimList_
,
$format_
,
$nrecords_
,
$timeStepNumber_
)
=
(
$1
,
$2
,
$3
,
$4
,
$5
);
# check Identifier
(
defined
$timeStepNumber
)
||
(
$timeStepNumber
=
$timeStepNumber_
);
(
$timeStepNumber
eq
$timeStepNumber_
)
||
die
"$fmeta: timeStepNumber $timeStepNumber_ inconsistent with other dataset\n"
;
# check Number of dimensions
(
defined
$nDims
)
||
(
$nDims
=
$nDims_
);
(
$nDims
eq
$nDims_
)
||
die
"$fmeta: nDims $nDims_ inconsistent with other dataset\n"
;
# check Field format
(
defined
$format
)
||
(
$format
=
$format_
);
(
$format
eq
$format_
)
||
die
"$fmeta: format $format_ inconsistent with other dataset\n"
;
# check dimList
# calc dimesions and leading index of this subset
local
(
@dimList_
)
=
split
(
/,/
,
$dimList_
);
(
$nDims_
*
3
==
$#dimList_
+
1
)
||
die
"$fmeta: nDims and dimList conflicting\n"
;
local
(
@Dim
,
@dim
,
@Index0
)
=
();
for
(
local
(
$i
)
=
0
;
$i
<
$nDims_
;
$i
++
)
{
push
(
@Dim
,
$dimList_
[
$i
*
3
]);
push
(
@dim
,
$dimList_
[
$i
*
3
+
2
]
-
$dimList_
[
$i
*
3
+
1
]
+
1
);
push
(
@Index0
,
$dimList_
[
$i
*
3
+
1
]
-
1
);
}
local
(
$Dim_
)
=
join
(
","
,
@Dim
);
local
(
$dim_
)
=
join
(
","
,
@dim
);
(
defined
$Dim
)
||
(
$Dim
=
$Dim_
);
(
$Dim
eq
$Dim_
)
||
die
"$fmeta: dimList Global inconsistent with other dataset\n"
;
(
defined
$dim
)
||
(
$dim
=
$dim_
);
(
$dim
eq
$dim_
)
||
die
"$fmeta: dimList Local inconsistent with other dataset\n"
;
$ds_Index0
{
$ds
}
=
join
(
","
,
@Index0
);
# print STDOUT "Okay $fmeta\n";
}
#-------------------------------
# check completeness of datasets
# need to be more sophisticated
#-------------------------------
sub
check_entirety
{
local
(
*
Dim
,
*
dim
,
*
ds_Index0
)
=
@_
;
local
(
$N
)
=
&
listprod
(
@Dim
);
local
(
$n
)
=
&
listprod
(
@dim
);
(
$N
)
||
return
0
;
# against null dimension
(
$n
)
||
return
0
;
# against null dimension
(
$N%$n
)
&&
return
0
;
# $N/$n must be a whole number
local
(
@ds
)
=
keys
%ds_Index0
;
(
$#ds
+
1
==
$N
/
$n
)
||
return
0
;
# Num of datasets must match subdomain
1
;
}
#------------------
# merge one dataset
# assume @Dim, @dim and $bytes existing
# assume $Byte_Reorder existing
#------------------
sub
merge_data
{
local
(
$ds
,
$dir
,
*
Index0
)
=
@_
;
local
(
$fdata
)
=
"$dir/$ds.data"
;
# data size of one subset in bytes as told by meta info
local
(
$size
)
=
&
listprod
(
@dim
)
*
$bytes
;
open
(
DFILE
,
"<$fdata"
)
||
die
"$fdata: $!\n"
;
local
(
$raw
)
=
""
;
#aja sysread(DFILE,$raw,4);
# Swap header if bytesex is diff from machine's
local
(
$hdr
);
if
(
$Byte_Reorder
)
{
$hdr
=
unpack
(
"I"
,
join
(
""
,
reverse
(
split
(
//
,
$raw
))));
}
else
{
$hdr
=
unpack
(
"I"
,
$raw
);
}
#aja ($size == $hdr) ||
#aja die "$fdata: $hdr bytes inconsistent with meta info\n";
print
STDOUT
"$ds.data: $size bytes, okay, "
;
# seek(DFILE,4,0); # rewind back to the beginning of data
local
(
$data
)
=
""
;
# old perl (< 4.0) needs this to
sysread
(
DFILE
,
$data
,
$size
);
# avoid warning by sysread()
local
(
$len_chunk
)
=
$dim
[
0
]
*
$bytes
;
local
(
$num_chunk
)
=
$size
/
$len_chunk
;
local
(
$pos
,
@index
,
$Pos
,
@Index
);
for
(
local
(
$i
)
=
0
;
$i
<
$num_chunk
;
$i
++
)
{
$pos
=
$i
*
$dim
[
0
];
@index
=
&
pos2index
(
$pos
,
@dim
);
@Index
=
&
lists_add
(
*
index
,
*
Index0
);
$Pos
=
&
index2pos
(
*
Index
,
*
Dim
);
#aja seek(FILE,$Pos*$bytes+4,0);
seek
(
FILE
,
$Pos
*
$bytes
,
0
);
syswrite
(
FILE
,
$data
,
$len_chunk
,
$pos
*
$bytes
);
}
close
(
DFILE
);
print
STDOUT
"merged from $dir\n"
;
}
#============
# main script
#============
#------------
# parse @ARGV
#............
(
$#ARGV
>=
1
)
||
&
usage
();
undef
@dirs
;
while
(
1
)
{
$x
=
shift
(
@ARGV
);
unless
(
$x
=~
/^-D(.+)$/
)
{
unshift
(
@ARGV
,
$x
);
last
;
}
push
(
@dirs
,
$1
);
}
(
@dirs
)
||
push
(
@dirs
,
"."
);
# @dirs is not empty after this line.
#print STDOUT join(" ",@dirs), "\n";
(
$#ARGV
>=
1
)
||
&
usage
();
# data set prefix and suffix
$pref
=
shift
(
@ARGV
);
$suff
=
shift
(
@ARGV
);
(
$#ARGV
>=
1
)
&&
&
usage
();
undef
$forced_bytesex
;
if
(
@ARGV
)
{
$forced_bytesex
=
shift
(
@ARGV
);
$forced_bytesex
=~
/^(little|big)-endian$/
||
&
usage
();
}
#print STDOUT $forced_bytesex, "\n";
#--------------------------
# obtain a list of datasets
#..........................
# %ds_dir is a hash to store the directory that a dataset is in.
# After this step, it is assured that, for a dataset $ds,
# both $ds.meta and $ds.data exist in a unique dir $ds_dir{$ds}.
%ds_dir
=
();
foreach
$dir
(
@dirs
)
{
opendir
(
DIR
,
$dir
)
||
die
"$dir: $!\n"
;
@fmeta
=
grep
(
/^$pref\.$suff\.\d+\.\d+\.meta$/
,
readdir
(
DIR
));
closedir
(
DIR
);
foreach
$fmeta
(
@fmeta
)
{
$ds
=
$fmeta
;
$ds
=~
s/\.meta$//g
;
(
defined
$ds_dir
{
$ds
})
&&
die
"$fmeta appears in two dirs: $ds_dir{$ds} & $dir\n"
;
(
-
f
"$dir/$ds.data"
)
||
die
"In $dir, $ds.data missing\n"
;
$ds_dir
{
$ds
}
=
$dir
;
}
}
@ds
=
sort
(
keys
%ds_dir
);
# list of datasets
(
@ds
)
||
die
"No dataset found.\n"
;
print
STDOUT
"There are "
,
$#ds
+
1
,
" datasets.\n"
;
#---------------------------------
# check meta info for all datasets
#.................................
undef
$timeStepNumber
;
undef
$nDims
;
undef
$format
;
undef
$Dim
;
undef
$dim
;
undef
%ds_Index0
;
#..............................................
# check each meta file and set some global vars
foreach
$ds
(
@ds
)
{
&
check_meta
(
$ds
,
$ds_dir
{
$ds
});
}
print
STDOUT
"All existing meta files are self- and mutually consistent.\n"
;
#print join(" ",$timeStepNumber,$nDims,$format,$Dim,$dim), "\n";
#foreach $ds (@ds) {
# $dir = $ds_dir{$ds};
# $Index0 = $ds_Index0{$ds};
# print "$ds\n";
# print "$Index0\n";
#}
@Dim
=
split
(
/,/
,
$Dim
);
@dim
=
split
(
/,/
,
$dim
);
#................................
# check meta info in its entirety
&
check_entirety
(
*
Dim
,
*
dim
,
*
ds_Index0
)
||
die
"Datasets are not complete!\n"
;
print
STDOUT
"Datasets are complete.\n"
;
#...........
# set $bytes
if
(
$format
eq
"float32"
)
{
$bytes
=
4
;
}
elsif
(
$format
eq
"float64"
)
{
$bytes
=
8
}
else
{
die
"format '$format' unknown\n"
;
}
#---------------------------
# check and merge data files
#...........................
#........................
# check machine's bytesex
# it dies if neither little- nor big-endian.
$Mach_Bytesex
=
&
mach_bytesex
();
print
STDOUT
"Current machine's endianness: $Mach_Bytesex\n"
;
#...................
# check file bytesex and resolve related issues
#aja undef $File_Bytesex;
#aja foreach $ds (@ds) {
#aja $fdata = "$ds.data";
#aja $file_bytesex = &file_bytesex($ds_dir{$ds}."/$fdata");
#aja ($file_bytesex eq "unknown") &&
#aja die "$fdata: endianness is neither little- nor big-endian.\n";
#aja print STDOUT "$fdata: $file_bytesex\n";
#aja unless ($File_Bytesex) {
#aja $File_Bytesex = $file_bytesex;
#aja } else {
#aja ($File_Bytesex eq $file_bytesex) ||
#aja die "Data files are mutually inconsistent in endianness\n";
#aja }
#aja }
$File_Bytesex
=
'big-endian'
;
#------------------
# set $Byte_Reorder, which controls swapping of bytes in
# header and terminator of Fortran unformatted data files.
#aja $Byte_Reorder = 0;
$Byte_Reorder
=
0
;
# if machine and data file have the same bytesex, no need for swapping
#aja ($File_Bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0);
# if we can't determine bytesex of data file, need forced one from @ARGV.
if
(
$File_Bytesex
eq
"undecidable"
)
{
# if no forced bytesex available, dies.
(
$forced_bytesex
)
||
die
"Endianness of data files is undecidable, "
.
"you have to give one at command line.\n"
;
(
$forced_bytesex
eq
$Mach_Bytesex
)
&&
(
$Byte_Reorder
=
0
);
print
STDOUT
"Endianness of data files is undecidable.\n"
;
print
STDOUT
"Data file header/tail will be treated as "
;
print
STDOUT
"$forced_bytesex as you have instructed.\n"
;
# otherwise
}
else
{
# give a warining, if swapping is needed.
(
$Byte_Reorder
)
&&
print
STDOUT
"Please note: data files have different bytesex than machine!\n"
;
}
#................
# merge data sets
$Size
=
&
listprod
(
@Dim
)
*
$bytes
;
$fout
=
"$pref.$suff.data"
;
open
(
FILE
,
">$fout"
)
||
die
"$fout: $!\n"
;
# prepare header and teminator. Do byte reordering if necessary
$HdrTmr
=
pack
(
"I"
,
$Size
);
(
$Byte_Reorder
)
&&
(
$HdrTmr
=
join
(
""
,
reverse
(
split
(
//
,
$HdrTmr
))));
# write 4 byte header
#aja syswrite(FILE,$HdrTmr,4);
# merge each dataset
foreach
$ds
(
@ds
)
{
$dir
=
$ds_dir
{
$ds
};
@Index0
=
split
(
/,/
,
$ds_Index0
{
$ds
});
&
merge_data
(
$ds
,
$dir
,
*
Index0
);
}
# write 4 byte terminator
#aja seek(FILE,$Size+4,0);
#aja syswrite(FILE,$HdrTmr,4);
close
(
FILE
);
print
STDOUT
"Global data ("
.
join
(
"x"
,
@Dim
)
.
") is in ./$fout (endianness is $File_Bytesex).\n"
;
exit
0
;
Event Timeline
Log In to Comment