Page MenuHomec4science

joinmds
No OneTemporary

File Metadata

Created
Mon, Feb 24, 13:31
#!/usr/local/bin/perl -w
# MITgcmUV DataSet joining utility.
# Tested with perl 4.0 and newer.
# Tested on Linux 2.0.27/I486, Irix 6.2/{IP22,IP25}
# Zhangfan XING, xing@pacific.jpl.nasa.gov
# Adapted to work with MDS I/O format by adcroft@mit.edu, 5/7/1999
#
# LOGS:
# 980707, version 0.0.1, basically works
# 980721, version 0.2.0, proper handling of data file's header and terminator
# for diff bytesex.
# 990507, HACK'd by AJA. Needs to be properly merged with the original joinds
#------
# usage
#------
sub usage {
print STDERR
"\nUsage:$0 [-Ddir0 -Ddir1 ...] " .
"prefix suffix [(little-endian|big-endian)]\n";
print STDERR "\nMITgcmUV DataSet joining utility, version 0.3.0\n";
print STDERR
"Check http://escher.jpl.nasa.gov:2000/tools/ for newer version.\n";
print STDERR "Report problem to xing\@pacific.jpl.nasa.gov\n\n";
exit 1;
}
#------------------------------
# product of a list of integers
#------------------------------
sub listprod {
local ($product) = 1;
local ($x);
foreach $x (@_) {
$product *= $x;
}
$product;
}
#----------------
# @list1 + @list2
#----------------
sub lists_add {
local (*l1,*l2) = @_;
($#l1 == $#l2) || return undef;
local (@l);
for (local($i)=0;$i<=$#l1;$i++) {
$l[$i]=$l1[$i]+$l2[$i];
}
@l;
}
#-------------
# pos to index
# 0-based.
#-------------
sub pos2index {
local ($pos,@dim) = @_;
local ($rightmost) = pop(@dim);
local (@index,$d);
foreach $d (@dim) {
push(@index,$pos%$d);
$pos = int($pos/$d);
}
# self-guarding
unless ($rightmost > $pos) {
return undef;
}
push(@index,$pos);
@index;
}
#-------------
# index to pos
# 0-based.
#-------------
sub index2pos {
local (*index,*dim) = @_;
return undef unless ($#index == $#dim);
local ($pos) = $index[$#index];
for (local($i)=$#dim;$i>0;$i--) {
$pos = $pos * $dim[$i-1] + $index[$i-1];
}
$pos;
}
#-------------------------
# check machine's bytesex.
# returns "little-endian" or "big-endian"
# or dies if unable to figure out
#-------------------------
sub mach_bytesex {
local ($foo) = pack("s2",1,2);
if ($foo eq "\1\0\2\0") {
return "little-endian";
} elsif ($foo eq "\0\1\0\2") {
return "big-endian";
} else {
die "Your machine has a strange bytesex.\n".
"Email your platform info to xing\@pacific.jpl.nasa.gov\n";
}
}
#--------------------------------------------------
# check bytesex of a fortran unformatted data file
# current machine's bytesex is used as a reference.
# returns: one of "little-endian", "big-endian", "undecidable" and "unknown"
#--------------------------------------------------
sub file_bytesex {
# only if this platform's bytesex is either big- or little-endian
# otherwise dies. Hope this won't happen.
local($mach_bytesex) = &mach_bytesex();
local ($file) = shift;
local (*FILE);
open(FILE,$file) || die "$file: $!\n";
local(@fstat) = stat(FILE);
local ($size) = $fstat[7] - 8; # total data size in bytes
local($hdr,$tmr) = ("","");
read(FILE,$hdr,4);
seek(FILE,-4,2);
read(FILE,$tmr,4);
close(FILE);
# this part checks for self-consistency of Fortran unformatted file
($hdr eq $tmr) || die "$file: not a Fortran unformatted data file.\n";
local ($ori) = unpack("I",$hdr);
local ($rev) = unpack("I",join("",reverse(split(//,$hdr))));
($ori != $size && $rev != $size) &&
return "unknown";
($ori == $size && $rev == $size) &&
return "undecidable";
local ($opposite) = ($mach_bytesex eq "little-endian") ?
"big-endian" : "little-endian";
return ($ori == $size) ? $mach_bytesex : $opposite;
}
#--------------------------------
# check meta info for one dataset
#--------------------------------
sub check_meta {
local ($ds,$dir) = @_;
local ($fmeta) = "$dir/$ds.meta";
#~~~~~~~~~~~~~~~~
# check meta info
#~~~~~~~~~~~~~~~~
undef $/; # read to the end of file
open(MFILE,"<$fmeta") || die "$fmeta: $!\n";
$_=<MFILE>;
close(MFILE);
$/ = "\n"; # never mess up
s/\([^)]*\)//g; #rm (.*)
s/\/\/[^\n]*\n//g; #rm comment lines
s/\/\*.*\*\///g; #rm inline comments
s/\s+//g; #rm white spaces
/nDims=\[(.+)\];dimList=\[(.+)\];format=\['(.+)'\];nrecords=\[(.+)\];timeStepNumber=\[(.+)\];/
|| die "$fmeta: meta file format error\n";
local ($nDims_,$dimList_,$format_,$nrecords_,$timeStepNumber_) = ($1,$2,$3,$4,$5);
# check Identifier
(defined $timeStepNumber) || ($timeStepNumber = $timeStepNumber_);
($timeStepNumber eq $timeStepNumber_) ||
die "$fmeta: timeStepNumber $timeStepNumber_ inconsistent with other dataset\n";
# check Number of dimensions
(defined $nDims) || ($nDims = $nDims_);
($nDims eq $nDims_) ||
die "$fmeta: nDims $nDims_ inconsistent with other dataset\n";
# check Field format
(defined $format) || ($format = $format_);
($format eq $format_) ||
die "$fmeta: format $format_ inconsistent with other dataset\n";
# check dimList
# calc dimesions and leading index of this subset
local (@dimList_) = split(/,/,$dimList_);
($nDims_*3 == $#dimList_+1) ||
die "$fmeta: nDims and dimList conflicting\n";
local (@Dim,@dim,@Index0) = ();
for (local($i)=0;$i<$nDims_;$i++) {
push(@Dim,$dimList_[$i*3]);
push(@dim,$dimList_[$i*3+2]-$dimList_[$i*3+1]+1);
push(@Index0,$dimList_[$i*3+1]-1);
}
local ($Dim_) = join(",",@Dim);
local ($dim_) = join(",",@dim);
(defined $Dim) || ($Dim = $Dim_);
($Dim eq $Dim_) ||
die "$fmeta: dimList Global inconsistent with other dataset\n";
(defined $dim) || ($dim = $dim_);
($dim eq $dim_) ||
die "$fmeta: dimList Local inconsistent with other dataset\n";
$ds_Index0{$ds} = join(",", @Index0);
# print STDOUT "Okay $fmeta\n";
}
#-------------------------------
# check completeness of datasets
# need to be more sophisticated
#-------------------------------
sub check_entirety {
local (*Dim,*dim,*ds_Index0) = @_;
local ($N) = &listprod(@Dim);
local ($n) = &listprod(@dim);
($N) || return 0; # against null dimension
($n) || return 0; # against null dimension
($N%$n) && return 0; # $N/$n must be a whole number
local (@ds) = keys %ds_Index0;
($#ds+1 == $N/$n) || return 0; # Num of datasets must match subdomain
1;
}
#------------------
# merge one dataset
# assume @Dim, @dim and $bytes existing
# assume $Byte_Reorder existing
#------------------
sub merge_data {
local ($ds,$dir,*Index0) = @_;
local ($fdata) = "$dir/$ds.data";
# data size of one subset in bytes as told by meta info
local ($size) = &listprod(@dim) * $bytes;
open(DFILE, "<$fdata") || die "$fdata: $!\n";
local ($raw) = "";
#aja sysread(DFILE,$raw,4);
# Swap header if bytesex is diff from machine's
local ($hdr);
if ($Byte_Reorder) {
$hdr = unpack("I",join("",reverse(split(//,$raw))));
} else {
$hdr = unpack("I",$raw);
}
#aja ($size == $hdr) ||
#aja die "$fdata: $hdr bytes inconsistent with meta info\n";
print STDOUT "$ds.data: $size bytes, okay, ";
# seek(DFILE,4,0); # rewind back to the beginning of data
local ($data) = ""; # old perl (< 4.0) needs this to
sysread(DFILE,$data,$size); # avoid warning by sysread()
local ($len_chunk) = $dim[0] * $bytes;
local ($num_chunk) = $size/$len_chunk;
local ($pos,@index,$Pos,@Index);
for (local($i)=0;$i<$num_chunk;$i++) {
$pos = $i * $dim[0];
@index = &pos2index($pos,@dim);
@Index = &lists_add(*index,*Index0);
$Pos = &index2pos(*Index,*Dim);
#aja seek(FILE,$Pos*$bytes+4,0);
seek(FILE,$Pos*$bytes,0);
syswrite(FILE,$data,$len_chunk,$pos*$bytes);
}
close(DFILE);
print STDOUT "merged from $dir\n";
}
#============
# main script
#============
#------------
# parse @ARGV
#............
($#ARGV >= 1) || &usage();
undef @dirs;
while (1) {
$x = shift(@ARGV);
unless ($x =~ /^-D(.+)$/) {
unshift(@ARGV,$x);
last;
}
push(@dirs,$1);
}
(@dirs) || push(@dirs,".");
# @dirs is not empty after this line.
#print STDOUT join(" ",@dirs), "\n";
($#ARGV >= 1) || &usage();
# data set prefix and suffix
$pref = shift(@ARGV);
$suff = shift(@ARGV);
($#ARGV >= 1) && &usage();
undef $forced_bytesex;
if (@ARGV) {
$forced_bytesex = shift(@ARGV);
$forced_bytesex =~ /^(little|big)-endian$/ || &usage();
}
#print STDOUT $forced_bytesex, "\n";
#--------------------------
# obtain a list of datasets
#..........................
# %ds_dir is a hash to store the directory that a dataset is in.
# After this step, it is assured that, for a dataset $ds,
# both $ds.meta and $ds.data exist in a unique dir $ds_dir{$ds}.
%ds_dir = ();
foreach $dir (@dirs) {
opendir(DIR, $dir) || die "$dir: $!\n";
@fmeta = grep(/^$pref\.$suff\.\d+\.\d+\.meta$/, readdir(DIR));
closedir(DIR);
foreach $fmeta (@fmeta) {
$ds = $fmeta; $ds =~ s/\.meta$//g;
(defined $ds_dir{$ds}) &&
die "$fmeta appears in two dirs: $ds_dir{$ds} & $dir\n";
(-f "$dir/$ds.data") || die "In $dir, $ds.data missing\n";
$ds_dir{$ds} = $dir;
}
}
@ds = sort(keys %ds_dir); # list of datasets
(@ds) || die "No dataset found.\n";
print STDOUT "There are ", $#ds+1, " datasets.\n";
#---------------------------------
# check meta info for all datasets
#.................................
undef $timeStepNumber;
undef $nDims;
undef $format;
undef $Dim;
undef $dim;
undef %ds_Index0;
#..............................................
# check each meta file and set some global vars
foreach $ds (@ds) {
&check_meta($ds,$ds_dir{$ds});
}
print STDOUT "All existing meta files are self- and mutually consistent.\n";
#print join(" ",$timeStepNumber,$nDims,$format,$Dim,$dim), "\n";
#foreach $ds (@ds) {
# $dir = $ds_dir{$ds};
# $Index0 = $ds_Index0{$ds};
# print "$ds\n";
# print "$Index0\n";
#}
@Dim = split(/,/,$Dim);
@dim = split(/,/,$dim);
#................................
# check meta info in its entirety
&check_entirety(*Dim,*dim,*ds_Index0) ||
die "Datasets are not complete!\n";
print STDOUT "Datasets are complete.\n";
#...........
# set $bytes
if ($format eq "float32") {
$bytes = 4;
} elsif ($format eq "float64") {
$bytes = 8
} else {
die "format '$format' unknown\n";
}
#---------------------------
# check and merge data files
#...........................
#........................
# check machine's bytesex
# it dies if neither little- nor big-endian.
$Mach_Bytesex = &mach_bytesex();
print STDOUT "Current machine's endianness: $Mach_Bytesex\n";
#...................
# check file bytesex and resolve related issues
#aja undef $File_Bytesex;
#aja foreach $ds (@ds) {
#aja $fdata = "$ds.data";
#aja $file_bytesex = &file_bytesex($ds_dir{$ds}."/$fdata");
#aja ($file_bytesex eq "unknown") &&
#aja die "$fdata: endianness is neither little- nor big-endian.\n";
#aja print STDOUT "$fdata: $file_bytesex\n";
#aja unless ($File_Bytesex) {
#aja $File_Bytesex = $file_bytesex;
#aja } else {
#aja ($File_Bytesex eq $file_bytesex) ||
#aja die "Data files are mutually inconsistent in endianness\n";
#aja }
#aja }
$File_Bytesex = 'big-endian';
#------------------
# set $Byte_Reorder, which controls swapping of bytes in
# header and terminator of Fortran unformatted data files.
#aja $Byte_Reorder = 0;
$Byte_Reorder = 0;
# if machine and data file have the same bytesex, no need for swapping
#aja ($File_Bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0);
# if we can't determine bytesex of data file, need forced one from @ARGV.
if ($File_Bytesex eq "undecidable") {
# if no forced bytesex available, dies.
($forced_bytesex) ||
die "Endianness of data files is undecidable, " .
"you have to give one at command line.\n";
($forced_bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0);
print STDOUT "Endianness of data files is undecidable.\n";
print STDOUT "Data file header/tail will be treated as ";
print STDOUT "$forced_bytesex as you have instructed.\n";
# otherwise
} else {
# give a warining, if swapping is needed.
($Byte_Reorder) &&
print STDOUT
"Please note: data files have different bytesex than machine!\n";
}
#................
# merge data sets
$Size = &listprod(@Dim) * $bytes;
$fout = "$pref.$suff.data";
open(FILE, ">$fout") || die "$fout: $!\n";
# prepare header and teminator. Do byte reordering if necessary
$HdrTmr = pack("I",$Size);
($Byte_Reorder) && ($HdrTmr = join("",reverse(split(//,$HdrTmr))));
# write 4 byte header
#aja syswrite(FILE,$HdrTmr,4);
# merge each dataset
foreach $ds (@ds) {
$dir = $ds_dir{$ds};
@Index0 = split(/,/,$ds_Index0{$ds});
&merge_data($ds,$dir,*Index0);
}
# write 4 byte terminator
#aja seek(FILE,$Size+4,0);
#aja syswrite(FILE,$HdrTmr,4);
close(FILE);
print STDOUT "Global data (" .
join("x",@Dim) .
") is in ./$fout (endianness is $File_Bytesex).\n";
exit 0;

Event Timeline