Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F102813669
joinds
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Feb 24, 11:06
Size
11 KB
Mime Type
text/x-perl
Expires
Wed, Feb 26, 11:06 (2 d)
Engine
blob
Format
Raw Data
Handle
24434531
Attached To
R2795 mitgcm_lac_leman_abirani
joinds
View Options
#!/usr/local/bin/perl -w
# MITgcmUV dataset joining utility.
# Tested with perl 4.0 and newer.
# Tested on Linux 2.0.27/I486, Irix 6.2/{IP22,IP25}
# Zhangfan XING, xing@pacific.jpl.nasa.gov
#
# LOGS:
# 980707, version 0.0.1, basically works
# 980721, version 0.2.0, proper handling of data file's header and terminator
# for diff bytesex.
#------
# usage
# Print the command-line synopsis, version and contact details on STDERR,
# then terminate the script with exit status 1. Never returns.
#------
sub usage {
    print STDERR (
        "\nUsage:$0 [-Ddir0 -Ddir1 ...] ",
        "prefix suffix [(little-endian|big-endian)]\n",
        "\nMITgcmUV dataset joining utility, version 0.2.0\n",
        "Check http://escher.jpl.nasa.gov:2000/tools/ for newer version.\n",
        "Report problem to xing\@pacific.jpl.nasa.gov\n\n"
    );
    exit 1;
}
#------------------------------
# product of a list of integers
# Returns the product of all arguments; with no arguments the result is 1.
#------------------------------
sub listprod {
    local ($acc) = 1;
    local ($k);
    # Walk the argument list by position and fold each value into $acc.
    for ($k = 0; $k <= $#_; $k++) {
        $acc *= $_[$k];
    }
    return $acc;
}
#----------------
# @list1 + @list2
# Element-wise sum of two arrays passed as typeglobs (*list1,*list2).
# Returns the summed list, or undef when the arrays differ in length.
#----------------
sub lists_add {
    local (*l1,*l2) = @_;
    return undef unless ($#l1 == $#l2);
    local (@sum);
    local ($k);
    foreach $k (0 .. $#l1) {
        push(@sum, $l1[$k] + $l2[$k]);
    }
    @sum;
}
#-------------
# pos to index
# 0-based.
# Arguments: a linear position followed by the extent of every dimension,
# first dimension varying fastest.
# Returns the list of per-dimension indices, or undef when the position
# does not fit inside the last dimension.
#-------------
sub pos2index {
    local ($pos,@extent) = @_;
    # The last extent only bounds the result; it is never divided by.
    local ($last) = pop(@extent);
    local (@index);
    local ($k);
    for ($k = 0; $k <= $#extent; $k++) {
        push(@index, $pos % $extent[$k]);
        $pos = int($pos / $extent[$k]);
    }
    # self-guarding: what is left must be a valid index in the last dimension
    return undef unless ($last > $pos);
    push(@index, $pos);
    @index;
}
#-------------
# index to pos
# 0-based.
# Arguments: two typeglobs, (*index,*dim), naming the per-dimension
# indices and the per-dimension extents (first dimension fastest).
# Returns the linear position, or undef when the arrays differ in length.
#-------------
sub index2pos {
    local (*index,*dim) = @_;
    ($#index == $#dim) || return undef;
    local ($top) = $#dim;
    # Start from the slowest-varying index and fold in the faster ones.
    local ($pos) = $index[$top];
    local ($k);
    foreach $k (reverse(0 .. $top - 1)) {
        $pos = $pos * $dim[$k] + $index[$k];
    }
    return $pos;
}
#-------------------------
# check machine's bytesex.
# Packs two native shorts and inspects the resulting byte order.
# returns "little-endian" or "big-endian"
# or dies if unable to figure out
#-------------------------
sub mach_bytesex {
    local ($probe) = pack("s2",1,2);
    return "little-endian" if ($probe eq "\1\0\2\0");
    return "big-endian"    if ($probe eq "\0\1\0\2");
    die "Your machine has a strange bytesex.\n",
        "Email your platform info to xing\@pacific.jpl.nasa.gov\n";
}
#--------------------------------------------------
# check bytesex of a fortran unformatted data file
#
# The file is expected to hold one record: a 4-byte length header, the
# data, and a 4-byte terminator identical to the header. The header is
# decoded explicitly as a 32-bit little-endian ("V") and big-endian ("N")
# integer and compared with the actual data size (file size minus 8), so
# the answer does not depend on the byte order or int width of the machine
# running the script.
#
# argument: path of the data file
# returns: one of "little-endian", "big-endian", "undecidable" and "unknown"
#   "undecidable"  both decodings equal the data size
#   "unknown"      neither decoding equals the data size
# dies if the file cannot be opened, is shorter than 8 bytes, cannot be
# read, or its header and terminator differ.
#--------------------------------------------------
sub file_bytesex {
    local ($file) = shift;
    local (*FILE);
    # Explicit "<" keeps a path that starts with ">" or "|" from being
    # opened in another mode; this matches the other opens in this script.
    open(FILE,"<$file") || die "$file: $!\n";
    binmode(FILE);
    local (@fstat) = stat(FILE);
    local ($size) = $fstat[7] - 8; # total data size in bytes
    local ($hdr,$tmr) = ("","");
    # Only touch the file when header and terminator can both exist, and
    # make sure each of them was read in full.
    local ($ok) = (($size >= 0)
        && (read(FILE,$hdr,4) || 0) == 4
        && seek(FILE,-4,2)
        && (read(FILE,$tmr,4) || 0) == 4);
    close(FILE);
    # this part checks for self-consistency of Fortran unformatted file
    ($ok && $hdr eq $tmr) ||
        die "$file: not a Fortran unformatted data file.\n";
    local ($as_little) = unpack("V",$hdr);
    local ($as_big)    = unpack("N",$hdr);
    ($as_little != $size && $as_big != $size) &&
        return "unknown";
    ($as_little == $size && $as_big == $size) &&
        return "undecidable";
    return ($as_little == $size) ? "little-endian" : "big-endian";
}
#--------------------------------
# check meta info for one dataset
#
# arguments: dataset name $ds and the directory $dir holding "$ds.meta"
#
# Parses the meta file and compares id, nDims, format and the global and
# local dimension lists against the globals $id, $nDims, $format, $Dim and
# $dim. Any of those globals that is still undefined is set from this
# file, so the first dataset checked becomes the reference for the rest.
# Records the 0-based leading index of this subset, as a comma-joined
# string, in $ds_Index0{$ds}.
#
# dies if the file cannot be opened, does not match the expected layout,
# or disagrees with a previously checked dataset.
# The caller's $_ and $/ are left untouched.
#--------------------------------
sub check_meta {
    local ($ds,$dir) = @_;
    local ($fmeta) = "$dir/$ds.meta";
    #~~~~~~~~~~~~~~~~
    # check meta info
    #~~~~~~~~~~~~~~~~
    local ($_);             # keep the caller's $_ intact
    {
        local ($/);         # undef inside this block: read to the end of file
        open(MFILE,"<$fmeta") || die "$fmeta: $!\n";
        $_ = <MFILE>;
        close(MFILE);
    }
    s/\([^)]*\)//g;         #rm (.*)
    s/\/\/[^\n]*\n//g;      #rm comment lines
    # Non-greedy, so two comments on one line are removed separately
    # instead of swallowing everything between them.
    s/\/\*.*?\*\///g;       #rm inline comments
    s/\s+//g;               #rm white spaces
    /id=\[(.+)\];nDims=\[(.+)\];dimList=\[(.+)\];format=\['(.+)'\];/
        || die "$fmeta: meta file format error\n";
    local ($id_,$nDims_,$dimList_,$format_) = ($1,$2,$3,$4);
    # check Identifier
    (defined $id) || ($id = $id_);
    ($id eq $id_) ||
        die "$fmeta: id $id_ inconsistent with other dataset\n";
    # check Number of dimensions
    (defined $nDims) || ($nDims = $nDims_);
    ($nDims eq $nDims_) ||
        die "$fmeta: nDims $nDims_ inconsistent with other dataset\n";
    # check Field format
    (defined $format) || ($format = $format_);
    ($format eq $format_) ||
        die "$fmeta: format $format_ inconsistent with other dataset\n";
    # check dimList
    # calc dimensions and leading index of this subset
    # dimList holds one (global size, first, last) triple per dimension,
    # with first/last being 1-based positions in the global array.
    local (@dimList_) = split(/,/,$dimList_);
    ($nDims_*3 == $#dimList_+1) ||
        die "$fmeta: nDims and dimList conflicting\n";
    local (@Dim,@dim,@Index0) = ();
    for (local($i)=0;$i<$nDims_;$i++) {
        push(@Dim,$dimList_[$i*3]);
        push(@dim,$dimList_[$i*3+2]-$dimList_[$i*3+1]+1);
        push(@Index0,$dimList_[$i*3+1]-1);
    }
    local ($Dim_) = join(",",@Dim);
    local ($dim_) = join(",",@dim);
    (defined $Dim) || ($Dim = $Dim_);
    ($Dim eq $Dim_) ||
        die "$fmeta: dimList Global inconsistent with other dataset\n";
    (defined $dim) || ($dim = $dim_);
    ($dim eq $dim_) ||
        die "$fmeta: dimList Local inconsistent with other dataset\n";
    $ds_Index0{$ds} = join(",", @Index0);
    # print STDOUT "Okay $fmeta\n";
}
#-------------------------------
# check completeness of datasets
#
# arguments: three typeglobs (*Dim,*dim,*ds_Index0)
#   @Dim        global extent of every dimension
#   @dim        extent of one subset in every dimension
#   %ds_Index0  dataset name => comma-joined 0-based leading index
#
# returns 1 when the subsets tile the global array exactly, 0 otherwise.
# The subsets are complete when
#   - every global extent is a positive multiple of the subset extent,
#   - the number of datasets equals the number of tiles that implies,
#   - every leading index lies on a tile boundary inside the global array,
#   - no two datasets share the same leading index.
# Counting alone is not enough: a duplicated or misplaced subset would
# leave part of the output file unwritten.
#-------------------------------
sub check_entirety {
    local (*Dim,*dim,*ds_Index0) = @_;
    ($#Dim == $#dim) || return 0;
    local ($tiles) = 1;         # number of subsets a full tiling needs
    local ($i);
    for ($i = 0; $i <= $#Dim; $i++) {
        ($Dim[$i] > 0 && $dim[$i] > 0) || return 0; # against null dimension
        ($Dim[$i] % $dim[$i]) && return 0;          # must divide evenly
        $tiles *= $Dim[$i] / $dim[$i];
    }
    local (@ds) = keys %ds_Index0;
    ($#ds+1 == $tiles) || return 0; # Num of datasets must match subdomain
    local (%seen,$ds,@Index0);
    foreach $ds (@ds) {
        @Index0 = split(/,/,$ds_Index0{$ds});
        ($#Index0 == $#Dim) || return 0;
        for ($i = 0; $i <= $#Dim; $i++) {
            ($Index0[$i] >= 0) || return 0;
            ($Index0[$i] + $dim[$i] <= $Dim[$i]) || return 0;
            ($Index0[$i] % $dim[$i]) && return 0;   # not on a tile boundary
        }
        # two datasets claiming the same tile leave another tile empty
        ($seen{join(",",@Index0)}++) && return 0;
    }
    1;
}
#------------------
# merge one dataset
#
# Copies the data record of one sub-dataset into its place in the global
# output file, one run along the first dimension ("chunk") at a time.
#
# arguments:
#   $ds      dataset name; the file read is "$dir/$ds.data"
#   $dir     directory holding that data file
#   *Index0  typeglob whose array @Index0 is added to every local index
#            to obtain the corresponding index in the global array
#
# assume @Dim, @dim and $bytes existing
#   (@Dim/@dim: global and per-subset dimensions, $bytes: element size)
# assume $Byte_Reorder existing
#   (true when the 4-byte header must be byte-reversed before decoding)
# assume output handle FILE existing: it is not opened here, so the
#   caller must already have opened it for writing
#
# dies if the data file cannot be opened or if its header disagrees with
# the size computed from @dim and $bytes.
# NOTE(review): return values of sysread/seek/syswrite are not checked,
# and neither are the undef results the index helpers return on failure.
#------------------
sub merge_data {
local ($ds,$dir,*Index0) = @_;
local ($fdata) = "$dir/$ds.data";
# data size of one subset in bytes as told by meta info
local ($size) = &listprod(@dim) * $bytes;
open(DFILE, "<$fdata") || die "$fdata: $!\n";
# first 4 bytes of the file: the record-length header
local ($raw) = "";
sysread(DFILE,$raw,4);
# Swap header if bytesex is diff from machine's
local ($hdr);
if ($Byte_Reorder) {
$hdr = unpack("I",join("",reverse(split(//,$raw))));
} else {
$hdr = unpack("I",$raw);
}
# the header must announce exactly the size the meta info implies
($size == $hdr) ||
die "$fdata: $hdr bytes inconsistent with meta info\n";
print STDOUT "$ds.data: $size bytes, okay, ";
# seek(DFILE,4,0); # rewind back to the beginning of data
# read the whole data record of this subset into memory
local ($data) = ""; # old perl (< 4.0) needs this to
sysread(DFILE,$data,$size); # avoid warning by sysread()
# a chunk is $dim[0] consecutive elements, i.e. one run along dimension 0
local ($len_chunk) = $dim[0] * $bytes;
local ($num_chunk) = $size/$len_chunk;
local ($pos,@index,$Pos,@Index);
for (local($i)=0;$i<$num_chunk;$i++) {
# element offset of chunk $i inside this subset
$pos = $i * $dim[0];
# local position -> local index -> global index -> global position
@index = &pos2index($pos,@dim);
@Index = &lists_add(*index,*Index0);
$Pos = &index2pos(*Index,*Dim);
# +4 skips the 4-byte header at the start of the output file
seek(FILE,$Pos*$bytes+4,0);
# write $len_chunk bytes taken from $data at byte offset $pos*$bytes
syswrite(FILE,$data,$len_chunk,$pos*$bytes);
}
close(DFILE);
print STDOUT "merged from $dir\n";
}
#============
# main script
#============
#------------
# parse @ARGV
#............
# At least a prefix and a suffix are required.
($#ARGV >= 1) || &usage();
# Collect the leading -Ddir options into @dirs. The loop stops at the
# first argument that is not a -D option, or when the arguments run out;
# checking @ARGV first avoids matching an undefined value and pushing it
# back into @ARGV when every argument is a -D option.
undef @dirs;
while (@ARGV && $ARGV[0] =~ /^-D(.+)$/) {
    push(@dirs,$1);
    shift(@ARGV);
}
(@dirs) || push(@dirs,".");
# @dirs is not empty after this line.
#print STDOUT join(" ",@dirs), "\n";
($#ARGV >= 1) || &usage();
# data set prefix and suffix
$pref = shift(@ARGV);
$suff = shift(@ARGV);
# at most one more argument may follow: the forced endianness
($#ARGV >= 1) && &usage();
undef $forced_bytesex;
if (@ARGV) {
    $forced_bytesex = shift(@ARGV);
    $forced_bytesex =~ /^(little|big)-endian$/ || &usage();
}
#print STDOUT $forced_bytesex, "\n";
#--------------------------
# obtain a list of datasets
#..........................
# %ds_dir is a hash to store the directory that a dataset is in.
# After this step, it is assured that, for a dataset $ds,
# both $ds.meta and $ds.data exist in a unique dir $ds_dir{$ds}.
%ds_dir = ();
foreach $dir (@dirs) {
    opendir(DIR, $dir) || die "$dir: $!\n";
    # \Q...\E makes prefix and suffix match literally, so characters such
    # as "." or "+" in them are not treated as regex operators.
    @fmeta = grep(/^\Q$pref\E\.\Q$suff\E\.p\d+\.t\d+\.meta$/, readdir(DIR));
    closedir(DIR);
    foreach $fmeta (@fmeta) {
        $ds = $fmeta; $ds =~ s/\.meta$//g;
        # a dataset may live in one directory only
        (defined $ds_dir{$ds}) &&
            die "$fmeta appears in two dirs: $ds_dir{$ds} & $dir\n";
        # every meta file needs its data file next to it
        (-f "$dir/$ds.data") || die "In $dir, $ds.data missing\n";
        $ds_dir{$ds} = $dir;
    }
}
@ds = sort(keys %ds_dir); # list of datasets
(@ds) || die "No dataset found.\n";
print STDOUT "There are ", $#ds+1, " datasets.\n";
#---------------------------------
# check meta info for all datasets
#.................................
# Clear every global that check_meta() seeds from the first meta file.
($id,$nDims,$format,$Dim,$dim) = ();
%ds_Index0 = ();
#..............................................
# check each meta file and set some global vars
foreach $ds (@ds) {
    &check_meta($ds,$ds_dir{$ds});
}
print STDOUT "All existing meta files are self- and mutually consistent.\n";
#print join(" ",$id,$nDims,$format,$Dim,$dim), "\n";
#foreach $ds (@ds) {
# $dir = $ds_dir{$ds};
# $Index0 = $ds_Index0{$ds};
# print "$ds\n";
# print "$Index0\n";
#}
# Turn the comma-joined dimension strings into arrays.
@Dim = split(/,/,$Dim);
@dim = split(/,/,$dim);
#................................
# check meta info in its entirety
unless (&check_entirety(*Dim,*dim,*ds_Index0)) {
    die "Datasets are not complete!\n";
}
print STDOUT "Datasets are complete.\n";
#...........
# set $bytes: element size in bytes for each known field format
%Format_Bytes = ("float32", 4, "float64", 8);
$bytes = $Format_Bytes{$format};
defined($bytes) || die "format '$format' unknown\n";
#---------------------------
# check and merge data files
#...........................
#........................
# check machine's bytesex
# it dies if neither little- nor big-endian.
$Mach_Bytesex = &mach_bytesex();
print STDOUT "Current machine's endianness: $Mach_Bytesex\n";
#...................
# check file bytesex and resolve related issues
# Every data file must report the same endianness; the first file sets
# the reference in $File_Bytesex.
undef $File_Bytesex;
foreach $ds (@ds) {
    $fdata = "$ds.data";
    $file_bytesex = &file_bytesex("$ds_dir{$ds}/$fdata");
    if ($file_bytesex eq "unknown") {
        die "$fdata: endianness is neither little- nor big-endian.\n";
    }
    print STDOUT "$fdata: $file_bytesex\n";
    if (!$File_Bytesex) {
        $File_Bytesex = $file_bytesex;
    } elsif ($File_Bytesex ne $file_bytesex) {
        die "Data files are mutually inconsistent in endianness\n";
    }
}
#------------------
# set $Byte_Reorder, which controls swapping of bytes in
# header and terminator of Fortran unformatted data files.
# if machine and data file have the same bytesex, no need for swapping
$Byte_Reorder = ($File_Bytesex eq $Mach_Bytesex) ? 0 : 1;
if ($File_Bytesex ne "undecidable") {
    # give a warning, if swapping is needed.
    if ($Byte_Reorder) {
        print STDOUT
            "Please note: data files have different bytesex than machine!\n";
    }
} else {
    # if we can't determine bytesex of data file, need forced one from @ARGV.
    # if no forced bytesex available, dies.
    unless ($forced_bytesex) {
        die "Endianness of data files is undecidable, " .
            "you have to give one at command line.\n";
    }
    if ($forced_bytesex eq $Mach_Bytesex) {
        $Byte_Reorder = 0;
    }
    print STDOUT "Endianness of data files is undecidable.\n",
        "Data file header/tail will be treated as ",
        "$forced_bytesex as you have instructed.\n";
}
#................
# merge data sets
# Total size in bytes of the joined data record.
$Size = &listprod(@Dim) * $bytes;
$fout = "$pref.$suff.data";
open(FILE, ">$fout") || die "$fout: $!\n";
# prepare header and terminator. Do byte reordering if necessary
$HdrTmr = pack("I",$Size);
($Byte_Reorder) && ($HdrTmr = join("",reverse(split(//,$HdrTmr))));
# write 4 byte header
# A failed or short write must stop the run; otherwise a truncated
# output file would still be reported as a success.
((syswrite(FILE,$HdrTmr,4) || 0) == 4) ||
    die "$fout: cannot write header: $!\n";
# merge each dataset
foreach $ds (@ds) {
    $dir = $ds_dir{$ds};
    @Index0 = split(/,/,$ds_Index0{$ds});
    &merge_data($ds,$dir,*Index0);
}
# write 4 byte terminator
# It sits right after the 4-byte header and the $Size bytes of data.
seek(FILE,$Size+4,0) || die "$fout: cannot seek to terminator: $!\n";
((syswrite(FILE,$HdrTmr,4) || 0) == 4) ||
    die "$fout: cannot write terminator: $!\n";
close(FILE) || die "$fout: $!\n";
print STDOUT "Global data (" .
    join("x",@Dim) .
    ") is in ./$fout (endianness is $File_Bytesex).\n";
exit 0;
Event Timeline
Log In to Comment