Page MenuHomec4science

gluemnc
No OneTemporary

File Metadata

Created
Mon, Feb 24, 10:16
#! /usr/bin/env bash
#
# $Header: /u/gcmpack/MITgcm/utils/scripts/gluemnc,v 1.16 2011/11/10 14:26:16 mlosch Exp $
# $Name: $
# This is a shell script to combine multiple MITgcm mnc output files from
# different tiles into one global file.
# All of the variables should be in one directory, where this script is run.
#
# To combine all state.0000000000.t*.nc files,
# gluemnc state.0000000000.*.nc
# This will result in an output file state.0000000000.glob.nc
# Where glob is for global.
#
# You can even combine all mnc files, use
# gluemnc *.nc
# This will result in a series of global files,
# state.0000000000.glob.nc state.0000000100.glob.nc, ...
# grid.0000000000.glob.nc grid.0000000100.glob.nc, ...
# diag.0000000000.glob.nc diag.0000000100.glob.nc, ...
#
# A lot of hard drive activity is needed. If you have a fast drive,
# export TMPDIR=<path on that drive> before running this script; the
# temporary working directory is created inside $TMPDIR. On some
# high-performance systems, this is already done for you.
#
# **********WARNINGS**********
# This will probably not work at all with exch2/cubed sphere.
# In that case, you probably can assemble all of the tiles on a face,
# but combining faces is currently not implemented.
#
# Be sure you have enough disk space for the copies! In this version
# nothing is done to assure all of the data is copied.
#
# Be careful! It will be easy to exceed the 2 GB limit for the old 32-bit
# version of netcdf. If you do not have large-file support or 64-bit netcdf
# you will have to be clever in dividing up your tiled files,
# e.g., along the time dimension before combining to global files.
# The nco operator ncks is adept at shortening files to fewer snapshots.
# *****************************
#
# Good luck and happy gluing,
# Baylor Fox-Kemper
# Verbosity option handed to every NCO operator, and the file that receives
# the standard output of those operators.
DEBUG="--dbg_lvl=0"
LOGFILE="/dev/null"
# Directory the script was started from: the input tiles are linked from
# here and the glued files are copied back to it.
DIRORIG=$(pwd)
# Work below $TMPDIR if the caller set it, otherwise below the start directory.
if [ -z "$TMPDIR" ]; then
  TMPDIR=$DIRORIG
fi
export DIRNAME="$TMPDIR/gluedir.$RANDOM"
# Plain mkdir (no -p) fails if the directory already exists, so a $RANDOM
# collision can never make the script work in, and later delete, a
# directory that it did not create itself.
if ! mkdir "$DIRNAME"; then
  echo "Error: cannot create temporary directory $DIRNAME" >&2
  exit 1
fi
# From here on remove the temporary directory on every exit path, including
# error exits. The cd comes first because $DIRNAME may be a path relative
# to $DIRORIG.
trap 'cd "$DIRORIG" 2>/dev/null; rm -rf -- "$DIRNAME"' EXIT
echo "Using temporary directory $DIRNAME"
# Prefer an xplodemnc in the current directory, otherwise take the one
# found on PATH.
if [ -f xplodemnc ]; then
  xplode=xplodemnc
else
  xplode=$(command -v xplodemnc)
fi
if [ -z "$xplode" ] || ! cp "$xplode" "$DIRNAME"; then
  echo "Error: cannot find or copy xplodemnc" >&2
  exit 1
fi
# find an unambiguous name for a new record dimension: "record" followed by
# whatever comes after the last '.' of the temporary directory name
myrecname=record${DIRNAME##*.}
cd "$DIRNAME" || exit 1
# Print the message given as arguments on stderr, remove the temporary
# directory and terminate the script with status 1.
glue_abort() {
  echo "$*" >&2
  cd "$DIRORIG" && [ -n "$DIRNAME" ] && rm -rf -- "$DIRNAME"
  exit 1
}

# At least one tile file must be named on the command line.
[ $# -gt 0 ] || glue_abort "You must input mnc filenames to be glued"
inone=$1
# Link every input tile into the current (temporary) directory, so that all
# later steps can refer to the tiles by their plain names.
for somefile in "$@"; do
  ln -s "$DIRORIG/$somefile" .
  # -s follows the link just created: it fails if the original file is
  # missing or has size zero.
  if [ ! -s "$somefile" ]; then
    glue_abort "Error: $somefile is missing or empty"
  fi
done
# Print the distinct words among the arguments, separated by single blanks,
# in order of first appearance.
unique_words() {
  local word result=
  for word in "$@"; do
    # The quoted pattern is matched literally; the surrounding blanks make
    # sure only whole words compare equal.
    case " $result " in
      *" $word "*) ;;
      *) result="$result $word" ;;
    esac
  done
  printf '%s\n' "${result# }"
}

# Strip the tile suffix ".tNNN.nc" from every input name and keep each
# resulting prefix once; one global file is produced per prefix.
prels=$(unique_words "${@%.t???.nc}")
# ML: determine the coordinate variable (this is a hack for the unlikely
# case that we do not have X or Y as coordinate variables; this can
# happen when only U-point or V-point variables are written to a
# diagnostics stream; I do not know if this always works, but it works for me)
echo Determine a usable coordinate variable
# Probe the first tile named on the command line. Building the name from
# $prels is not safe, because $prels holds several words as soon as more
# than one prefix is glued in a single call.
somefile=$1

# Succeed if "ncdump -v NAME" on file $1 prints a line containing "NAME = ",
# where NAME is given as $2.
has_coord_var() {
  [ -n "$(ncdump -v"$2" -l 10000 "$1" | grep "$2 = ")" ]
}

# first try X and Y, then fall back to Xp1 and Yp1
Xcoord=
if has_coord_var "$somefile" X; then
  Xcoord=X
else
  echo "X not found, trying Xp1"
  if has_coord_var "$somefile" Xp1; then
    Xcoord=Xp1
  else
    echo "no X-coordinate found"
  fi
fi
Ycoord=
if has_coord_var "$somefile" Y; then
  Ycoord=Y
else
  echo "Y not found, trying Yp1"
  if has_coord_var "$somefile" Yp1; then
    Ycoord=Yp1
  else
    echo "no Y-coordinate found"
  fi
fi
# Without a coordinate variable in both directions the tiles cannot be
# sorted into rows and columns: give up, remove the temporary directory and
# report the failure through the exit status.
if [ -z "$Xcoord" ] || [ -z "$Ycoord" ]; then
  echo cannot continue
  cd "$DIRORIG" && [ -n "$DIRNAME" ] && rm -rf -- "$DIRNAME"
  exit 1
fi
# Print the values of coordinate variable $2 of netCDF file $1 as a single
# blank-free word, e.g. "X=0.5,1.5,2.5". Files that yield the same word are
# treated below as lying at the same X (or Y) position of the tile grid.
tile_coord() {
  local vals
  vals=$(ncdump -v"$2" -l 10000 "$1" | grep "$2 = ")
  # Keep what lies between the first and the last ';' of the matching lines
  # (presumably this drops the dimension declaration and keeps the data
  # line -- depends on the ncdump output layout).
  vals=${vals#*;}
  vals=${vals%;*}
  # Unquoted on purpose: word splitting folds newlines and tabs into blanks
  # before sed deletes all blanks.
  echo $vals | sed s/' '//g
}

# One pass per prefix. Outline of a pass:
#   1. scan all tiles of the prefix for their X and Y coordinate words,
#   2. explode every tile into one file per variable (xplodemnc) and make
#      Y (or Yp1) the record dimension of each of these files,
#   3. for every X position, concatenate the per-variable files along the
#      record dimension (ncrcat) into *.gloy.* files and swap the
#      dimensions back,
#   4. repeat the same trick in X to obtain one *.glob.nc per variable,
#   5. append all variables to $somepre.glob.nc and copy it to $DIRORIG.
for somepre in $prels; do
  echo "Making $somepre.glob.nc..."
  # Tiles of this prefix and their X coordinate words, index-aligned, so
  # that ncdump has to run only once per tile.
  tilefiles=()
  tileXs=()
  Xsls=
  Ysls=
  for somefile in "$@"; do
    if [ "${somefile%.t???.nc}" = "$somepre" ]; then
      echo "Scanning $somefile..."
      Xs=$(tile_coord "$somefile" "$Xcoord")
      Ys=$(tile_coord "$somefile" "$Ycoord")
      Xsls=$Xsls$Xs" "
      Ysls=$Ysls$Ys" "
      tilefiles+=("$somefile")
      tileXs+=("$Xs")
    fi
  done

  # Determine all the X locations (distinct coordinate words)
  sXsls=
  countx=0
  for someXs in $Xsls; do
    case " $sXsls" in
      *" $someXs "*) ;;
      *)
        sXsls=$sXsls$someXs" "
        countx=$((countx + 1))
        ;;
    esac
  done
  echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  echo "$countx tiles found in x-direction."
  echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

  # Determine all the Y locations (only their number is reported)
  sYsls=
  county=0
  for someYs in $Ysls; do
    case " $sYsls" in
      *" $someYs "*) ;;
      *)
        sYsls=$sYsls$someYs" "
        county=$((county + 1))
        ;;
    esac
  done
  echo YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
  echo "$county tiles found in y-direction."
  echo YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY

  # X positions are numbered 1001, 1002, ... so that the number always has
  # four digits in the intermediate file names i<number>.<var>.<tile file>.
  countxx=1000
  cntls=
  for someX in $sXsls; do
    countxx=$((countxx + 1))
    cntls=$cntls$countxx" "
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    echo "Prepping X tile $((countxx - 1000))"
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    for idx in "${!tilefiles[@]}"; do
      # Only the tiles located at this X position
      [ "${tileXs[$idx]}" = "$someX" ] || continue
      somefile=${tilefiles[$idx]}
      ./xplodemnc "$somefile"
      if [ -f "iter.$somefile" ]; then
        mv "iter.$somefile" "iter.${somefile%t???.nc}glob.nc"
      fi
      # The glob is expanded once, before the loop body renames anything.
      for somesplit in *."$somefile"; do
        [ -e "$somesplit" ] || continue
        # Added to account for grid files with no T dimension defined:
        # Search for the unlimited dimension and get its name, assuming
        # that its first character is a letter from the alphabet, the "tr"
        # removes the blank characters
        recname=$(ncdump -h "$somesplit" \
          | sed -n 's/\([a-z,A-Z]*\) = \(UNLIMITED\) .*/\1/p' \
          | tr -d ' \t')
        if [[ -z "$recname" ]]; then
          echo "No record dimension found, adding one now: $myrecname"
          ncecat $DEBUG -O -u "$myrecname" "$somesplit" "$somesplit" > "$LOGFILE"
          recname=$myrecname
        fi
        withY=$(ncdump -h "$somesplit" | grep "Y =")
        if [ ${#withY} -gt 1 ]; then
          echo "Changing Y to record variable in $somesplit"
          ncpdq $DEBUG -O -a Y,"$recname" "$somesplit" "$somesplit" > "$LOGFILE"
          mv "$somesplit" "i$countxx.$somesplit"
        fi
        # Still present only if the file had no Y dimension
        if [ -f "$somesplit" ]; then
          withYp1=$(ncdump -h "$somesplit" | grep "Yp1 =")
          if [ ${#withYp1} -gt 1 ]; then
            # Length of Yp1 minus one, taken from the "Yp1 = N ;" header line
            Yp1len=${withYp1#*= }
            Yp1len=$((${Yp1len% ;} - 1))
            # Strip off the repeated value in Yp1
            echo "Changing Yp1 to record variable in $somesplit"
            ncpdq $DEBUG -O -a Yp1,"$recname" -F -d Yp1,1,"$Yp1len" "$somesplit" "$somesplit" > "$LOGFILE"
            mv "$somesplit" "i$countxx.$somesplit"
          fi
        fi
      done
    done
  done

  # Unquoted on purpose: drops the trailing blank of the list
  echo Tile names $cntls
  for countxx in $cntls; do
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    echo "Combining X tile $((countxx - 1000))"
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    # Collect each variable name once from the i<number>.<var>.* file names
    varls=
    for somefile in i"$countxx".*; do
      [ -e "$somefile" ] || continue
      varname=${somefile#i[0-9][0-9][0-9][0-9].}
      varname=${varname%%.*}
      case " $varls" in
        *" $varname "*) echo "$varname repeated" ;;
        *) varls=$varls$varname" " ;;
      esac
    done
    echo Found these variables to combine: $varls
    for somevar in $varls; do
      echo YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
      echo "Combining $somevar files in Y"
      echo YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
      gloy=$somevar.$somepre.gloy.$countxx.nc
      filelist=(i"$countxx.$somevar.$somepre".*.nc)
      # The first file tells whether this variable lives on Y or on Yp1
      withY=$(ncdump -h "${filelist[0]}" | grep "Y =")
      withYp1=$(ncdump -h "${filelist[0]}" | grep "Yp1 =")
      ncrcat $DEBUG "${filelist[@]}" "$gloy" > "$LOGFILE"
      echo "Just combined $countxx.$somevar"
      rm "i$countxx.$somevar.$somepre".t???.nc
      # Recover the name of the record variable, there are two possibilities:
      # T (MITgcm convention) and $myrecname.
      # I must admit that this could be more elegant.
      recname=$(ncdump -h "$gloy" | grep "$myrecname")
      if [[ -z $recname ]]; then
        recname=T
      else
        recname=$myrecname
      fi
      if [ ${#withY} -gt 1 ]; then
        echo "Changing $recname to record variable in $gloy"
        ncpdq $DEBUG -O -a "$recname",Y "$gloy" "$gloy" > "$LOGFILE"
      fi
      if [ ${#withYp1} -gt 1 ]; then
        echo "Changing $recname to record variable in $gloy"
        ncpdq $DEBUG -O -a "$recname",Yp1 "$gloy" "$gloy" > "$LOGFILE"
      fi
    done
  done

  # $varls still holds the variable list of the last X position
  for somevar in $varls; do
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    echo "Combining $somevar files in X..."
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    globvar=$somevar.$somepre.glob.nc
    for somegloy in "$somevar.$somepre".gloy.*.nc; do
      [ -e "$somegloy" ] || continue
      withX=$(ncdump -h "$somegloy" | grep "X =")
      withXp1=$(ncdump -h "$somegloy" | grep "Xp1 =")
      recname=$(ncdump -h "$somegloy" \
        | sed -n 's/\([a-z,A-Z]*\) = \(UNLIMITED\) .*/\1/p' \
        | tr -d ' \t')
      if [ ${#withX} -gt 1 ]; then
        echo "Changing X to record variable in $somegloy"
        ncpdq $DEBUG -O -a X,"$recname" "$somegloy" "$somegloy" > "$LOGFILE"
      fi
      if [ ${#withXp1} -gt 1 ]; then
        # Length of Xp1 minus one, taken from the "Xp1 = N ;" header line
        Xp1len=${withXp1#*= }
        Xp1len=$((${Xp1len% ;} - 1))
        # Strip off the repeated value in Xp1
        echo "Changing Xp1 to record variable in $somegloy"
        # Record the exact command in the log before running it
        echo ncpdq $DEBUG -O -a Xp1,$recname -F -d Xp1,1,$Xp1len $somegloy $somegloy > "$LOGFILE"
        ncpdq $DEBUG -O -a Xp1,"$recname" -F -d Xp1,1,"$Xp1len" "$somegloy" "$somegloy" > "$LOGFILE"
      fi
    done
    echo "Combining $somevar.gloy files..."
    ncrcat $DEBUG "$somevar.$somepre".gloy.*.nc "$globvar" > "$LOGFILE"
    # rm $somevar.$somepre.gloy.*.nc
    # recname is still valid, so change back to it without testing for it again
    if [ ${#withX} -gt 1 ]; then
      echo "Changing $recname to record variable in $globvar"
      ncpdq $DEBUG -O -a "$recname",X "$globvar" "$globvar" > "$LOGFILE"
    fi
    if [ ${#withXp1} -gt 1 ]; then
      echo "Changing $recname to record variable in $globvar"
      ncpdq $DEBUG -O -a "$recname",Xp1 "$globvar" "$globvar" > "$LOGFILE"
    fi
    if [ "$recname" = "$myrecname" ]; then
      # only for variables that did not have a record dimension to begin with
      echo "removing record dimension $recname from $globvar"
      ncwa $DEBUG -O -a "$recname" "$globvar" "$globvar"
    fi
    # Append this variable to the global file of the prefix
    ncks $DEBUG -A "$globvar" "$somepre.glob.nc" > "$LOGFILE"
    # rm $somevar.$somepre.glob.nc
  done
  if [ -f "iter.$somepre.glob.nc" ]; then
    ncks $DEBUG -A "iter.$somepre.glob.nc" "$somepre.glob.nc" > "$LOGFILE"
  fi
  # rm iter.$somepre.glob.nc
  # another hack by rpa to accommodate grid.nc files
  # (there are several variables with just Z dimension that we want to keep)
  # varsz=$( ncdump -h $somepre.t001.nc | sed -n 's/^\s*\(double\|float\).* \(\w*\)(Z\w*).*/\2/p' )
  # The OR ("\|") and "\s", "\w" only work for GNU-sed, but not for
  # BSD-sed or SunOS-sed, therefore we need to use some work-arounds:
  varsz=$(ncdump -h "$somepre.t001.nc" | grep -E "double|float" \
    | grep -v , | sed -n 's/.* \(.*\)(Z.*).*/\1/p')
  for varz in $varsz; do
    # check to make sure the variable does not already exist in the glob file
    if [[ -z $(ncdump -h "$somepre.glob.nc" | grep " $varz(") ]]; then
      echo "Adding variable $varz to $somepre.glob.nc"
      ncks $DEBUG -A -v "$varz" "$somepre.t001.nc" "$somepre.glob.nc" > "$LOGFILE"
    fi
  done
  # Deliver the result; at least report it if the copy did not succeed.
  if ! cp "$somepre.glob.nc" "$DIRORIG"; then
    echo "Error: could not copy $somepre.glob.nc to $DIRORIG" >&2
  fi
done
# Return to the starting directory and discard the temporary work area.
# The directory is removed only after a successful cd, because $DIRNAME may
# be a path relative to $DIRORIG; the quotes keep a name containing blanks
# from being split into several unrelated paths.
cd "$DIRORIG" || exit 1
rm -rf -- "$DIRNAME"

Event Timeline