Page MenuHomec4science

unpack-exp
No OneTemporary

File Metadata

Created
Tue, Feb 25, 10:36

unpack-exp

#!/bin/bash
#
# unpack-exp: collect experiment archives into a working directory and
# post-process them there.
#
# Usage: ./this dir files
#   dir    working directory; created if it does not exist yet
#   files  archive(s) to move into dir; this single argument is deliberately
#          word-split and glob-expanded, so it may hold a pattern or a list
#
# Environment:
#   YARN_SLS_RUN_PATH  must be set; config/config_path.sh is sourced from it.
#   YARN_PATH          used by later steps; presumably defined by
#                      config_path.sh -- confirm against that file.
if [[ -z $YARN_SLS_RUN_PATH ]]; then
  echo "YARN_SLS_RUN_PATH not set!"
  exit 1
fi
#YARN_SLS_RUN_PATH=/HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls
. "$YARN_SLS_RUN_PATH/config/config_path.sh"

if [ $# -ne 2 ]; then
  echo "Usage: ./this dir files"
  # A usage error is a failure; report it through the exit status.
  exit 1
fi

DIR=$1
FILES=$2

# -p tolerates an already existing directory and creates missing parents.
mkdir -p -- "$DIR" || exit 1

# FILES is intentionally left unquoted so a pattern or list expands here.
# shellcheck disable=SC2086
mv -- $FILES "$DIR"

# Everything below works on (and deletes files from) the current directory,
# so never continue if we could not enter the working directory.
cd -- "$DIR" || {
  echo "Cannot change directory to $DIR" >&2
  exit 1
}
# Unpack every archive in the working directory; tar's verbose listing is
# discarded.
echo "Unpacking all tgz files"
for archive in *.tgz; do
  # When nothing matches, the pattern stays literal; skip it.
  [[ -e "$archive" ]] || continue
  tar xzvf "$archive"
done > /dev/null
echo "Done unpacking all tgz files"

# Flatten the top-level directory the archives unpack into.
mv xexperiments/* .
rmdir xexperiments

# Pick the first trace file found anywhere below the working directory and
# copy it next to the results. The -name pattern is quoted so that find, not
# the shell, expands it.
trace_path=$(find . -name '*.trace' -type f | head -n 1)
TRACE_FILE=
if [[ -n "$trace_path" ]]; then
  TRACE_FILE=${trace_path##*/}
  cp -- "$trace_path" .
fi

# Same for the first topology file.
topo_path=$(find . -name 'topo-*' -type f | head -n 1)
TOPO_FILE=
if [[ -n "$topo_path" ]]; then
  TOPO_FILE=${topo_path##*/}
  cp -- "$topo_path" .
fi
# Derive the distinct simulation types from the names of directories that
# contain "TRACE". Each "---" in the path is turned into a field separator and
# the third field is taken as the type; when it is "informed", the fourth
# field is appended ("informed-<field 4>").
SIMTYPES=$(find . -type d | grep TRACE | sed 's/---/ /g' |
  awk '{if ($3 == "informed") { print $3"-"$4 } else { print $3 } }' | sort -u)

# SIMTYPES is a whitespace-separated list, so it is expanded unquoted on
# purpose.
for SIMTYPE in $SIMTYPES; do
  # For every .tmem file of this type, feed its second column to the stats
  # script and keep the value reported on its "mean" line.
  for tmem_file in *-"$SIMTYPE"*.tmem; do
    awk '{print $2}' "$tmem_file" | "$YARN_PATH/scripts/stats" | grep mean | awk '{print $2}'
  done > "$DIR-$SIMTYPE.cu"
  "$YARN_PATH/scripts/cdf-gen.py" "$DIR-$SIMTYPE.cu" 0 > "$DIR-$SIMTYPE.cu.cdf"
done
echo "Using tracefile: $TRACE_FILE"
echo "Using topology file: $TOPO_FILE"

# The node count is the number following "topo-" in the name of the first
# directory entry containing "topo".
NR_NODES=
for entry in *topo*; do
  # When nothing matches, the pattern stays literal; skip it.
  [[ -e "$entry" ]] || continue
  if [[ "$entry" =~ topo-([0-9]+) ]]; then
    NR_NODES=${BASH_REMATCH[1]}
  fi
  break
done
echo "Nr nodes $NR_NODES"

# For every trace line matching /c.nr/, emit field6 * (int(field18 / N) + 1)
# in thousands. N is a divisor, so only run awk with a positive integer;
# otherwise awk would abort on a division by zero.
if [[ "$NR_NODES" =~ ^[0-9]*[1-9][0-9]*$ ]]; then
  cat TRACE*trace | tr "," " " |
    awk -v N="$NR_NODES" '$0 ~ /c.nr/ { print ($6*(int($18/N)+1))/1000; }' > ideal.rt
else
  echo "Could not determine the number of nodes; ideal.rt is left empty" >&2
  : > ideal.rt
fi
"$YARN_PATH/scripts/cdf-gen.py" ideal.rt 0 > ideal.rt.cdf
echo "Done with estimating ideal running times"
# Move every simulation log found below the working directory into it.
# The list is collected before anything is moved, the -name pattern is quoted
# so that find (not the shell) expands it, and paths are read line by line so
# that names containing spaces survive.
simulation_logs=$(find . -name '*-simulation.log')
while IFS= read -r logfile; do
  # An empty result still yields one empty line; ignore it.
  [[ -n "$logfile" ]] || continue
  mv -- "$logfile" .
done <<< "$simulation_logs"
#PEN_CHOICE=$(grep "Penalty choice" $(ls | grep simulation.log | tail -n 1) | awk -F-- '{print $2}')
#touch z$PEN_CHOICE
#echo "Done creating file z$PEN_CHOICE which is named after the penalty function we are choosing"
# Build a CDF of average job running times: for every job j, the average of
# j's runtime across all runs of one simulation kind.

# Number of jobs = number of trace lines matching "c.mem". It does not depend
# on the simulation kind, so it is computed once rather than per iteration.
NRJ=$(grep c.mem TRACE*.trace | wc -l)

cctr=1
# The simulation kinds come from the *simulation* file names: the
# "-simulation.log" suffix, the leading digits/dashes and any "-none" are
# stripped, duplicates are removed, and the final sed moves the text from
# "regular" onwards to the front of the joined list. The pipeline is kept
# exactly as it was because the ordering determines the cctr prefixes.
for SIM in $(ls | grep simulation | sed 's/-simulation.log//g' | sed 's/^[0-9-]*//g' |
  sort | uniq | tr "\n" " " | sed 's/-none//g' |
  sed 's/\(.*\)\(regular\)\(.*\)/\2\3\1/g'); do
  # Directory names use "---" where the kind uses "-".
  SIM_FULL=${SIM//-/---}
  # Concatenate every jobruntime.csv belonging to this simulation kind.
  find . | grep "$SIM_FULL" | grep jobruntime.csv | xargs -I {} cat {} > "$cctr-$SIM-all.csv.files"
  # Start from an empty result file.
  > "$cctr-$SIM-avg.jrt"
  # This relies on job_X denoting the same job in every run. Per the original
  # author's note, the csv files contain both the job_ (trace) name and the
  # app_ (YARN) name, so matching on job_ does not depend on the order in
  # which YARN happened to run jobs with equal submission times.
  for ((job = 1; job <= NRJ; job++)); do
    grep "job_${job}[,_]" "$cctr-$SIM-all.csv.files" | awk -F, '{print ($6-$5)/1000}' |
      "$YARN_PATH/scripts/stats" | grep mean | awk '{print $2}' >> "$cctr-$SIM-avg.jrt"
  done
  "$YARN_PATH/scripts/cdf-gen.py" "$cctr-$SIM-avg.jrt" 0 > "$cctr-$SIM-avg.jrt.cdf"
  cctr=$((cctr + 1))
done
# End of the code that builds the CDF of average job running times.
echo "Done averaging cdfs"
# Write one line per simulation log to sim.duration: the log name, the
# timestamps of its first and last WARN lines (searched in the first and last
# 100 lines respectively), and whatever date-diff prints for the two.
# The glob selects the same names as the former `ls | grep simulation.log`
# ("?" stands for grep's "." wildcard).
for file in *simulation?log*; do
  # When nothing matches, the pattern stays literal; skip it.
  [[ -e "$file" ]] || continue
  # NOTE(review): the sed only expands a leading "14" to "2014", so logs
  # written in any other year keep their two-digit year -- confirm whether
  # date-diff copes with that.
  FIRST_DATE=$(head -n 100 "$file" | grep WARN | head -n 1 | awk '{print $1,$2}' |
    sed 's/^14/2014/g' | awk -F, '{print $1}')
  LAST_DATE=$(tail -n 100 "$file" | grep WARN | tail -n 1 | awk '{print $1,$2}' |
    sed 's/^14/2014/g' | awk -F, '{print $1}')
  # The date-diff output is left unquoted so it is flattened onto this line.
  # shellcheck disable=SC2046
  echo "$file" "$FIRST_DATE" "$LAST_DATE" $("$YARN_PATH/scripts/date-diff" "$LAST_DATE" "$FIRST_DATE")
  echo
done | column -t > sim.duration
# The unpacked archives are no longer needed.
rm -fr -- *.tgz

# Ship the runner scripts together with the results.
cp -- "$YARN_SLS_RUN_PATH/run-in-parallel.sh" .
cp -- "$YARN_SLS_RUN_PATH/exp-runner.sh" .

echo "Creating final archive files"
mkdir -p ../arch
# The archive is named after the working directory and stored next to it in
# ../arch. The file patterns stay unquoted so that they expand here.
tar czvf "../arch/$DIR.tgz" *.tmem *.cdf TRACE*trace* run-in-parallel.sh exp-runner.sh sim.duration *avg.jrt* ideal.rt*

Event Timeline