run-in-parallel.sh

#!/bin/bash
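# Runs a batch of YARN SLS scheduler simulations in parallel across the worker
# nodes listed in NODES, one simulation per scheduler variant (regular,
# reserved, greedy, and several "informed" wait-time estimators). Each worker
# tars its results and ships them to the master (dco-node161), where unpack-exp
# post-processes everything into CDFs and a final archive under
# /HDD-2TB/results-YARN-utility/exp<N>.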
if [ $# -ne 14 ]
then
echo "Usage: ./run-in-parallel.sh TRACE_FILE TOPOLOGY_FILE PENALTY NODES(put in quotes) ESTERROR INITIAL_BUMP MIN_RATIO HB_INTERVAL CORES P1 P2 P3 P4 P5"
exit 1
fi
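# Example invocation (illustrative values only; substitute arguments matching
# your own trace and topology files):
#   ./run-in-parallel.sh TRACE-1000.trace topo-100 2.0 "101 102 103" 0.1 1.5 0.5 1000 16 0 0 0 0 0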
JOB_FILE=$1
TOPO_FILE=$2
PENALTY=$3
DCO_NODES=$4
EST_ERROR=$5
WAIT_TIME_ESTIMATION="none"
INITIAL_BUMP=$6
MIN_RATIO=$7
HB_INTERVAL=$8
CORES=$9
P1=${10}
P2=${11}
P3=${12}
P4=${13}
P5=${14}
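# Pick the next experiment number: one greater than the highest existing
# exp<N> directory in the results folder.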
EXP_START=$((`ls /HDD-2TB/results-YARN-utility | grep exp | grep -o '[0-9]\+' | sort -n | tail -n 1`+1))
NRRUNS=8
if [ -d "/HDD-2TB/results-YARN-utility/exp$EXP_START" ]
then
echo "Output directory exp$EXP_START exists. Exiting !!"
exit 1
else
echo "Using dir exp$EXP_START"
echo -e "Param: TOPOLOGY_FILE PENALTY ESTERROR INITIAL_BUMP MIN_RATIO HB_INTERVAL CORES P1 P2 P3 P4 P5 \n Param: $TOPO_FILE $PENALTY $EST_ERROR $INITIAL_BUMP $MIN_RATIO $HB_INTERVAL $CORES $P1 $P2 $P3 $P4 $P5" | column -t
fi
cd /HDD-2TB/results-YARN-utility
mkdir exp$EXP_START
cd -
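# Generate a random 4-digit experiment ID (each digit 1-9) used to tag this
# run's output files.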
EXPID=0; for i in `seq 1 4`; do ((EXPID=$EXPID*10+($RANDOM%9+1))); done
echo
CTR=100
#./kill-all
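# Remove any leftover experiment output from a worker node before starting.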
cleanup-nodes (){
NODE=$1
ssh dco-node$NODE-10g "cd /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls; rm -fr xexperiments/*;"
}
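# Launch one simulation (of type SIMTYPE) on a worker node in the background:
# run exp-runner.sh over ssh, rename the simulation log, tar the per-run
# output, and scp the archive to the master's results directory.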
run-on-node () {
NODE=$1
SIMTYPE=$2
HBSEED=$3
PORT_START=$4
#this was for grepping part of the code to find the exact penalty used
#grep -A 15 -m 1 private.*getPenalizedRunningTime /HDD-2TB/YARN/hadoop-2.3.0-src/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java > $EXPID-$CTR-$SIMTYPE-penalty
#tar czvf exp-node$NODE-$CTR.tgz xexperiments/$EXPID-$CTR-* $EXPID-$CTR-$SIMTYPE-penalty $EXPID-$CTR-$SIMTYPE-simulation.log > /dev/null;
#rm $EXPID-$CTR-$SIMTYPE-penalty;
ssh dco-node$NODE-10g "
cd /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls;
rm -fr xexperiments/$EXPID-$CTR*;
./exp-runner.sh $JOB_FILE $TOPO_FILE $SIMTYPE $EXPID $CTR $HBSEED $PORT_START $PENALTY $EST_ERROR $WAIT_TIME_ESTIMATION $INITIAL_BUMP $MIN_RATIO $HB_INTERVAL $CORES $P1 $P2 $P3 $P4 $P5;
rm -fr exp-node$NODE-$CTR.tgz;
find | grep simulation.log | grep $EXPID-$CTR | xargs -I {} mv {} $EXPID-$CTR-$SIMTYPE-$WAIT_TIME_ESTIMATION-simulation.log
tar czvf exp-node$NODE-$CTR.tgz xexperiments/$EXPID-$CTR-* $EXPID-$CTR-$SIMTYPE-$WAIT_TIME_ESTIMATION-simulation.log > /dev/null;
scp exp-node$NODE-$CTR.tgz dco-node161-10g:/HDD-2TB/results-YARN-utility/exp$EXP_START;
rm -fr xexperiments/$EXPID-$CTR*;
rm $EXPID-$CTR-$SIMTYPE-$WAIT_TIME_ESTIMATION-simulation.log;
rm exp-node$NODE-$CTR.tgz" &
CTR=$(($CTR+1))
}
#there are already tgz files on the master node and they contain the results of the simulations in this run
#here we finalize the processing of the results and create one last tgz which contains the results to be plotted
unpack-exp () {
DIR=$1
FILES=$2
#mkdir $DIR
#cp parse-per-job-rt $DIR
#mv $FILES $DIR
cd $DIR
echo "Unpacking all tgz files at `date`"
for i in *.tgz; do tar xzvf $i; done > /dev/null
echo "Done unpacking all tgz files at `date`"
mv xexperiments/* .
rmdir xexperiments
find -type f | grep 'trace$' | head -n 1 | xargs -I {} cp {} .
#ls | grep penalty | tail -n +2 | xargs rm
#Sept8 commented out because the select functionality is seldom used, plus the first line does not grep for $SIMTYPE. How is that correct?
#Sept8 also commented out avgcu as it was fairly meaningless. You can have 100% CU and take twice as long. Need another metric.
#xctr=1
#for SIMTYPE in "regular" "reserved" "greedy" "informed"
#do
# find | grep jobruntime.csv | xargs -I {} grep 'select' {} | awk -F, '{print $6}' > $xctr-$SIMTYPE.select
# ~/cdf-gen.py $xctr-$SIMTYPE.select 0 > $xctr-$SIMTYPE.select.cdf
# for i in *$SIMTYPE.tmem; do awk '{print $2}' $i | ~/stats | grep mean | awk '{print $2}'; done > $xctr-$SIMTYPE.avgcu
# ~/cdf-gen.py $xctr-$SIMTYPE.avgcu 0 > $xctr-$SIMTYPE.avgcu.cdf
# xctr=$(($xctr+1))
#done
rm -fr *.tgz
cp /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls/run-in-parallel.sh .
cp /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls/exp-runner.sh .
cp /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls/all-run* .
#TRACE_FILE=`cat run-in-parallel.sh | grep TRACE | head -n 1 | awk -F\" '{print $2}'`
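#Recover the trace file name from the unpacked result files; their names
#appear to embed the trace name followed by "---", so split on "---" and keep
#the TRACE... part.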
TRACE_FILE=`ls | grep TRACE | head -n 1 | awk -F--- '{print $1}' | grep -o 'TRACE.*'`
GEN_FILE=`echo $TRACE_FILE | sed 's/\.trace$/.trace.generator/'`
cp /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls/$TRACE_FILE .
cp /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls/$GEN_FILE .
cp /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls/$TOPO_FILE .
cp /HDD-2TB/YARN/hadoop-2.3.0-src/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java .
cp /HDD-2TB/YARN/hadoop-2.3.0-src/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java .
echo "Using tracefile: $TRACE_FILE"
echo "Using trace generator file: $GEN_FILE"
echo "Using topology file: $GEN_FILE"
NR_NODES=`ls | grep topo | head -n 1 | grep -o 'topo-[0-9]\+' | grep -o '[0-9]\+'`
echo "Nr nodes $NR_NODES"
cat TRACE*trace | tr "," " " | awk -vN=$NR_NODES '$0 ~ /c.nr/ { print ($6*(int($18/N)+1))/1000; }' > ideal.rt
~/cdf-gen.py ideal.rt 0 > ideal.rt.cdf
echo "Done with estimating ideal running times"
PEN_CHOICE=$(grep "Penalty choice" $(ls | grep simulation.log | tail -n 1) | awk -F-- '{print $2}')
touch z$PEN_CHOICE
echo "Done creating file z$PEN_CHOICE which is named after the penalty function we are choosing"
#code to get a cdf of average job running times (meaning the avg of runtime of job j across all runs for every j)
cctr=1;
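# Build the list of simulation variants found in the logs: strip the
# EXPID-CTR prefix and the "-simulation.log" suffix, de-duplicate, drop the
# "-none" marker, and move "regular" to the front of the list.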
for SIM in `ls | grep simulation | sed 's/-simulation.log//g' | sed 's/^[0-9-]*//g' | sort | uniq | tr "\n" " " | sed 's/-none//g' | sed 's/\(.*\)\(regular\)\(.*\)/\2\3\1/g'`
do
NRJ=`grep c.mem TRACE*.trace | wc -l`
find | grep "$SIM" | grep jobruntime.csv | xargs -I {} cat {} > $cctr-$SIM-all.csv.files
> $cctr-$SIM-avg.jrt
#this seems to be assuming that jobX in one run is the same as jobX in another run.
#That may be the case if all submission times are different, but if they are equal YARN can switch the order of jobs; it will not necessarily use trace order.
#I take that back; it seems the csv files now contain both job_ (trace name) and app_ (YARN name).
for job in `seq 1 $NRJ`;
do
grep "job_$job[,_]" $cctr-$SIM-all.csv.files | awk -F, '{print ($6-$5)/1000}' | ~/stats | grep mean | awk '{print $2}' >> $cctr-$SIM-avg.jrt
done
~/cdf-gen.py $cctr-$SIM-avg.jrt 0 > $cctr-$SIM-avg.jrt.cdf
#end of code to get a cdf of average job running times
cctr=$(($cctr+1))
done
echo "Done averaging cdfs"
for file in `ls | grep simulation.log`
do
FIRST_DATE=`head -n 100 $file | grep WARN | head -n 1 | awk '{print $1,$2}' | sed 's/^14/2014/g' | awk -F, '{print $1}'`
LAST_DATE=`tail -n 100 $file | grep WARN | tail -n 1 | awk '{print $1,$2}' | sed 's/^14/2014/g' | awk -F, '{print $1}'`
echo $file "$FIRST_DATE" "$LAST_DATE" `~/date-diff "$LAST_DATE" "$FIRST_DATE"`
echo
done | column -t > sim.duration
echo "Done computing time taken by simulations"
}
#-------------------------------------- now run the damn thing ----------------------------------------#
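# Runs 1-3 use the regular/reserved/greedy schedulers; runs 4-8 all use the
# "informed" scheduler with different wait-time estimators (hAgg, hInd, crt,
# rnd, head). Each run gets its own port range starting at 10000+run*10.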
for node in $DCO_NODES
do
cleanup-nodes $node
for run in `seq 1 $NRRUNS`;
do
if [[ $run -eq 1 ]]; then SIM="regular" ; fi
if [[ $run -eq 2 ]]; then SIM="reserved" ; fi
if [[ $run -eq 3 ]]; then SIM="greedy" ; fi
if [[ $run -eq 4 ]]; then SIM="informed" ; WAIT_TIME_ESTIMATION="hAgg"; fi
if [[ $run -eq 5 ]]; then SIM="informed" ; WAIT_TIME_ESTIMATION="hInd"; fi
if [[ $run -eq 6 ]]; then SIM="informed" ; WAIT_TIME_ESTIMATION="crt"; fi
#if [[ $run -eq 7 ]]; then SIM="informed" ; WAIT_TIME_ESTIMATION="fut"; fi
if [[ $run -eq 7 ]]; then SIM="informed" ; WAIT_TIME_ESTIMATION="rnd"; fi
if [[ $run -eq 8 ]]; then SIM="informed" ; WAIT_TIME_ESTIMATION="head"; fi
run-on-node $node $SIM 0 $((10000+$run*10)) ;
done
WAIT_TIME_ESTIMATION="none"
done
wait
cd /HDD-2TB/results-YARN-utility
echo;echo;echo
echo "Done waiting for all experiments to finish, now running unpack script for $SIM at `date`"
unpack-exp exp$EXP_START "*.tgz"
echo "Done calling unpack-exp method"
../correlate-impact-with-memory.sh . > correlation-impact-with-mem
echo "Done correlating impact with memory"
echo "Creating final archive at `date`"
tar czvf ../arch/exp$EXP_START.tgz *.tmem *.cdf TRACE* run-in-parallel.sh exp-runner.sh sim.duration all-run* z* correlation-impact-with-mem topo* > /dev/null
#removed *penalty* *.select.cdf parameters
echo "Output directory is $EXP_START"
echo "All done at `date`"
echo
echo "========================================================================="
echo
echo
