run-in-parallel.sh

#!/bin/bash
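# Runs a batch of YARN SLS scheduler simulations in parallel across the worker
# nodes listed in NODES, one simulation per scheduler variant (regular,
# reserved, greedy, and several "informed" wait-time estimators). Each worker
# tars its results and ships them to the master (dco-node161), where unpack-exp
# post-processes everything into CDFs and a final archive under
# /HDD-2TB/results-YARN-utility/exp<N>.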
if [ $# -ne 14 ]
then
echo "Usage: ./run-in-parallel.sh TRACE_FILE TOPOLOGY_FILE PENALTY NODES(put in quotes) ESTERROR INITIAL_BUMP MIN_RATIO HB_INTERVAL CORES P1 P2 P3 P4 P5"
exit 1
fi
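# Example invocation (illustrative values only; substitute arguments matching
# your own trace and topology files):
#   ./run-in-parallel.sh TRACE-1000.trace topo-100 2.0 "101 102 103" 0.1 1.5 0.5 1000 16 0 0 0 0 0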
JOB_FILE=$1
TOPO_FILE=$2
PENALTY=$3
DCO_NODES=$4
EST_ERROR=$5
WAIT_TIME_ESTIMATION="none"
INITIAL_BUMP=$6
MIN_RATIO=$7
HB_INTERVAL=$8
CORES=$9
P1=${10}
P2=${11}
P3=${12}
P4=${13}
P5=${14}
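# Pick the next experiment number: one greater than the highest existing
# exp<N> directory in the results folder.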
EXP_START=$((`ls /HDD-2TB/results-YARN-utility | grep exp | grep -o '[0-9]\+' | sort -n | tail -n 1`+1))
NRRUNS=8
if [ -d "/HDD-2TB/results-YARN-utility/exp$EXP_START" ]
then
echo "Output directory exp$EXP_START exists. Exiting !!"
exit 1
else
echo "Using dir exp$EXP_START"
echo -e "Param: TOPOLOGY_FILE PENALTY ESTERROR INITIAL_BUMP MIN_RATIO HB_INTERVAL CORES P1 P2 P3 P4 P5 \n Param: $TOPO_FILE $PENALTY $EST_ERROR $INITIAL_BUMP $MIN_RATIO $HB_INTERVAL $CORES $P1 $P2 $P3 $P4 $P5" | column -t
fi
cd /HDD-2TB/results-YARN-utility
mkdir exp$EXP_START
cd -
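# Generate a random 4-digit experiment ID (each digit 1-9) used to tag this
# run's output files.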
EXPID=0; for i in `seq 1 4`; do ((EXPID=$EXPID*10+($RANDOM%9+1))); done
echo
CTR=100
#./kill-all
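# Remove any leftover experiment output from a worker node before starting.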
cleanup-nodes (){
NODE=$1
ssh dco-node$NODE-10g "cd /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls; rm -fr xexperiments/*;"
}
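# Launch one simulation (of type SIMTYPE) on a worker node in the background:
# run exp-runner.sh over ssh, rename the simulation log, tar the per-run
# output, and scp the archive to the master's results directory.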
run-on-node () {
NODE=$1
SIMTYPE=$2
HBSEED=$3
PORT_START=$4
#this was for grepping part of the code to find the exact penalty used
#grep -A 15 -m 1 private.*getPenalizedRunningTime /HDD-2TB/YARN/hadoop-2.3.0-src/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java > $EXPID-$CTR-$SIMTYPE-penalty
#tar czvf exp-node$NODE-$CTR.tgz xexperiments/$EXPID-$CTR-* $EXPID-$CTR-$SIMTYPE-penalty $EXPID-$CTR-$SIMTYPE-simulation.log > /dev/null;
#rm $EXPID-$CTR-$SIMTYPE-penalty;
ssh dco-node$NODE-10g "
cd /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls;
rm -fr xexperiments/$EXPID-$CTR*;
./exp-runner.sh $JOB_FILE $TOPO_FILE $SIMTYPE $EXPID $CTR $HBSEED $PORT_START $PENALTY $EST_ERROR $WAIT_TIME_ESTIMATION $INITIAL_BUMP $MIN_RATIO $HB_INTERVAL $CORES $P1 $P2 $P3 $P4 $P5;
rm -fr exp-node$NODE-$CTR.tgz;
find | grep simulation.log | grep $EXPID-$CTR | xargs -I {} mv {} $EXPID-$CTR-$SIMTYPE-$WAIT_TIME_ESTIMATION-simulation.log
tar czvf exp-node$NODE-$CTR.tgz xexperiments/$EXPID-$CTR-* $EXPID-$CTR-$SIMTYPE-$WAIT_TIME_ESTIMATION-simulation.log > /dev/null;
scp exp-node$NODE-$CTR.tgz dco-node161-10g:/HDD-2TB/results-YARN-utility/exp$EXP_START;
rm -fr xexperiments/$EXPID-$CTR*;
rm $EXPID-$CTR-$SIMTYPE-$WAIT_TIME_ESTIMATION-simulation.log;
rm exp-node$NODE-$CTR.tgz" &
CTR=$(($CTR+1))
}
#there are already tgz files on the master node and they contain the results of the simulations in this run
#here we finalize the processing of the results and create one last tgz which contains the results to be plotted
unpack-exp () {
DIR=$1
FILES=$2
#mkdir $DIR
#cp parse-per-job-rt $DIR
#mv $FILES $DIR
cd $DIR
echo "Unpacking all tgz files at `date`"
for i in *.tgz; do tar xzvf $i; done > /dev/null
echo "Done unpacking all tgz files at `date`"
mv xexperiments/* .
rmdir xexperiments
find -type f | grep 'trace$' | head -n 1 | xargs -I {} cp {} .
#ls | grep penalty | tail -n +2 | xargs rm
#Sept8 commented out because the select functionality is seldom used, plus the first line does not grep for $SIMTYPE. How is that correct?
#Sept8 also commented out avgcu as it was fairly meaningless. You can have 100% CU and take twice as long. Need another metric.
#xctr=1
#for SIMTYPE in "regular" "reserved" "greedy" "informed"
#do
# find | grep jobruntime.csv | xargs -I {} grep 'select' {} | awk -F, '{print $6}' > $xctr-$SIMTYPE.select
# ~/cdf-gen.py $xctr-$SIMTYPE.select 0 > $xctr-$SIMTYPE.select.cdf
# for i in *$SIMTYPE.tmem; do awk '{print $2}' $i | ~/stats | grep mean | awk '{print $2}'; done > $xctr-$SIMTYPE.avgcu
# ~/cdf-gen.py $xctr-$SIMTYPE.avgcu 0 > $xctr-$SIMTYPE.avgcu.cdf
# xctr=$(($xctr+1))
#done
rm -fr *.tgz
cp /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls/run-in-parallel.sh .
cp /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls/exp-runner.sh .
cp /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls/all-run* .
#TRACE_FILE=`cat run-in-parallel.sh | grep TRACE | head -n 1 | awk -F\" '{print $2}'`
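#Recover the trace file name from the unpacked result files; their names
#appear to embed the trace name followed by "---", so split on "---" and keep
#the TRACE... part.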
TRACE_FILE=`ls | grep TRACE | head -n 1 | awk -F--- '{print $1}' | grep -o 'TRACE.*'`
GEN_FILE=`echo $TRACE_FILE | sed 's/\.trace$/.trace.generator/'`
cp /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls/$TRACE_FILE .
cp /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls/$GEN_FILE .
cp /HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls/$TOPO_FILE .
cp /HDD-2TB/YARN/hadoop-2.3.0-src/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java .
cp /HDD-2TB/YARN/hadoop-2.3.0-src/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java .
echo "Using tracefile: $TRACE_FILE"
echo "Using trace generator file: $GEN_FILE"
echo "Using topology file: $GEN_FILE"
NR_NODES=`ls | grep topo | head -n 1 | grep -o 'topo-[0-9]\+' | grep -o '[0-9]\+'`
echo "Nr nodes $NR_NODES"
cat TRACE*trace | tr "," " " | awk -vN=$NR_NODES '$0 ~ /c.nr/ { print ($6*(int($18/N)+1))/1000; }' > ideal.rt
~/cdf-gen.py ideal.rt 0 > ideal.rt.cdf
echo "Done with estimating ideal running times"
PEN_CHOICE=$(grep "Penalty choice" $(ls | grep simulation.log | tail -n 1) | awk -F-- '{print $2}')
touch z$PEN_CHOICE
echo "Done creating file z$PEN_CHOICE which is named after the penalty function we are choosing"
#code to get a cdf of average job running times (meaning the avg of runtime of job j across all runs for every j)
cctr=1;
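# Build the list of simulation variants found in the logs: strip the
# EXPID-CTR prefix and the "-simulation.log" suffix, de-duplicate, drop the
# "-none" marker, and move "regular" to the front of the list.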
for SIM in `ls | grep simulation | sed 's/-simulation.log//g' | sed 's/^[0-9-]*//g' | sort | uniq | tr "\n" " " | sed 's/-none//g' | sed 's/\(.*\)\(regular\)\(.*\)/\2\3\1/g'`
do
NRJ=`grep c.mem TRACE*.trace | wc -l`
find | grep "$SIM" | grep jobruntime.csv | xargs -I {} cat {} > $cctr-$SIM-all.csv.files
> $cctr-$SIM-avg.jrt
#this seems to be assuming that jobX in one run is the same as jobX in another run.
#That may be the case if all submission times are different, but if they are equal YARN can switch the order of jobs; it will not necessarily use trace order.
#I take that back; it seems the csv files now contain both job_ (trace name) and app_ (YARN name).
for job in `seq 1 $NRJ`;
do
grep "job_$job[,_]" $cctr-$SIM-all.csv.files | awk -F, '{print ($6-$5)/1000}' | ~/stats | grep mean | awk '{print $2}' >> $cctr-$SIM-avg.jrt
done
~/cdf-gen.py $cctr-$SIM-avg.jrt 0 > $cctr-$SIM-avg.jrt.cdf
#end of code to get a cdf of average job running times
cctr=$(($cctr+1))
done
echo "Done averaging cdfs"
for file in `ls | grep simulation.log`
do
FIRST_DATE=`head -n 100 $file | grep WARN | head -n 1 | awk '{print $1,$2}' | sed 's/^14/2014/g' | awk -F, '{print $1}'`
LAST_DATE=`tail -n 100 $file | grep WARN | tail -n 1 | awk '{print $1,$2}' | sed 's/^14/2014/g' | awk -F, '{print $1}'`
echo $file "$FIRST_DATE" "$LAST_DATE" `~/date-diff "$LAST_DATE" "$FIRST_DATE"`
echo
done | column -t > sim.duration
echo "Done computing time taken by simulations"
}
#-------------------------------------- now run the damn thing ----------------------------------------#
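# Runs 1-3 use the regular/reserved/greedy schedulers; runs 4-8 all use the
# "informed" scheduler with different wait-time estimators (hAgg, hInd, crt,
# rnd, head). Each run gets its own port range starting at 10000+run*10.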
for node in $DCO_NODES
do
cleanup-nodes $node
for run in `seq 1 $NRRUNS`;
do
if [[ $run -eq 1 ]]; then SIM="regular" ; fi
if [[ $run -eq 2 ]]; then SIM="reserved" ; fi
if [[ $run -eq 3 ]]; then SIM="greedy" ; fi
if [[ $run -eq 4 ]]; then SIM="informed" ; WAIT_TIME_ESTIMATION="hAgg"; fi
if [[ $run -eq 5 ]]; then SIM="informed" ; WAIT_TIME_ESTIMATION="hInd"; fi
if [[ $run -eq 6 ]]; then SIM="informed" ; WAIT_TIME_ESTIMATION="crt"; fi
#if [[ $run -eq 7 ]]; then SIM="informed" ; WAIT_TIME_ESTIMATION="fut"; fi
if [[ $run -eq 7 ]]; then SIM="informed" ; WAIT_TIME_ESTIMATION="rnd"; fi
if [[ $run -eq 8 ]]; then SIM="informed" ; WAIT_TIME_ESTIMATION="head"; fi
run-on-node $node $SIM 0 $((10000+$run*10)) ;
done
WAIT_TIME_ESTIMATION="none"
done
wait
cd /HDD-2TB/results-YARN-utility
echo;echo;echo
echo "Done waiting for all experiments to finish, now running unpack script for $SIM at `date`"
unpack-exp exp$EXP_START "*.tgz"
echo "Done calling unpack-exp method"
../correlate-impact-with-memory.sh . > correlation-impact-with-mem
echo "Done correlating impact with memory"
echo "Creating final archive at `date`"
tar czvf ../arch/exp$EXP_START.tgz *.tmem *.cdf TRACE* run-in-parallel.sh exp-runner.sh sim.duration all-run* z* correlation-impact-with-mem topo* > /dev/null
#removed *penalty* *.select.cdf parameters
echo "Output directory is $EXP_START"
echo "All done at `date`"
echo
echo "========================================================================="
echo
echo
