unpack-exp
No One · Temporary
Actions

Subscribers

None

File Metadata

Created: Tue, Feb 25, 10:36

unpack-exp
View Options

	#!/bin/bash
	#
	# unpack-exp: move experiment tarballs into a directory, unpack them, derive
	# summary files from their contents and pack the results under ../arch.
	#
	# Usage: unpack-exp DIR FILES
	#   DIR    directory to create; also used as a prefix for generated files
	#   FILES  tarball(s) to move into DIR
	#
	# Environment:
	#   YARN_SLS_RUN_PATH  must be set; config/config_path.sh is sourced from it.

	if [[ -z $YARN_SLS_RUN_PATH ]]; then
	  echo "YARN_SLS_RUN_PATH not set!" >&2
	  exit 1
	fi
	#YARN_SLS_RUN_PATH=/HDD-2TB/YARN/hadoop-2.3.0/share/hadoop/tools/sls

	# NOTE(review): the rest of the script uses $YARN_PATH, which is never assigned
	# here; presumably config_path.sh defines it -- confirm.
	# Stop early if the config cannot be loaded, instead of continuing with
	# unset paths.
	if ! . "$YARN_SLS_RUN_PATH/config/config_path.sh"; then
	  echo "Could not source $YARN_SLS_RUN_PATH/config/config_path.sh" >&2
	  exit 1
	fi

	# Exactly two arguments are required; a usage error is reported on stderr
	# with a non-zero status so callers can detect it.
	if [[ $# -ne 2 ]]; then
	  echo "Usage: ./this dir files" >&2
	  exit 1
	fi


	# Positional arguments: target directory and the tarball(s) to move into it.
	DIR=$1
	FILES=$2


	mkdir "$DIR"
	# $FILES stays unquoted so that a space-separated list or glob passed as a
	# single argument still expands to several files.
	mv $FILES "$DIR"
	# All following steps (including the later removal of *.tgz) operate on the
	# current directory, so abort if the directory cannot be entered.
	cd "$DIR" || exit 1
	echo "Unpacking all tgz files"
	for i in *.tgz; do tar xzvf "$i"; done > /dev/null
	echo "Done unpacking all tgz files"
	mv xexperiments/* .
	rmdir xexperiments

	# Copy the first trace file found anywhere below the current directory to
	# the top level and remember its base name. The -name patterns are quoted so
	# the shell cannot expand them before find sees them.
	trace_path=$(find . -name '*.trace' -type f | head -n 1)
	TRACE_FILE=
	if [[ -n $trace_path ]]; then
	  TRACE_FILE=$(basename "$trace_path")
	  cp "$trace_path" .
	fi

	# Same for the first topology file (name starting with "topo-").
	topo_path=$(find . -name 'topo-*' -type f | head -n 1)
	TOPO_FILE=
	if [[ -n $topo_path ]]; then
	  TOPO_FILE=$(basename "$topo_path")
	  cp "$topo_path" .
	fi

	# Simulation types come from directory paths containing "TRACE": each path is
	# split on "---" and the third field is the type; for "informed" the fourth
	# field is appended with a dash.
	SIMTYPES=$(find . -type d | grep TRACE | sed 's/---/ /g' \
	  | awk '{if ($3 == "informed") { print $3"-"$4 } else { print $3 } }' \
	  | sort -u)

	for SIMTYPE in $SIMTYPES; do
	  # For every .tmem file of this type, feed column 2 to the stats helper and
	  # keep the value reported on its "mean" line.
	  # NOTE(review): the wildcards in this glob were lost in the copy this was
	  # recovered from ("-$SIMTYPE.tmem"); "*-$SIMTYPE*.tmem" is a reconstruction
	  # -- confirm against the real .tmem file names.
	  for i in *-"$SIMTYPE"*.tmem; do
	    awk '{print $2}' "$i" | "$YARN_PATH/scripts/stats" | grep mean \
	      | awk '{print $2}'
	  done > "$DIR-$SIMTYPE.cu"
	  "$YARN_PATH/scripts/cdf-gen.py" "$DIR-$SIMTYPE.cu" 0 > "$DIR-$SIMTYPE.cu.cdf"
	done

	echo "Using tracefile: $TRACE_FILE"
	echo "Using topology file: $TOPO_FILE"


	# Node count: the digits following "topo-" in the first directory entry whose
	# name contains "topo".
	NR_NODES=$(ls | grep topo | head -n 1 | grep -o 'topo-[0-9]\+' | grep -o '[0-9]*')
	echo "Nr nodes $NR_NODES"

	# For every trace line matching /c.nr/ (after turning commas into spaces)
	# print field6 * (int(field18 / N) + 1) / 1000, with N the node count.
	# NOTE(review): the "*" in "TRACE*trace" and between $6 and "(" had been
	# stripped from the copy this was recovered from; both are reconstructed --
	# confirm. The meaning of fields 6 and 18 is not visible here.
	cat TRACE*trace | tr "," " " \
	  | awk -v N="$NR_NODES" '$0 ~ /c.nr/ { print ($6*(int($18/N)+1))/1000; }' \
	  > ideal.rt
	"$YARN_PATH/scripts/cdf-gen.py" ideal.rt 0 > ideal.rt.cdf
	echo "Done with estimating ideal running times"

	# Collect every simulation log below the current directory at the top level.
	# The list is read completely before anything is moved, and the pattern is
	# quoted so the shell does not expand it before find runs.
	mapfile -t logfiles < <(find . -name '*-simulation.log')
	for logfile in "${logfiles[@]}"; do
	  mv "$logfile" .
	done

	#PEN_CHOICE=$(grep "Penalty choice" $(ls | grep simulation.log | tail -n 1) | awk -F-- '{print $2}')
	#touch z$PEN_CHOICE
	#echo "Done creating file z$PEN_CHOICE which is named after the penalty function we are choosing"

	# Build a CDF of average job running times, i.e. for every job j the average
	# of its runtime across all runs of one simulation type.
	cctr=1

	# Number of jobs = number of trace lines matching "c.mem". It does not change
	# inside the loop, so it is computed once.
	NRJ=$(grep c.mem TRACE*.trace | wc -l)

	# Simulation names are derived from the log file names: drop the
	# "-simulation.log" suffix and the leading digits/dashes, de-duplicate, join
	# on one line, drop "-none", then move everything from "regular" onwards to
	# the front so that "regular" is processed first.
	# NOTE(review): the two sed expressions 's/^[0-9-]*//g' and
	# 's/\(.*\)\(regular\)\(.*\)/\2\3\1/g' were garbled in the copy this was
	# recovered from and are reconstructed -- confirm.
	sim_names=$(ls | grep simulation | sed 's/-simulation.log//g' \
	  | sed 's/^[0-9-]*//g' | sort | uniq | tr "\n" " " | sed 's/-none//g' \
	  | sed 's/\(.*\)\(regular\)\(.*\)/\2\3\1/g')

	for SIM in $sim_names; do
	  # Directory names use "---" where the simulation name uses "-".
	  SIM_FULL=$(echo "$SIM" | sed "s/-/---/g")
	  # Concatenate all jobruntime.csv files belonging to this simulation type.
	  find . | grep "$SIM_FULL" | grep jobruntime.csv | xargs -I {} cat {} \
	    > "$cctr-$SIM-all.csv.files"
	  # Start from an empty output file; one line per job is appended below.
	  > "$cctr-$SIM-avg.jrt"

	  # This seems to assume that jobX in one run is the same as jobX in another
	  # run. That may be the case if all submission times are different, but if
	  # they are equal YARN can switch the order of jobs and will not necessarily
	  # use trace order.
	  # I take that back, it seems the csv files now contain both job_ (trace
	  # name) and app_ (YARN name).
	  for ((job = 1; job <= NRJ; job++)); do
	    # (field6 - field5) / 1000 for every row of this job, reduced to the
	    # value on the stats helper's "mean" line.
	    grep "job_${job}[,_]" "$cctr-$SIM-all.csv.files" \
	      | awk -F, '{print ($6-$5)/1000}' | "$YARN_PATH/scripts/stats" \
	      | grep mean | awk '{print $2}' >> "$cctr-$SIM-avg.jrt"
	  done
	  "$YARN_PATH/scripts/cdf-gen.py" "$cctr-$SIM-avg.jrt" 0 > "$cctr-$SIM-avg.jrt.cdf"
	  # End of code to get a CDF of average job running times.
	  cctr=$((cctr + 1))
	done
	echo "Done averaging cdfs"

	# Write one row per simulation log to sim.duration: file name, first and last
	# WARN timestamp found in the first/last 100 lines, and the output of the
	# date-diff helper for the two.
	for file in $(ls | grep simulation.log); do
	  # Timestamp = first two fields of the line; a leading "14" is expanded to
	  # "2014" and everything from the first comma on is dropped.
	  FIRST_DATE=$(head -n 100 "$file" | grep WARN | head -n 1 \
	    | awk '{print $1,$2}' | sed 's/^14/2014/g' | awk -F, '{print $1}')
	  LAST_DATE=$(tail -n 100 "$file" | grep WARN | tail -n 1 \
	    | awk '{print $1,$2}' | sed 's/^14/2014/g' | awk -F, '{print $1}')
	  # The helper's output is left unquoted so it is flattened onto this row.
	  echo "$file" "$FIRST_DATE" "$LAST_DATE" \
	    $("$YARN_PATH/scripts/date-diff" "$LAST_DATE" "$FIRST_DATE")
	  echo
	done | column -t > sim.duration

	# The tarballs have been unpacked; drop them before archiving.
	rm -fr -- *.tgz
	cp "$YARN_SLS_RUN_PATH/run-in-parallel.sh" .
	cp "$YARN_SLS_RUN_PATH/exp-runner.sh" .
	echo "Creating final archive files"
	mkdir -p ../arch
	# NOTE(review): the wildcards in "*.tmem *.cdf TRACE*trace *avg.jrt" had been
	# stripped from the copy this was recovered from and are reconstructed --
	# confirm the intended file list.
	tar czvf "../arch/$DIR.tgz" *.tmem *.cdf TRACE*trace run-in-parallel.sh \
	  exp-runner.sh sim.duration *avg.jrt ideal.rt*

unpack-exp · No One · Temporary · Actions

File Metadata

unpack-exp · View Options

Event Timeline

unpack-exp
No OneTemporary
Actions

unpack-exp
View Options