diff --git a/all/spark/execute-spark.sh b/all/spark/execute-spark.sh
index 03b6fb6..77d8048 100755
--- a/all/spark/execute-spark.sh
+++ b/all/spark/execute-spark.sh
@@ -1,46 +1,46 @@
 #!/bin/bash
 # author: gilles fourestey (EPFL)
 # based on http://serverfault.com/questions/776687/how-can-i-run-spark-on-a-cluster-using-slurm
 # debug flag
 set -x
 # This section will be run when started by sbatch
 if [ "x$1" == 'x' ]; then
     #
     script=$0
     # log files
     export sparkLogs=./sparklogs/
     export sparkTmp=./sparktmp/
     mkdir -p "$sparkLogs" "$sparkTmp"
     #
     export SPARK_WORKER_DIR=$sparkLogs
     export SPARK_LOCAL_DIRS=$sparkLogs
     export SPARK_MASTER_PORT=7077
     export SPARK_MASTER_WEBUI_PORT=8080
     export SPARK_WORKER_CORES=$SLURM_CPUS_PER_TASK
     #export SPARK_MEM=48G
     #export SPARK_DRIVER_MEMORY=60G
     #export SPARK_DAEMON_MEMORY=$(( $SLURM_MEM_PER_CPU * $SLURM_CPUS_PER_TASK / 2 ))m
     #export SPARK_DAEMON_MEMORY=8G
     #export SPARK_WORKER_MEMORY=48G
     #export SPARK_MEM=$SPARK_DAEMON_MEMORY
     #
     srun "$script" 'srunning'
 #
 # If run by srun, then decide by $SLURM_PROCID whether we are master or worker
 elif [ "$1" == 'srunning' ]; then
     if [ $SLURM_PROCID -eq 0 ]; then
-        export SPARK_MASTER_IP=$(hostname)
+        export SPARK_MASTER_HOST=$(hostname)
         MASTER_NODE=$(scontrol show hostname $SLURM_NODELIST | head -n 1)
         #
-        echo "spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT" > "./${SLURM_JOBID}_spark_master"
+        echo "spark://$SPARK_MASTER_HOST:$SPARK_MASTER_PORT" > "./${SLURM_JOBID}_spark_master"
         #
         "$SPARK_ROOT/bin/spark-class" org.apache.spark.deploy.master.Master \
-            --ip $SPARK_MASTER_IP \
+            --ip $SPARK_MASTER_HOST \
             --port $SPARK_MASTER_PORT \
             --webui-port $SPARK_MASTER_WEBUI_PORT
     else
         MASTER_NODE=spark://$(scontrol show hostname $SLURM_NODELIST | head -n 1):7077
         "$SPARK_ROOT/bin/spark-class" org.apache.spark.deploy.worker.Worker $MASTER_NODE
     fi
 fi
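
For reference, a minimal sketch of how the patched script can be driven, assuming a SLURM cluster where SPARK_ROOT points at a Spark installation; the node/core counts and the example application below are illustrative, not taken from this repo:

    # Submit the launcher; --parsable makes sbatch print only the job id.
    JOBID=$(sbatch --parsable --nodes=3 --ntasks-per-node=1 --cpus-per-task=8 execute-spark.sh)

    # Once the master has started, the job publishes its URL in the file
    # below (this may take a few seconds after the job begins running).
    MASTER=$(cat "./${JOBID}_spark_master")

    # Point any Spark application at the standalone master.
    "$SPARK_ROOT/bin/spark-submit" --master "$MASTER" "$SPARK_ROOT/examples/src/main/python/pi.py"

The single srun in the sbatch branch re-launches one copy of the script per task: rank 0 (SLURM_PROCID 0) becomes the master, and every other rank starts a worker pointed at spark://<first node>:7077.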