diff --git a/README.md b/README.md
index b03be21..98e78ed 100644
--- a/README.md
+++ b/README.md
@@ -1,258 +1,259 @@
c4science.ch
=========

* Ansible playbook for the git infrastructure on OpenStack

INSTALL
-------

* Dependencies: you need Ansible >= 2.0

```
cd ~
git clone https://github.com/ansible/ansible.git
cd ansible
git submodule update --init --recursive
sudo python setup.py install
sudo pip install shade python-novaclient
```

* Clone this repository

```
git clone repourl c4science.ch
cd c4science.ch
git submodule update --init --recursive
```

USAGE
-----

* How to use:

```
make status  # list instances
make up      # create instances
make clean   # destroy instances
```

* You must configure SSH so that connections go through the jump server; add this to ~/.ssh/config:

```
Host EXTERNAL_IP
  HostName c4science.ch-jump01
  User centos
  StrictHostKeyChecking no
  UserKnownHostsFile=/dev/null

Host 10.0.*
  User centos
  ProxyCommand ssh c4science.ch-jump01 -W %h:%p
  StrictHostKeyChecking no
  UserKnownHostsFile=/dev/null
```

* Map the jump server name to its external IP in /etc/hosts:

```
echo 'EXTERNAL_IP c4science.ch-jump01' >> /etc/hosts
```

* You must create floating IPs:
  * One on region_main: put it in external_ip in vars/main.yml
  * One on region_back: put it in backup_ip in vars/main.yml
* You must create a Switch Engines bucket
  * See https://help.switch.ch/engines/documentation/s3-like-object-storage/

```
./s3cmd mb s3://phabricator
```

* You have to copy the SSH host keys to all app servers, so they are all the same

```
rsync -av c4science-app01:/etc/ssh/ssh_host_*_key /tmp/
rsync -av /tmp/ssh_host_*_key c4science-app0X:/etc/ssh/
ssh c4science-app0X 'service sshd_phabricator restart'
```

* You have to copy the Shibboleth certificate across instances from app00

```
rsync -av c4science-app00:/etc/shibboleth/sp-*.pem /tmp/.
rsync -av /tmp/sp-*.pem c4science-app01:/etc/shibboleth/.
ssh c4science-app01 'service shibd restart'
ssh c4science-app00 'openssl x509 -noout -fingerprint -sha1 -in /etc/shibboleth/sp-cert.pem'
ssh c4science-app01 'openssl x509 -noout -fingerprint -sha1 -in /etc/shibboleth/sp-cert.pem'
rm /tmp/sp-*.pem
```

* Create an SSH key without a passphrase on app00 and copy the public key to the backup server (root user)
* Install the logged-in dashboard:
  * Create the dashboard in the Phabricator web interface
  * Get the PHID of the new dashboard

```
mysql> use phabricator_dashboard;
mysql> select name,phid from dashboard;
```

  * Install the dashboard

```
mysql> insert into dashboard_install (installerPHID, objectPHID, applicationClass, dashboardPHID, dateCreated, dateModified)
       values ('PHID-USER-wwnpcpwveuiz7uts3oin', 'dashboard:default_loggedin', 'PhabricatorHomeApplication', 'PHID-DSHB-j64cvog4impmcgb7e3sa', 0, 0);
```

Build the Jenkins slave docker images
-------------------------------------

* Build the image on your local machine

```
mkdir /tmp/docker
cp roles/ci/templates/jenkins-slave-centos.docker /tmp/docker/Dockerfile
cd /tmp/docker
docker build --rm=true -t jenkins-centos7 .
docker save jenkins-centos7 > ../jenkins-centos7.tar
```

* Do this for every Dockerfile in roles/ci/templates/
* Copy the tar to the CoreOS machine and import the image

```
docker load < jenkins-centos7.tar
docker images
docker run -i -t jenkins-centos7 /bin/bash
```

## Nagios monitoring of CoreOS

* Build the image

```
mkdir /tmp/docker
cp roles/ci/templates/jenkins-nagios.docker /tmp/docker/Dockerfile
cp roles/ci/templates/*nrpe* /tmp/docker/
+cp roles/ci/templates/gmond.conf /tmp/docker/
cp roles/common/templates/check_mem.sh /tmp/docker
cd /tmp/docker
docker build --rm=true -t jenkins-nagios .
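+# Optional sanity check before exporting: confirm the image was built
+docker images jenkins-nagios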
docker save jenkins-nagios > ../jenkins-nagios.tar
```

* Install and run the Nagios image after copying it to the server

```
docker load < jenkins-nagios.tar
-docker run --restart=always --pid=host --add-host c4science-monit:10.0.74.150 \
-  --privileged=true -p 5666:5666 -d -i -t jenkins-nagios
+docker run --restart=always --pid=host --net=host \
+  --privileged=true -d -i -t jenkins-nagios
```

SCALING UP
----------

### Database

* Add a database node in tasks/create-instances.yml by adding a numbered item to both the os_server and add_host actions

# Patch example
```
diff --git a/tasks/create-instances.yml b/tasks/create-instances.yml
index 3037cc0..a6ac097 100644
--- a/tasks/create-instances.yml
+++ b/tasks/create-instances.yml
@@ -79,6 +79,7 @@
     - 0
     - 1
     - 2
+    - 3

 - add_host:
     name: "{{ openstackdb.results[item].openstack.private_v4 }}"
@@ -89,6 +90,7 @@
     - 0
     - 1
     - 2
+    - 3

 - name: Create Monitoring instance
   os_server:
```

* Run the init playbook: `make init`
* Check that the node joined the Galera replication: `mysql -e "SHOW STATUS LIKE 'wsrep_cluster_size';"`
* An even number of DB instances is not recommended; you can use the arbitrator to get one more node by running `make arbitrator` on the monit node

### Web/storage

* Add an app node in tasks/create-instances.yml by adding a numbered item to both the os_server and add_host actions
* Run the init playbook: `make init`
* Check that gluster is working: `gluster volume info`

### Scaling down

* Stop the instance with: `nova stop `
* Remove the instance from the configuration file tasks/create-instances.yml
* Run the init playbook: `make init`
* Finally, delete the instance if it is no longer needed: `nova delete `
* The volume is still available and can be reused

TODO
----
* Shibboleth auth
* HAProxy redundancy using keepalived: https://raymii.org/s/articles/Building_HA_Clusters_With_Ansible_and_Openstack.html

TEST
----
* Replication information:

```
mysql -e "SHOW STATUS LIKE 'wsrep_cluster%';"
```

* Some benchmarking examples:

```
## GIT Read
cd /tmp
parallel -j 10 git clone ssh://git@c4science.ch:2222/diffusion/TEST/test.git \
  -- $(for i in $(seq 20); do echo test$i; done) 1> /dev/null
```

## GIT Write sequential
```
cd /tmp
git clone ssh://git@c4science.ch:2222/diffusion/TEST/test.git
for i in {1..10}; do
  time sh -c "echo 'test' >> README.md; git commit -am 'test'; git push" &>/dev/null
done
```

```
## Conduit API (create repo from remote)
REPO=$(echo {A..Z})

# Create some repositories
for i in $REPO; do
  echo "{\"name\":\"test\", \"callsign\": \"TEST$i\", \"vcs\": \"git\", \"uri\": \"https://git.epfl.ch/repo/repo-test.git\"}" \
  | arc call-conduit repository.create
done

# Clone them (doesn't work)
#cd /tmp
#for i in $REPO; do
#  git clone ssh://git@c4science.ch:2222/diffusion/TEST$i/test.git test$i
#done

# Test commit and push
#parallel -i -j 10 sh -c 'cd test{};
#  echo "TEST" > README.md;
#  git commit -am "test";
#  git push' -- $(echo $REPO)
```

```
## GIT test lock
parallel -i -j 5 sh -c 'cd test{};
  git pull --no-edit;
  git commit -am "merge conflicts";
  echo "* TEST" >> README.md;
  git commit -am "test";
  git push || git pull --no-edit;
  git push' -- $(seq 50)
```

```
## HTTP
ab -C phsid:COOK -C phusr:admin -n 1000 \
  -c 10 https://c4science.ch/diffusion/TEST/repository/master/
```
diff --git a/roles/ci/templates/gmond.conf b/roles/ci/templates/gmond.conf
new file mode 100644
index 0000000..78f2ba9
--- /dev/null
+++ b/roles/ci/templates/gmond.conf
@@ -0,0 +1,379 @@
+/* This configuration is as close to 2.5.x default behavior as possible
+   The values closely match ./gmond/metric.h definitions in 2.5.x */
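+
+/* Note: in this repository the template is installed as /etc/ganglia/gmond.conf
+   in the Nagios monitoring container, and gmond is started alongside nrpe (see
+   the jenkins-nagios.docker and run-nrpe.sh changes further down in this diff). */
+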
+globals {
+  daemonize = yes
+  setuid = yes
+  user = ganglia
+  debug_level = 0
+  max_udp_msg_len = 1472
+  mute = no
+  deaf = no
+  allow_extra_data = yes
+  host_dmax = 86400 /*secs. Expires (removes from web interface) hosts in 1 day */
+  host_tmax = 20 /*secs */
+  cleanup_threshold = 300 /*secs */
+  gexec = no
+  # By default gmond will use reverse DNS resolution when displaying your hostname
+  # Uncommenting following value will override that value.
+  # override_hostname = "mywebserver.domain.com"
+  # If you are not using multicast this value should be set to something other than 0.
+  # Otherwise if you restart aggregator gmond you will get empty graphs. 60 seconds is reasonable
+  send_metadata_interval = 0 /*secs */
+
+}
+
+/*
+ * The cluster attributes specified will be used as part of the <CLUSTER>
+ * tag that will wrap all hosts collected by this instance.
+ */
+cluster {
+  name = "c4science"
+  owner = "EPFL"
+  latlong = "N46.519964 W6.566713"
+  url = "http://epfl.ch"
+}
+
+/* The host section describes attributes of the host, like the location */
+host {
+  location = "Switch Engines"
+}
+
+/* Feel free to specify as many udp_send_channels as you like. Gmond
+   used to only support having a single channel */
+udp_send_channel {
+  #bind_hostname = yes # Highly recommended, soon to be default.
+                       # This option tells gmond to use a source address
+                       # that resolves to the machine's hostname. Without
+                       # this, the metrics may appear to come from any
+                       # interface and the DNS names associated with
+                       # those IPs will be used to create the RRDs.
+  mcast_join = 239.2.11.71
+  port = 8649
+  ttl = 1
+}
+
+/* You can specify as many udp_recv_channels as you like as well. */
+udp_recv_channel {
+  mcast_join = 239.2.11.71
+  port = 8649
+  bind = 239.2.11.71
+  retry_bind = true
+  # Size of the UDP buffer. If you are handling lots of metrics you really
+  # should bump it up to e.g. 10MB or even higher.
+  # buffer = 10485760
+}
+
+/* You can specify as many tcp_accept_channels as you like to share
+   an xml description of the state of the cluster */
+tcp_accept_channel {
+  port = 8649
+  # If you want to gzip XML output
+  gzip_output = no
+}
+
+/* Channel to receive sFlow datagrams */
+#udp_recv_channel {
+#  port = 6343
+#}
+
+/* Optional sFlow settings */
+#sflow {
+#  udp_port = 6343
+#  accept_vm_metrics = yes
+#  accept_jvm_metrics = yes
+#  multiple_jvm_instances = no
+#  accept_http_metrics = yes
+#  multiple_http_instances = no
+#  accept_memcache_metrics = yes
+#  multiple_memcache_instances = no
+#}
+
+/* Each metrics module that is referenced by gmond must be specified and
+   loaded. If the module has been statically linked with gmond, it does
+   not require a load path. However all dynamically loadable modules must
+   include a load path. */
+modules {
+  module {
+    name = "core_metrics"
+  }
+  module {
+    name = "cpu_module"
+    path = "modcpu.so"
+  }
+  module {
+    name = "disk_module"
+    path = "moddisk.so"
+  }
+  module {
+    name = "load_module"
+    path = "modload.so"
+  }
+  module {
+    name = "mem_module"
+    path = "modmem.so"
+  }
+  module {
+    name = "net_module"
+    path = "modnet.so"
+  }
+  module {
+    name = "proc_module"
+    path = "modproc.so"
+  }
+  module {
+    name = "sys_module"
+    path = "modsys.so"
+  }
+}
+
+/* The old internal 2.5.x metric array has been replaced by the following
+   collection_group directives. What follows is the default behavior for
+   collecting and sending metrics that is as close to 2.5.x behavior as
+   possible.
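+   (Roughly: collect_every is how often a metric is sampled, time_threshold is the
+   longest interval before it is sent, and value_threshold is the minimum change
+   that triggers an immediate send.)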
+*/
+
+/* This collection group will cause a heartbeat (or beacon) to be sent every
+   20 seconds. In the heartbeat is the GMOND_STARTED data which expresses
+   the age of the running gmond. */
+collection_group {
+  collect_once = yes
+  time_threshold = 20
+  metric {
+    name = "heartbeat"
+  }
+}
+
+/* This collection group will send general info about this host */
+collection_group {
+  collect_every = 60
+  time_threshold = 60
+  metric {
+    name = "cpu_num"
+    title = "CPU Count"
+  }
+  metric {
+    name = "cpu_speed"
+    title = "CPU Speed"
+  }
+  metric {
+    name = "mem_total"
+    title = "Memory Total"
+  }
+  metric {
+    name = "swap_total"
+    title = "Swap Space Total"
+  }
+  metric {
+    name = "boottime"
+    title = "Last Boot Time"
+  }
+  metric {
+    name = "machine_type"
+    title = "Machine Type"
+  }
+  metric {
+    name = "os_name"
+    title = "Operating System"
+  }
+  metric {
+    name = "os_release"
+    title = "Operating System Release"
+  }
+  metric {
+    name = "location"
+    title = "Location"
+  }
+}
+
+/* This collection group will send the status of gexecd for this host
+   every 300 secs. */
+/* Unlike 2.5.x the default behavior is to report gexecd OFF. */
+collection_group {
+  collect_once = yes
+  time_threshold = 300
+  metric {
+    name = "gexec"
+    title = "Gexec Status"
+  }
+}
+
+/* This collection group will collect the CPU status info every 20 secs.
+   The time threshold is set to 90 seconds. In honesty, this
+   time_threshold could be set significantly higher to reduce
+   unnecessary network chatter. */
+collection_group {
+  collect_every = 20
+  time_threshold = 90
+  /* CPU status */
+  metric {
+    name = "cpu_user"
+    value_threshold = "1.0"
+    title = "CPU User"
+  }
+  metric {
+    name = "cpu_system"
+    value_threshold = "1.0"
+    title = "CPU System"
+  }
+  metric {
+    name = "cpu_idle"
+    value_threshold = "5.0"
+    title = "CPU Idle"
+  }
+  metric {
+    name = "cpu_nice"
+    value_threshold = "1.0"
+    title = "CPU Nice"
+  }
+  metric {
+    name = "cpu_aidle"
+    value_threshold = "5.0"
+    title = "CPU aidle"
+  }
+  metric {
+    name = "cpu_wio"
+    value_threshold = "1.0"
+    title = "CPU wio"
+  }
+  metric {
+    name = "cpu_steal"
+    value_threshold = "1.0"
+    title = "CPU steal"
+  }
+  /* The next two metrics are optional if you want more detail...
+     ... since they are accounted for in cpu_system.
+  metric {
+    name = "cpu_intr"
+    value_threshold = "1.0"
+    title = "CPU intr"
+  }
+  metric {
+    name = "cpu_sintr"
+    value_threshold = "1.0"
+    title = "CPU sintr"
+  }
+  */
+}
+
+collection_group {
+  collect_every = 20
+  time_threshold = 90
+  /* Load Averages */
+  metric {
+    name = "load_one"
+    value_threshold = "1.0"
+    title = "One Minute Load Average"
+  }
+  metric {
+    name = "load_five"
+    value_threshold = "1.0"
+    title = "Five Minute Load Average"
+  }
+  metric {
+    name = "load_fifteen"
+    value_threshold = "1.0"
+    title = "Fifteen Minute Load Average"
+  }
+}
+
+/* This group collects the number of running and total processes */
+collection_group {
+  collect_every = 80
+  time_threshold = 950
+  metric {
+    name = "proc_run"
+    value_threshold = "1.0"
+    title = "Total Running Processes"
+  }
+  metric {
+    name = "proc_total"
+    value_threshold = "1.0"
+    title = "Total Processes"
+  }
+}
+
+/* This collection group grabs the volatile memory metrics every 40 secs and
+   sends them at least every 180 secs. This time_threshold can be increased
+   significantly to reduce unneeded network traffic.
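+   (For example, raising time_threshold from 180 to 600 would send memory metrics
+   at most every ten minutes instead of every three.)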
+*/
+collection_group {
+  collect_every = 40
+  time_threshold = 180
+  metric {
+    name = "mem_free"
+    value_threshold = "1024.0"
+    title = "Free Memory"
+  }
+  metric {
+    name = "mem_shared"
+    value_threshold = "1024.0"
+    title = "Shared Memory"
+  }
+  metric {
+    name = "mem_buffers"
+    value_threshold = "1024.0"
+    title = "Memory Buffers"
+  }
+  metric {
+    name = "mem_cached"
+    value_threshold = "1024.0"
+    title = "Cached Memory"
+  }
+  metric {
+    name = "swap_free"
+    value_threshold = "1024.0"
+    title = "Free Swap Space"
+  }
+}
+
+collection_group {
+  collect_every = 40
+  time_threshold = 300
+  metric {
+    name = "bytes_out"
+    value_threshold = 4096
+    title = "Bytes Sent"
+  }
+  metric {
+    name = "bytes_in"
+    value_threshold = 4096
+    title = "Bytes Received"
+  }
+  metric {
+    name = "pkts_in"
+    value_threshold = 256
+    title = "Packets Received"
+  }
+  metric {
+    name = "pkts_out"
+    value_threshold = 256
+    title = "Packets Sent"
+  }
+}
+
+/* Different than 2.5.x default since the old config made no sense */
+collection_group {
+  collect_every = 1800
+  time_threshold = 3600
+  metric {
+    name = "disk_total"
+    value_threshold = 1.0
+    title = "Total Disk Space"
+  }
+}
+
+collection_group {
+  collect_every = 40
+  time_threshold = 180
+  metric {
+    name = "disk_free"
+    value_threshold = 1.0
+    title = "Disk Space Available"
+  }
+  metric {
+    name = "part_max_used"
+    value_threshold = 1.0
+    title = "Maximum Disk Space Used"
+  }
+}
+
+include ("/etc/ganglia/conf.d/*.conf")
+
diff --git a/roles/ci/templates/jenkins-nagios.docker b/roles/ci/templates/jenkins-nagios.docker
index 2bdeb4e..32eef61 100644
--- a/roles/ci/templates/jenkins-nagios.docker
+++ b/roles/ci/templates/jenkins-nagios.docker
@@ -1,24 +1,28 @@
FROM centos:7
MAINTAINER Jean-Baptiste Aubort

# Update packages
RUN yum -y update
RUN yum -y install epel-release

# Nagios
RUN yum -y install nrpe nagios-plugins-disk nagios-plugins-load nagios-plugins-procs rsyslog
ADD nrpe.cfg /etc/nagios/nrpe.cfg
ADD nrpe_local.cfg /etc/nagios/nrpe_local.cfg
ADD check_mem.sh /usr/local/bin/check_mem.sh
RUN chmod +x /usr/local/bin/check_mem.sh
ADD run-nrpe.sh /
RUN chmod +x /run-nrpe.sh

+# Ganglia client
+RUN yum -y install ganglia-gmond
+ADD gmond.conf /etc/ganglia/gmond.conf
+
# Clean
RUN yum clean all

EXPOSE 5666

CMD ["./run-nrpe.sh"]
diff --git a/roles/ci/templates/run-nrpe.sh b/roles/ci/templates/run-nrpe.sh
index bd30e3a..44c19b7 100644
--- a/roles/ci/templates/run-nrpe.sh
+++ b/roles/ci/templates/run-nrpe.sh
@@ -1,4 +1,5 @@
#!/bin/bash
/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -d
+/usr/sbin/gmond
tail -f /var/log/messages & wait