# Patch summary (text ahead of the first "diff --git" header is commentary, not patch content):
#  - deploy/main.yml: add the slurm role to the common play; comment out the beegfs role.
#  - roles/common: stop creating the slurm user here (roles/slurm now creates slurm and munge users).
#  - roles/first_deploy: log the expect session to the same file used by `creates`.
#  - roles/nfs: export with no_root_squash.
#  - roles/slurm: add restart handlers; build munge and slurm from git checkouts instead of spack.
#  - roles/spack: set ansible_timeout, tag fact gathering with slurm, comment out the spack slurm install.
# Review fixes applied to added lines of roles/slurm/tasks/main.yml (hunk line counts unchanged):
#  - `make unsinstall` -> `make uninstall` (typo; the target did not exist).
#  - file task: `present: no` -> `state: absent` (`present` is not a parameter of the file module).
#  - systemd task: `enable: yes` -> `enabled: yes` (the systemd module parameter is `enabled`).
diff --git a/deploy/main.yml b/deploy/main.yml index bd1aa69..b98106f 100644 --- a/deploy/main.yml +++ b/deploy/main.yml @@ -1,40 +1,41 @@ --- - name: Frontal hosts: frontal vars_files: - "vars/main.yml" roles: - role: frontal - name: Deploy the nodes for the first time gather_facts: no hosts: - compute vars_files: - "vars/main.yml" roles: - role: first_deploy - name: Common parts for all nodes hosts: - compute - frontal vars_files: - "vars/main.yml" roles: - role: common - role: nfs - role: ldap_auth - role: spack - - role: beegfs - + - role: slurm +# - role: beegfs + - name: Configure compute nodes hosts: compute vars_files: - "vars/main.yml" roles: - role: filesystem - role: collectd - + # tasks: # - include: roles/slurm/tasks/client.yml diff --git a/deploy/roles/common/tasks/main.yml b/deploy/roles/common/tasks/main.yml index 4ea3128..9d3592e 100644 --- a/deploy/roles/common/tasks/main.yml +++ b/deploy/roles/common/tasks/main.yml @@ -1,91 +1,86 @@ --- # Packages - apt: name: "{{ item }}" update_cache: yes with_items: - htop - iotop - iftop - iperf - screen - vim - tcpdump - lsof - ntp - qtbase5-dev - emacs-nox # SSH Keys - name: SSH Keys for root authorized_key: user=root key="{{ item }}" with_items: "{{ ssh_keys }}" # Time - copy: content: "Europe/Zurich" dest: "/etc/timezone" - file: state: "link" src: "/usr/share/zoneinfo/Europe/Zurich" dest: "/etc/localtime" # Time - name: Enable NTP service: name: ntp enabled: true state: started - lineinfile: dest: /etc/ntp.conf line: "server {{ groups.frontal[0] }} prefer" insertbefore: 'pool 0\.debian\.pool\.ntp\.org iburst' notify: restart ntpd when: "'compute' in group_names" # System - hostname: name: "{{ inventory_hostname }}" - replace: path: /etc/ssh/sshd_config regexp: '#HostbasedAuthentication no ' replace: 'HostbasedAuthentication yes' notify: restart sshd tags: ssh - template: src: ssh_know_hosts dest: /etc/ssh/ssh_known_hosts notify: restart sshd tags: ssh - - + - name: Creating some users block: - group: 
name: "{{ item.key }}" gid: "{{ item.value }}" with_dict: "{{ ids }}" - user: name: spack uid: "{{ ids.spack }}" shell: /bin/bash create_home: yes generate_ssh_key: "{{ 'frontal' in group_names }}" register: user_spack - authorized_key: user=spack key="{{ user_spack.ssh_public_key }}" when: "'frontal' in group_names" - - user: - name: slurm - uid: "{{ ids.slurm }}" - system: yes vars: ids: spack: 206 - slurm: 204 + tags: - users diff --git a/deploy/roles/first_deploy/tasks/main.yml b/deploy/roles/first_deploy/tasks/main.yml index 3daeb26..a609f16 100644 --- a/deploy/roles/first_deploy/tasks/main.yml +++ b/deploy/roles/first_deploy/tasks/main.yml @@ -1,44 +1,47 @@ --- - name: first boot and deploy keys block: - shell: | + log_file {{ log_file }}; spawn -noecho ssh -q -o StrictHostKeyChecking=no root@{{ inventory_hostname }} expect "password: " send "{{ armbian_default_pass }}\n" expect { "(current) UNIX password: " { send "{{ armbian_default_pass }}\n" exp_continue } "Enter new UNIX password: " { send "{{ armbian_new_pass }}\n" exp_continue } "Retype new UNIX password: " { send "{{ armbian_new_pass }}\n" exp_continue } "Please provide a username (eg. 
your forename): " { #send odroidc2\n send "\003" exp_continue } } close + vars: + log_file: /root/.first_boot_done_{{ inventory_hostname_short }} args: executable: /usr/bin/expect - creates: /root/.first_boot_done_{{ inventory_hostname_short }} + creates: "{{ log_file }}" delegate_to: "{{ groups['frontal'][0] }}" - authorized_key: user=root key="{{ item }}" with_items: "{{ ssh_keys }}" vars: ansible_ssh_pass: odroidc2 ansible_ssh_extra_args: "-o StrictHostKeyChecking=no -o PubkeyAuthentication=no" always: - meta: clear_host_errors tags: - first diff --git a/deploy/roles/nfs/templates/exports b/deploy/roles/nfs/templates/exports index e25a740..9147696 100644 --- a/deploy/roles/nfs/templates/exports +++ b/deploy/roles/nfs/templates/exports @@ -1,3 +1,3 @@ {% for export in nfs_exports %} -{{ export }} *.agamemnon.hpc.epfl.ch(rw,sync,no_subtree_check) +{{ export }} *.agamemnon.hpc.epfl.ch(rw,sync,no_subtree_check,no_root_squash) {% endfor %} diff --git a/deploy/roles/slurm/handlers/main.yml b/deploy/roles/slurm/handlers/main.yml index 54db497..78b4efc 100644 --- a/deploy/roles/slurm/handlers/main.yml +++ b/deploy/roles/slurm/handlers/main.yml @@ -1,3 +1,23 @@ --- - name: restart systemctl shell: systemctl daemon-reload + +- name: restart slurmd + service: + name: slurmd + state: restarted + +- name: restart munge + service: + name: munge + state: restarted + +- name: restart slurmctld + service: + name: slurmctld + state: restarted + +- name: restart slurmdbd + service: + name: slurmdbd + state: restarted diff --git a/deploy/roles/slurm/meta/main.yml b/deploy/roles/slurm/meta/main.yml index 51c29c9..4dd4ea7 100644 --- a/deploy/roles/slurm/meta/main.yml +++ b/deploy/roles/slurm/meta/main.yml @@ -1,3 +1,3 @@ --- -- dependencies: - - spack +#dependencies: +# - spack diff --git a/deploy/roles/slurm/tasks/main.yml b/deploy/roles/slurm/tasks/main.yml index b3d50c3..33eceda 100644 --- a/deploy/roles/slurm/tasks/main.yml +++ b/deploy/roles/slurm/tasks/main.yml @@ -1,35 +1,228 
@@ --- -- name: gather info +#- name: gather info +# block: +# - shell: "spack find -p munge arch=$(spack arch) | tail -n1 | awk '{ print $2; }'" +# register: munge_location_shell +# changed_when: (munge_location is undefined) or (munge_location != munge_location_shell.stdout) +# +# - shell: "spack find -p slurm arch=$(spack arch) | tail -n1 | awk '{ print $2; }'" +# register: slurm_location_shell +# changed_when: (slurm_location is undefined) or (slurm_location != slurm_location_shell.stdout) +# +# - set_fact: +# munge_location: "{{ munge_location_shell.stdout }}" +# slurm_location: "{{ slurm_location_shell.stdout }}" +# cacheable: yes +# become: yes +# tags: slurm + +- name: create users + block: + - group: + name: "{{ item.key }}" + gid: "{{ item.value }}" + with_dict: "{{ ids }}" + - user: + name: "{{ item.key }}" + uid: "{{ item.value }}" + system: yes + with_dict: "{{ ids }}" + vars: + ids: + munge: 207 + slurm: 204 + tags: + - slurm + - munge + - systemd + +- name: git munge and slurm block: - - shell: "spack find -p munge arch={{ spack_arch }}" - register: munge_location_shell + - git: + repo: 'https://github.com/dun/munge.git' + dest: /softs/slurm/munge + update: no + - git: + repo: 'https://github.com/SchedMD/slurm.git' + dest: /softs/slurm/slurm + update: no + when: "inventory_hostname == groups['frontal'][0]" + tags: slurm + +- name: apt dependencies + apt: + name: + - rsync + - libbz2-dev + - libgcrypt20-dev + - pkg-config + - liblz4-dev + - libhwloc-dev + - libnuma-dev + - libpam0g-dev + - libcurl4-openssl-dev + - libreadline-dev + - libssh2-1-dev + - libncurses-dev + - libssl-dev + - libjson-c-dev + - libhdf5-dev + update_cache: yes + tags: + - packages + - slurm + - munge + +- name: git munge + block: + - file: + name: "/tmp/build-munge-{{ spack_arch }}" + state: directory + - shell: | + /softs/slurm/munge/configure \ + --prefix=/usr/local \ + --sysconfdir=/etc \ + --localstatedir=/var \ + > {{ munge_log_file }} + + make -j4 >> {{ munge_log_file }} 
+ make uninstall + make install >> {{ munge_log_file }} + ldconfig + args: + chdir: /tmp/build-munge-{{ spack_arch }} + creates: "{{ munge_log_file }}" + rescue: + - file: + name: "{{ munge_log_file }}" + state: absent + vars: + munge_log_file: /root/build-munge-{{ spack_arch }}.log + tags: + - slurm + - munge + +- name: git slurm + block: + - file: + name: "{{ item }}" + state: directory + with_items: + - "/usr/local/lib/systemd/system/" + - "/tmp/build-slurm-{{ spack_arch }}" + - shell: | + /softs/slurm/slurm/configure \ + --prefix=/usr/local \ + --sysconfdir=/etc/slurm \ + > {{ log_file }} + + make -j4 >> {{ log_file }} + make install >> {{ log_file }} + args: + chdir: /tmp/build-slurm-{{ spack_arch }} + #creates: /root/build-slurm-{{ spack_arch }}.log + - file: + name: '/etc/init.d/munge' + state: absent + - systemd: + name: '/usr/local/lib/systemd/system/munge.service' + enabled: yes + daemon_reload: yes + rescue: + - file: + name: "{{ log_file }}" + state: absent + vars: + log_file: /root/build-slurm-{{ spack_arch }}.log + tags: + - slurm - - shell: "spack find -p slurm arch={{ spack_arch }}" - register: slurm_location_shell - - set_facts: - munge_location: "{{ munge_location_shell.stdout }}/lib/munge/systemd" - slurm_location: "{{ slurm_location_shell.stdout }}/etc" - become: yes +- name: configure slurm + block: + - file: + state: directory + name: "{{ item }}" + with_items: + - /etc/munge + - /etc/slurm tags: slurm -- name: install slurm on compute nodes - systemd: - name: "{{ item }}" - enabled: yes - with_items: - - "{{ munge_location }}/munge.service" - - "{{ slurm_location }}/slurmd.service" - when: "'compute' in group_names" +- name: generate munge key + shell: dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key + args: + creates: /etc/munge/munge.key + when: "inventory_hostname == groups['frontal'][0]" tags: slurm +- name: configure slurm + block: + - file: + name: "{{ item }}" + state: directory + owner: munge + group: munge + recurse: yes + 
with_items: + - '/var/log/munge' + - '/var/run/munge' + - '/etc/munge' + - file: + name: "{{ item }}" + state: directory + owner: slurm + group: slurm + recurse: yes + with_items: + - '/var/log/slurm-llnl' + - '/var/lib/slurm-llnl' + - '/var/run/slurm-llnl' + - '/var/lib/slurm-llnl/acct/' + - template: + src: slurm.conf + dest: "/etc/slurm/slurm.conf" + tags: + - slurm + - munge + - systemd + +- name: install slurm on compute nodes + block: + - copy: + src: slurmd.service + dest: /etc/systemd/system/multi-user.target.wants + - systemd: + daemon_reload: yes + - systemd: + name: "{{ item }}" + state: started + with_items: + - "munge" + - "slurmd" + when: "'compute' in group_names" + tags: + - slurm + - munge + - systemd - name: install slurm on frontal - systemd: - name: "{{ item }}" - enabled: yes - with_items: - - "{{ slurm_location }}/slurmctld.service" - - "{{ slurm_location }}/slurmdbd.service" + block: + - copy: + src: slurm.sh + dest: /etc/profile.d + - copy: + src: slurmctld.service + dest: /etc/systemd/system/multi-user.target.wants + - systemd: + daemon_reload: yes + - systemd: + name: "{{ item }}" + state: started + with_items: + - "munge" + - "slurmctld" when: "'frontal' in group_names" - tags: slurm + tags: + - slurm + - munge + - systemd diff --git a/deploy/roles/spack/defaults/main.yml b/deploy/roles/spack/defaults/main.yml index 66f14e1..adeaa44 100644 --- a/deploy/roles/spack/defaults/main.yml +++ b/deploy/roles/spack/defaults/main.yml @@ -1,3 +1,4 @@ --- ansible_become_method: su ansible_become_user: spack +ansible_timeout: 20 diff --git a/deploy/roles/spack/tasks/main.yml b/deploy/roles/spack/tasks/main.yml index 7608e73..1932e58 100644 --- a/deploy/roles/spack/tasks/main.yml +++ b/deploy/roles/spack/tasks/main.yml @@ -1,146 +1,150 @@ + --- - name: setting spack env block: - copy: src: .bashrc dest: /home/spack owner: spack group: spack - apt: name: "{{ item }}" with_items: - curl - g++ - gfortran - g++-6 - gfortran-6 - cmake - python - python3 - 
automake tags: - packages - shell: spack compiler find register: spack_compiler become_flags: --login become: yes changed_when: "'Found no new compilers' not in spack_compiler.stdout" tags: spack - name: gathering facts block: - shell: gcc-6 -dumpversion register: gcc_version_gather changed_when: (gcc_version is undefined) or (gcc_version != gcc_version_gather.stdout) - shell: "spack arch" become: yes become_flags: --login register: spack_arch_gather changed_when: (spack_arch is undefined) or (spack_arch != spack_arch_gather.stdout) - set_fact: gcc_version: "{{ gcc_version_gather.stdout }}" spack_arch: "{{ spack_arch_gather.stdout }}" cacheable: yes - setup: delegate_to: "{{ item }}" delegate_facts: True with_items: "{{ groups['frontal'] }}" tags: - facts - spack + - slurm - name: installing spack block: - debug: var: gcc_versions - git: repo: 'https://github.com/spack/spack.git' dest: /softs/spack update: no - template: src: modules.yaml dest: /softs/spack/etc/spack vars: gcc_versions: "{{ groups['all'] | map('extract', hostvars, 'gcc_version') | list | select('defined') | sort | unique }}" become: yes tags: spack when: "'frontal' in group_names" - name: create groups per arch group_by: key: "{{ spack_arch }}" tags: - spack + - spack + - slurm - name: setup lmod block: - shell: "spack install lmod%gcc@{{ gcc_version }}" become: yes become_flags: --login register: spack_install_lmod when: "inventory_hostname == groups[spack_arch][0]" changed_when: "'lmod is already installed' not in spack_install_lmod.stdout" - shell: "spack find -p lmod arch=$(spack arch) | tail -n1 | awk '{ print $2; }'" become: yes become_flags: --login register: lmod_path_gather changed_when: (lmod_path is undefined) or (lmod_path != lmod_path_gather.stdout) - set_fact: lmod_path: "{{ lmod_path_gather.stdout }}" cacheable: yes - file: src: "{{ lmod_path_gather.stdout }}/lmod/lmod/init/sh" state: link dest: "/etc/profile.d/lmod.sh" force: yes - template: src: lmod_conf.sh dest: 
"/etc/profile.d/lmod_conf.sh" tags: - facts - spack - lmod - name: setting up distcc block: - apt: name: "{{ item }}" with_items: - distcc tags: - packages - template: src: distcc_hosts dest: /etc/distcc/hosts notify: restart distcc - template: src: distcc dest: /etc/default/distcc notify: restart distcc when: "'frontal' not in group_names" tags: - spack - distcc -- name: install softs with spack - shell: "spack install {{ item }}%gcc@{{ gcc_version }}" - become: yes - become_flags: --login - when: "inventory_hostname == groups[spack_arch][0]" - register: spack_install - changed_when: "'is already installed' not in spack_install_lmod.stdout_lines[-1]" - tags: - - spack - with_items: - - slurm +#- name: install softs with spack +# shell: "spack install {{ item }}%gcc@{{ gcc_version }}" +# become: yes +# become_flags: --login +# when: "inventory_hostname == groups[spack_arch][0]" +# register: spack_install +# changed_when: "'is already installed' not in spack_install.stdout_lines[-1]" +# with_items: +# - "slurm +system_config ^munge+system_config" +# tags: +# - spack +# - slurm