diff --git a/job-submit.spec b/job-submit.spec
new file mode 100644
index 0000000..b943b46
--- /dev/null
+++ b/job-submit.spec
@@ -0,0 +1,73 @@
+%define fxdir /usr/share/lua/5.1/job_submit_fx
+%define name scitas-job-submit
+%define version 1.0.0
+%define release 1%{?dist}
+
+Name: %{name}
+Version: %{version}
+Release: %{release}
+License: GPLv3
+Summary: Lua functions for the Slurm job_submit plugin on SCITAS clusters
+URL: https://c4science.ch/source/scitas-job-submit
+Source0: %{name}-%{version}.tar.gz
+BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
+
+%description
+Collection of functions developed in Lua, integrated into the Slurm
+job_submit.lua script.
+
+%prep
+%setup -q
+
+%build
+
+%install
+
+install -m 755 -d %{buildroot}%{fxdir}
+install -m 755 -d %{buildroot}/etc/slurm/job_submit
+%define fxtarget billing_cost_estimate
+install -m 755 -d %{buildroot}%{fxdir}/%{fxtarget}/lua
+install -m 644 job_submit_fx/%{fxtarget}.lua %{buildroot}%{fxdir}/%{fxtarget}/init.lua
+install -m 644 job_submit_fx/%{fxtarget}.lua %{buildroot}%{fxdir}/%{fxtarget}/lua/init.lua
+
+%define fxtarget partition_setting
+install -m 755 -d %{buildroot}%{fxdir}/%{fxtarget}/lua
+install -m 644 job_submit_fx/%{fxtarget}.lua %{buildroot}%{fxdir}/%{fxtarget}/init.lua
+install -m 644 job_submit_fx/%{fxtarget}.lua %{buildroot}%{fxdir}/%{fxtarget}/lua/init.lua
+
+%define fxtarget scitas_cost
+install -m 755 -d %{buildroot}%{fxdir}/%{fxtarget}/lua
+install -m 644 job_submit_fx/%{fxtarget}.lua %{buildroot}%{fxdir}/%{fxtarget}/init.lua
+install -m 644 job_submit_fx/%{fxtarget}.lua %{buildroot}%{fxdir}/%{fxtarget}/lua/init.lua
+
+%define fxtarget scitas_debug
+install -m 755 -d %{buildroot}%{fxdir}/%{fxtarget}/lua
+install -m 644 job_submit_fx/%{fxtarget}.lua %{buildroot}%{fxdir}/%{fxtarget}/init.lua
+install -m 644 job_submit_fx/%{fxtarget}.lua %{buildroot}%{fxdir}/%{fxtarget}/lua/init.lua
+
+%define fxtarget track_gres
+install -m 755 -d %{buildroot}%{fxdir}/%{fxtarget}/lua
+install -m 644 job_submit_fx/%{fxtarget}.lua %{buildroot}%{fxdir}/%{fxtarget}/init.lua
+install -m 644 job_submit_fx/%{fxtarget}.lua %{buildroot}%{fxdir}/%{fxtarget}/lua/init.lua
+
+%define fxtarget verbose_mode
+install -m 755 -d %{buildroot}%{fxdir}/%{fxtarget}/lua
+install -m 644 job_submit_fx/%{fxtarget}.lua %{buildroot}%{fxdir}/%{fxtarget}/init.lua
+install -m 644 job_submit_fx/%{fxtarget}.lua %{buildroot}%{fxdir}/%{fxtarget}/lua/init.lua
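+
+# NOTE: rpmbuild needs a %%files section to actually package the payload
+# installed above; this minimal list matches the paths used in %%install.
+%files
+%defattr(-,root,root,-)
+%{fxdir}
+%dir /etc/slurm/job_submit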
+
+
+%clean
+rm -rf %{buildroot}
+
+
+%changelog
+
+* Mon Jul 26 2021 Antonio J. RUSSO - 1.0.0-1
+- Initial RPM release
diff --git a/job_submit.lua b/job_submit.lua
index d5c0176..f91528f 100755
--- a/job_submit.lua
+++ b/job_submit.lua
@@ -1,57 +1,82 @@
--- cf. slurm/slurm_errno.h
-ESLURM_INVALID_GRES = 2072
-
 --########################################################################--
 --
 -- Load billing_cost_estimate parameters (can be overriden in rates_file)
 --
 --########################################################################--
-CONF_DIR = '/etc/slurm/bce'
-CONF_FILES = {'cluster.conf', 'rates.conf'}
+CONF_DIR = '/etc/slurm/job_submit'
+CONF_FILES = {'job_submit.conf', 'cluster.conf', 'rates.conf'}
 
 for index, file in ipairs(CONF_FILES) do
   filetoload = CONF_DIR.."/"..file
   file_fh = io.open(filetoload, "r")
   if file_fh == nil then
     slurm.log_info("slurm_job_modify: No readable %s found!", filetoload)
   else
     io.close(file_fh)
     dofile(filetoload)
   end
 end
 
--- require fonctions verbose_mode, track_gres, scitas_cost.lua, partition_setting.lua --
-require(verbose_mode.lua)
-require(track_gres.lua)
-require(scitas_cost.lua)
-require(partition_setting.lua)
+
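+-- The FX_* feature toggles are expected to be defined in one of the conf
+-- files loaded above (e.g. job_submit.conf); a function is only loaded
+-- and called when its toggle is set.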
+if FX_VERBOSE then
+  require('job_submit_fx/verbose_mode.lua')
+end
+
+if FX_TRACK_GRES then
+  require('job_submit_fx/track_gres.lua')
+end
+
+
+if FX_SCITAS_COST then
+  require('job_submit_fx/scitas_cost.lua')
+end
+
+if FX_PARTITION then
+  require('job_submit_fx/partition_setting.lua')
+end
 
 function slurm_job_submit(job_desc, part_list, submit_uid)
 
--- Verbose mode --
--- To enable verbose mode, you must set the verbose_mode variable to 1
-  local verbose = 0
-  if verbose == 1 then
+  if FX_VERBOSE then
     verbose_mode(job_desc)
   end
+
+  if FX_PARTITION then
+    local partition = partition_setting(job_desc, submit_uid, INFINITE, SEVENTY, PARALLEL_PARTITION, SERIAL_PARTITION)
+    if job_desc.partition ~= partition then
+      job_desc.partition = partition
+      slurm.log_info("slurm_job_submit: for user %u, setting partition: %s",
+                     submit_uid, partition)
+    end
+  end
 
-  status = track_gres(job_desc, submit_uid)
-  if status ~= 0 then
-    return status
-  end
+  if FX_TRACK_GRES then
+    status = track_gres(job_desc, submit_uid)
+    if status ~= 0 then
+      return status
+    end
+  end
 
-  scitas_cost(job_desc, CPU_COST, GPU_COST, CORES_PER_NODE, GPUS_PER_NODE, DEFAULT_WTIME, DEFAULT_PARTITION, PARALLEL_PARTITION, SERIAL_PARTITION, submit_uid)
+  if FX_SCITAS_COST then
+    scitas_cost(job_desc, CPU_COST, GPU_COST, CORES_PER_NODE, GPUS_PER_NODE, DEFAULT_WTIME, DEFAULT_PARTITION, PARALLEL_PARTITION, SERIAL_PARTITION, submit_uid)
+  end
 
   return slurm.SUCCESS
 end
 
 -- The other required function
 function slurm_job_modify(job_desc, job_rec, part_list, modify_uid)
   return slurm.SUCCESS
 end
 
 slurm.log_info("job_submit_plugin initialized")
 
 return slurm.SUCCESS
diff --git a/billing_cost_estimate.lua b/job_submit_fx/billing_cost_estimate.lua
similarity index 95%
rename from billing_cost_estimate.lua
rename to job_submit_fx/billing_cost_estimate.lua
index 0a9f0a9..c2ee866 100644
--- a/billing_cost_estimate.lua
+++ b/job_submit_fx/billing_cost_estimate.lua
@@ -1,132 +1,136 @@
+-- Function billing_cost_estimate to display job cost --
 function billing_cost_estimate (job_desc, cpu_cost, gpu_cost, corespernode, gpuspernode, defaultwtime, defaultpartition, parallel, serial, submit_uid)
 
 -- Initializing local variables
+  require('job_submit_fx/scitas_debug.lua')
   local gputres = nil
   local gpu = nil
   local chf = nil
   local timeinsec = nil
   local nodetres = job_desc.min_nodes
   local cputres = job_desc.min_cpus
   local wtime = job_desc.time_limit
   local partition = job_desc.partition
   local ntaskpernode = job_desc.ntasks_per_node
   local costunit = "cpu"
   local debugmode = scitas_debug(job_desc)
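+  -- 65534 and 4294967294 are Slurm's NO_VAL16/NO_VAL sentinels, meaning
+  -- "option not set by the user" (cf. slurm.h)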
slurm.log_user("partition_setting: third case") end partition = parallel else -- At this point, we have no partition, no reservation, one node not exclusive -- Check the number of cores -- Fourth case: The user has required less than 75 % of a node if job_desc.min_cpus <= seventyfive then if debugmode == 1 then slurm.log_user("partition_setting: fourth case") end partition = serial else -- Fifth case: The user has required more than 75 % of a node not exclusive if debugmode == 1 then slurm.log_user("partition_setting: fifth case") end partition = parallel end end else -- Sixth case: The user has required more than one node if job_desc.max_nodes ~= infinite then if debugmode == 1 then slurm.log_user("partition_setting: sixth case") end partition = parallel else -- Seventh case: The user not required a number of nodes if debugmode == 1 then slurm.log_user("partition_setting: seventh case") end if job_desc.min_cpus <= seventyfive then partition = serial else partition = parallel end end end end -- Eighth case: The user required less than 75 % of one non exclusive node and -- has declared the parallel partition outside of a reservation if job_desc.partition == parallel and job_desc.max_nodes == 1 and job_desc.reservation == nil and job_desc.min_cpus <= seventyfive and job_desc.shared ~= 0 then if debugmode == 1 then slurm.log_user("partition_setting: eighth case") end partition = serial end if debugmode == 1 then slurm.log_user("partition_setting: setting partition: %s",partition) end return partition end diff --git a/scitas_cost.lua b/job_submit_fx/scitas_cost.lua similarity index 88% rename from scitas_cost.lua rename to job_submit_fx/scitas_cost.lua index e6ea527..7467853 100644 --- a/scitas_cost.lua +++ b/job_submit_fx/scitas_cost.lua @@ -1,11 +1,12 @@ function scitas_cost (job_desc, cpu_cost, gpu_cost, corespernode, gpuspernode, defaultwtime, defaultpartition, parallel, serial, submit_uid) if job_desc.comment ~= nil then if string.match(job_desc.comment, "scitas.cost") then + require('job_submit_fx/billing_cost_estimate.lua') billing_cost_estimate(job_desc, cpu_cost, gpu_cost, corespernode, gpuspernode, defaultwtime, defaultpartition, parallel, serial, submit_uid) else return 0 end else return 0 end end diff --git a/job_submit_fx/scitas_debug.lua b/job_submit_fx/scitas_debug.lua new file mode 100644 index 0000000..7081d48 --- /dev/null +++ b/job_submit_fx/scitas_debug.lua @@ -0,0 +1,10 @@ +function scitas_debug (job_desc) + slurm.log_info("billing::: Scitas Debug") + if job_desc.comment ~= nil then + if string.match(job_desc.comment, "scitas.debug") then + return 1 + else + return 0 + end + end +end diff --git a/job_submit_fx/track_gres.lua b/job_submit_fx/track_gres.lua new file mode 100644 index 0000000..09bbc27 --- /dev/null +++ b/job_submit_fx/track_gres.lua @@ -0,0 +1,14 @@ +-- Check if minimum gres is respected +function track_gres (job_desc, submit_uid) + + local gres = job_desc["gres"] + local user = job_desc["user_name"] + + if ( gres == nil or string.find(gres, "gpu") == nil ) then + slurm.log_info("track_gres: job %u from user %s: minimum gres is not respected", submit_uid, user) + return ESLURM_INVALID_GRES + else + return 0 + end + +end diff --git a/job_submit_fx/verbose_mode.lua b/job_submit_fx/verbose_mode.lua new file mode 100644 index 0000000..f377393 --- /dev/null +++ b/job_submit_fx/verbose_mode.lua @@ -0,0 +1,10 @@ +-- Function verbose mode for debugging purpose +function verbose_mode (job_desc) + local job_desc_keys = { "account", "acctg_freq", "admin_comment", 
"alloc_node", "argc", "array_inx", "batch_features", "begin_time", "bitflags", "boards_per_node", "burst_buffer", "clusters", "comment", "contiguous", "cores_per_socket", "cpu_freq_min", "cpu_freq_max", "cpu_freq_gov", "cpus_per_task", "cpus_per_tres", "cron_job", "default_account", "default_qos", "delay_boot", "dependency", "end_time", "extra", "exc_nodes", "features", "gres", "tres_per_node", "group_id", "immediate", "licenses", "mail_type", "mail_user", "max_cpus", "max_nodes", "mem_per_tres", "min_cpus", "min_mem_per_node", "min_mem_per_cpu", "min_nodes", "name", "nice", "ntasks_per_board", "ntasks_per_core", "ntasks_per_gpu", "ntasks_per_node", "ntasks_per_socket", "ntasks_per_tres", "num_tasks", "pack_job_offset", "het_job_offset", "partition", "power_flags", "pn_min_cpus", "pn_min_memory", "pn_min_tmp_disk", "priority", "qos", "reboot", "req_nodes", "req_switch", "requeue", "reservation", "script", "shared", "oversubscribe", "site_factor", "sockets_per_node", "spank_job_env", "spank_job_env_size", "std_err", "std_in", "std_out", "threads_per_core", "time_limit", "time_min", "tres_bind", "tres_freq", "tres_per_job", "tres_per_node", "tres_per_socket", "tres_per_task", "tres_alloc_str", "user_id", "user_name", "wait4switch", "work_dir", "wckey"} + + for key,value in pairs(job_desc_keys) do + if job_desc[string.format(value)] ~= nil then + slurm.log_info("verbose::: "..value.."="..job_desc[string.format(value)]) + end + end +end diff --git a/verbose_mode.lua b/verbose_mode.lua deleted file mode 100644 index 88383fb..0000000 --- a/verbose_mode.lua +++ /dev/null @@ -1,9 +0,0 @@ -function verbose_mode (job_desc) - local job_desc_keys = {"account", "acctg_freq", "admin_comment", "alloc_node", "argc", "array_inx", "batch_features", "begin_time", "bitflags", "boards_per_node", "burst_buffer", "clusters", "comment", "contiguous", "cores_per_socket", "cpu_freq_min", "cpu_freq_max", "cpu_freq_gov", "cpus_per_task", "cpus_per_tres", "cron_job", "default_account", "default_qos", "delay_boot", "dependency", "end_time", "extra", "exc_nodes", "features", "gres", "tres_per_node", "group_id", "immediate", "licenses", "mail_type", "mail_user", "max_cpus", "max_nodes", "mem_per_tres", "min_cpus", "min_mem_per_node", "min_mem_per_cpu", "min_nodes", "name", "nice", "ntasks_per_board", "ntasks_per_core", "ntasks_per_gpu", "ntasks_per_node", "ntasks_per_socket", "ntasks_per_tres", "num_tasks", "pack_job_offset", "het_job_offset", "partition", "power_flags", "pn_min_cpus", "pn_min_memory", "pn_min_tmp_disk", "priority", "qos", "reboot", "req_nodes", "req_switch", "requeue", "reservation", "script", "shared", "oversubscribe", "site_factor", "sockets_per_node", "spank_job_env", "spank_job_env_size", "std_err", "std_in", "std_out", "threads_per_core", "time_limit", "time_min", "tres_bind", "tres_freq", "tres_per_job", "tres_per_node", "tres_per_socket", "tres_per_task", "tres_alloc_str", "user_id", "user_name", "wait4switch", "work_dir", "wckey"} - - for key,value in pairs(job_desc_keys) do - if job_desc[string.format(value)] ~= nil then - slurm.log_info("verbose::: "..value.."="..job_desc[string.format(value)]) - end - end -end