23
23
# cython: language_level=3
24
24
25
25
from pyslurm.core.error import verify_rpc, RPCError
26
- from pyslurm.utils.uint import *
26
+ from pyslurm.utils.uint import (
27
+ u16_parse,
28
+ u32_parse,
29
+ u64_parse,
30
+ )
31
+ from pyslurm.constants import UNLIMITED
27
32
from pyslurm.utils.ctime import _raw_time
28
33
from pyslurm.utils.helpers import (
29
34
cpu_freq_int_to_str,
30
35
instance_to_dict,
31
36
)
32
37
from pyslurm.utils import cstr
38
+ from typing import Union
39
+ import time
40
+ from enum import IntEnum
41
+
42
+
43
class ShutdownMode(IntEnum):
    """Selects what a `shutdown()` request should bring down.

    The integer value of the chosen member is what `shutdown()` hands to
    `slurm_shutdown()`. The per-member notes below follow the Slurm API
    description of `slurm_shutdown()` options - confirm against the Slurm
    version in use.
    """
    # Shut down all Slurm daemons (controller and slurmd daemons).
    ALL = 0
    # Have the controller generate a core file.
    CORE_FILE = 1
    # Shut down only the Slurm controller.
    CONTROLLER_ONLY = 2
48
+
49
+
50
cdef class PingResponse:
    """Result of pinging a single Slurm controller.

    Instances are filled in by `ping()`, which sets the following
    attributes:

    Attributes:
        is_primary (bool):
            Whether the pinged controller is the one at index `0`.
        is_responding (bool):
            Whether the ping returned a zero (success) return code.
        index (int):
            The controller index that was pinged.
        hostname (str):
            Entry of `control_machine` in the Slurm config for this index.
        latency (float):
            Wall-clock duration of the ping call in milliseconds, rounded
            to 3 decimal places.
    """

    def to_dict(self):
        """Slurmctld ping response formatted as dictionary.

        Returns:
            (dict): Ping response as a dict

        Examples:
            >>> from pyslurm import slurmctld
            >>> ctld_primary = slurmctld.ping(0)
            >>> primary_dict = ctld_primary.to_dict()
        """
        return instance_to_dict(self)
64
+
65
+
66
def ping(index):
    """Ping a Slurm controller

    Args:
        index (int):
            Index of the controller to ping. `0` is the primary controller,
            higher values address the backup controllers. Must be smaller
            than the number of configured controllers.

    Returns:
        (pyslurm.slurmctld.PingResponse): a ping response. Its `latency` is
            given in milliseconds.

    Raises:
        (pyslurm.RPCError): When `index` does not refer to a configured
            controller, or when the ping was not successful.

    Examples:
        >>> from pyslurm import slurmctld
        >>> resp = slurmctld.ping(0)
        >>> print(resp.hostname, resp.latency)
        slurmctl 1.246
    """
    # Validate before doing anything else: an out-of-range (or negative)
    # index must neither be sent to the controller nor be used to index the
    # control_machine array further below.
    ctl_cnt = slurm.slurm_conf.control_cnt
    if index < 0 or index >= ctl_cnt:
        raise RPCError(msg=" Invalid Index specified.")

    # Only the RPC itself is timed.
    t0 = time.perf_counter()
    rc = slurm_ping(index)
    t1 = time.perf_counter()

    verify_rpc(rc)

    info = PingResponse()
    info.is_primary = index == 0
    # NOTE(review): verify_rpc() presumably raises for a non-zero rc, in
    # which case this is always True when reached - confirm.
    info.is_responding = not rc
    info.index = index
    info.hostname = cstr.to_unicode(slurm.slurm_conf.control_machine[index])
    # perf_counter() yields seconds; report milliseconds with 3 decimals.
    info.latency = round((t1 - t0) * 1000, 3)

    return info
96
+
97
+
98
def ping_primary():
    """Ping the primary Slurm Controller.

    Shorthand for `ping(0)`; see `ping()` for more information and
    examples.

    Returns:
        (pyslurm.slurmctld.PingResponse): a ping response
    """
    primary_index = 0
    return ping(primary_index)
107
+
108
+
109
def ping_backup():
    """Ping the first backup Slurm Controller.

    Shorthand for `ping(1)`; see `ping()` for more information and
    examples.

    Returns:
        (pyslurm.slurmctld.PingResponse): a ping response
    """
    first_backup_index = 1
    return ping(first_backup_index)
118
+
119
+
120
def ping_all():
    """Ping all Slurm Controllers.

    Every controller index from `0` up to the configured controller count
    is pinged in order via `ping()`.

    Returns:
        (list[pyslurm.slurmctld.PingResponse]): a list of ping responses

    Raises:
        (pyslurm.RPCError): When the ping was not successful.

    Examples:
        >>> from pyslurm import slurmctld
        >>> resps = slurmctld.ping_all()
        >>> for resp in resps:
        ...     print(resp.hostname, resp.latency)
        ...
        slurmctl 1.246
        slurmctlbackup 1.373
    """
    controller_count = slurm.slurm_conf.control_cnt
    return [ping(idx) for idx in range(controller_count)]
145
+
146
+
147
def shutdown(mode: Union[ShutdownMode, int]):
    """Shutdown Slurm Controller or all Daemons

    Args:
        mode:
            Whether only the Slurm controller should be shut down, or also
            all other slurmd daemons. Converted with `int()` before it is
            passed on.

    Raises:
        (pyslurm.RPCError): When shutting down the daemons was not
            successful.
    """
    rc = slurm_shutdown(int(mode))
    verify_rpc(rc)
159
+
160
+
161
def reconfigure():
    """Trigger Slurm Controller to reload the Config

    Raises:
        (pyslurm.RPCError): When reconfiguring was not successful.
    """
    rc = slurm_reconfigure()
    verify_rpc(rc)
168
+
169
+
170
def takeover(index=1):
    """Let a Backup Slurm Controller take over as the Primary.

    Args:
        index (int, optional = 1):
            Index of the Backup Controller that should take over. By default,
            the `index` is `1`, meaning the next Controller configured after
            the Primary in slurm.conf (second `SlurmctlHost` entry) will be
            asked to take over operation.

            If you have more than one backup controller configured, you can for
            example also pass `2` as the index.

    Returns:
        None

    Raises:
        (pyslurm.RPCError): When the takeover was not successful.
    """
    rc = slurm_takeover(index)
    verify_rpc(rc)
33
187
34
188
35
189
cdef class MPIConfig:
@@ -75,7 +229,7 @@ cdef class MPIConfig:
75
229
76
230
cdef class CgroupConfig:
77
231
78
- def __init__ (self , job_id ):
232
+ def __init__ (self ):
79
233
raise RuntimeError (" Cannot instantiate class directly" )
80
234
81
235
def to_dict (self ):
@@ -121,7 +275,7 @@ cdef class CgroupConfig:
121
275
122
276
cdef class AccountingGatherConfig:
123
277
124
- def __init__ (self , job_id ):
278
+ def __init__ (self ):
125
279
raise RuntimeError (" Cannot instantiate class directly" )
126
280
127
281
def to_dict (self ):
@@ -147,7 +301,7 @@ cdef class AccountingGatherConfig:
147
301
out.energy_ipmi_calc_adjustment = _yesno_to_bool(
148
302
conf.get(" EnergyIPMICalcAdjustment" ))
149
303
150
- # TODO: dict
304
+ # TODO: maybe dict?
151
305
out.energy_ipmi_power_sensors = conf.get(" EnergyIPMIPowerSensors" )
152
306
153
307
out.energy_ipmi_user_name = conf.get(" EnergyIPMIUsername" )
@@ -176,8 +330,9 @@ cdef class Config:
176
330
def __cinit__ (self ):
177
331
self .ptr = NULL
178
332
179
- def __init__ (self , job_id ):
180
- raise RuntimeError (" Cannot instantiate class directly" )
333
+ def __init__ (self ):
334
+ raise RuntimeError (" Cannot instantiate class directly. "
335
+ " Use slurmctld.Config.load() to get an instance." )
181
336
182
337
def __dealloc__ (self ):
183
338
slurm_free_ctl_conf(self .ptr)
@@ -201,13 +356,21 @@ cdef class Config:
201
356
202
357
@staticmethod
203
358
def load ():
359
+ """ Load the current Slurm configuration (slurm.conf)
360
+
361
+ This also loads the following other configurations:
362
+ * `cgroup.conf` (`cgroup_config`)
363
+ * `acct_gather.conf` (`accounting_gather_config`)
364
+ * `mpi.conf` (`mpi_config`)
365
+ """
204
366
cdef Config conf = Config.__new__ (Config)
205
367
verify_rpc(slurm_load_ctl_conf(0 , & conf.ptr))
206
368
207
369
conf.cgroup_config = CgroupConfig.from_ptr(conf.ptr.cgroup_conf)
208
370
conf.accounting_gather_config = AccountingGatherConfig.from_ptr(
209
371
conf.ptr.acct_gather_conf)
210
372
conf.mpi_config = MPIConfig.from_ptr(conf.ptr.mpi_conf)
373
+ # TODO: node_features_conf
211
374
212
375
return conf
213
376
@@ -431,16 +594,6 @@ cdef class Config:
431
594
return cstr.to_list_with_count(self .ptr.epilog_slurmctld,
432
595
self .ptr.epilog_slurmctld_cnt)
433
596
434
- # @property
435
- # def external_sensors_type(self):
436
- # return cstr.to_unicode(self.ptr.ext_sensors_type)
437
-
438
- # @property
439
- # def external_sensors_frequency(self):
440
- # return u16_parse(self.ptr.ext_sensors_freq)
441
-
442
- # TODO: void *ext_sensors_conf put into own class?
443
-
444
597
@property
445
598
def federation_parameters (self ):
446
599
return cstr.to_list(self .ptr.fed_params)
@@ -469,7 +622,6 @@ cdef class Config:
469
622
470
623
@property
471
624
def group_update_force (self ):
472
- # TODO: maybe bool?
473
625
return u16_parse_bool(self .ptr.group_force)
474
626
475
627
@property
@@ -485,7 +637,6 @@ cdef class Config:
485
637
val = u32_parse(self .ptr.hash_val)
486
638
if not val:
487
639
return None
488
-
489
640
return hex (val)
490
641
491
642
@property
@@ -534,10 +685,6 @@ cdef class Config:
534
685
def job_completion_parameters (self ):
535
686
return cstr.to_list(self .ptr.job_comp_params)
536
687
537
- # @property
538
- # def job_completion_password(self):
539
- # return cstr.to_unicode(self.ptr.job_comp_pass)
540
-
541
688
@property
542
689
def job_completion_port (self ):
543
690
return u32_parse(self .ptr.job_comp_port)
@@ -675,8 +822,6 @@ cdef class Config:
675
822
def next_job_id (self ):
676
823
return u32_parse(self .ptr.next_job_id)
677
824
678
- # TODO: void *node_features_conf put into own class?
679
-
680
825
@property
681
826
def node_features_plugins (self ):
682
827
return cstr.to_list(self .ptr.node_features_plugins)
@@ -686,22 +831,13 @@ cdef class Config:
686
831
return u16_parse(self .ptr.over_time_limit)
687
832
688
833
@property
689
- def plugin_path (self ):
690
- # TODO: maybe list
691
- return cstr.to_unicode(self .ptr.plugindir)
834
+ def plugin_dirs (self ):
835
+ return cstr.to_list(self .ptr.plugindir, None , " :" )
692
836
693
837
@property
694
838
def plugin_stack_config (self ):
695
839
return cstr.to_unicode(self .ptr.plugstack)
696
840
697
- # @property
698
- # def power_parameters(self):
699
- # return cstr.to_list(self.ptr.power_parameters)
700
-
701
- # @property
702
- # def power_plugin(self):
703
- # return cstr.to_unicode(self.ptr.power_plugin)
704
-
705
841
@property
706
842
def preempt_exempt_time (self ):
707
843
# seconds?
@@ -1295,6 +1431,7 @@ def _log_level_int_to_str(flags):
1295
1431
else :
1296
1432
return data
1297
1433
1434
+
1298
1435
def _acct_store_flags_int_to_str (flags ):
1299
1436
cdef list out = []
1300
1437
@@ -1311,6 +1448,7 @@ def _acct_store_flags_int_to_str(flags):
1311
1448
1312
1449
return out
1313
1450
1451
+
1314
1452
def _get_memory (value , per_cpu ):
1315
1453
if value != slurm.NO_VAL64:
1316
1454
if value & slurm.MEM_PER_CPU and per_cpu:
0 commit comments