Coverage for src/srunx/workflows/tasks.py: 28%

25 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-21 03:27 +0900

1from srunx.client import Slurm 

2from srunx.logging import get_logger 

3from srunx.models import BaseJob, Job, ShellJob 

4 

5logger = get_logger(__name__) 

6 

7 

def submit_and_monitor_job(
    job: Job | ShellJob,
    poll_interval: int = 5,
) -> Job | ShellJob:
    """Run one SLURM job from submission through to completion.

    A fresh ``Slurm`` client is created, the job is submitted through it,
    and the same client then monitors the submitted job until
    ``monitor`` returns. Progress is reported via the module logger.

    Args:
        job: The job definition to submit.
        poll_interval: Value forwarded to ``Slurm.monitor`` as its
            ``poll_interval`` keyword (status polling interval in seconds).

    Returns:
        The object returned by ``Slurm.monitor`` for the submitted job.

    Raises:
        AssertionError: If the monitored result is neither a ``Job`` nor a
            ``ShellJob``.

    Note:
        Nothing is caught here, so any exception raised by
        ``Slurm.submit`` or ``Slurm.monitor`` reaches the caller unchanged.
        Earlier documentation listed ``RuntimeError`` (job failure) and
        ``subprocess.CalledProcessError`` (failed job operations); those
        would originate in the client -- TODO confirm against ``Slurm``.
    """
    logger.info(f"Starting SLURM job submission and monitoring for '{job.name}'")
    slurm = Slurm()

    # Phase 1: hand the job over to the scheduler.
    queued = slurm.submit(job)
    logger.info(f"Job '{queued.name}' submitted with ID {queued.job_id}")

    # Phase 2: block until the client reports the job as finished.
    finished = slurm.monitor(queued, poll_interval=poll_interval)
    logger.info(f"Job '{finished.name}' (ID: {finished.job_id}) completed")

    # Narrows the client's return type to the one this function declares.
    assert isinstance(finished, (Job, ShellJob))
    return finished

41 

42 

def submit_job_async(job: Job | ShellJob) -> Job | ShellJob:
    """Submit a SLURM job and return immediately, without monitoring it.

    Args:
        job: The job definition to submit.

    Returns:
        The object returned by ``Slurm.submit``; the log line emitted here
        reads its ``name`` and ``job_id`` attributes.

    Raises:
        AssertionError: If the submitted result is neither a ``Job`` nor a
            ``ShellJob``.
    """
    queued = Slurm().submit(job)
    logger.info(f" - '{queued.name}' (ID: {queued.job_id})")

    # Narrows the client's return type to the one this function declares.
    assert isinstance(queued, (Job, ShellJob))
    return queued

57 

58 

def wait_for_job(job_id: int, poll_interval: int = 5) -> BaseJob:
    """Block until an already-submitted SLURM job completes.

    A fresh ``Slurm`` client is created and asked to monitor ``job_id``;
    a log line is written before and after the wait.

    Args:
        job_id: SLURM job ID to wait on.
        poll_interval: Polling interval in seconds, forwarded to
            ``Slurm.monitor``.

    Returns:
        The job object returned by ``Slurm.monitor``.

    Note:
        Exceptions raised by ``Slurm.monitor`` are not caught here and
        propagate to the caller.
    """
    logger.info(f"Waiting for job {job_id} to complete")
    client = Slurm()
    # Pass the interval by keyword, matching submit_and_monitor_job, so the
    # call does not depend on the positional order of monitor()'s parameters.
    completed_job = client.monitor(job_id, poll_interval=poll_interval)
    logger.info(f"Job {job_id} completed")
    return completed_job