Coverage for src/srunx/workflows/tasks.py: 28%
25 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-21 03:27 +0900
1from srunx.client import Slurm
2from srunx.logging import get_logger
3from srunx.models import BaseJob, Job, ShellJob
# Module-level logger obtained from srunx.logging.get_logger using this
# module's __name__; the task functions below log through it.
5logger = get_logger(__name__)
def submit_and_monitor_job(
    job: Job | ShellJob,
    poll_interval: int = 5,
) -> Job | ShellJob:
    """Run one SLURM job from submission through to the end of monitoring.

    The job is handed to a freshly created ``Slurm`` client, and the
    submitted job is then passed to ``Slurm.monitor`` with the requested
    polling interval. Progress is logged before submission, after
    submission, and after monitoring returns.

    Args:
        job: Job configuration to submit.
        poll_interval: Status polling interval in seconds, forwarded to
            ``Slurm.monitor``.

    Returns:
        The job object returned by ``Slurm.monitor``.

    Raises:
        AssertionError: If the monitored result is neither a ``Job`` nor a
            ``ShellJob`` (only checked when assertions are enabled).

    Note:
        Exceptions raised by ``Slurm.submit`` or ``Slurm.monitor`` propagate
        unchanged. NOTE(review): earlier documentation listed
        ``RuntimeError`` (job failure) and ``subprocess.CalledProcessError``
        (failed job operations) -- confirm against the client implementation.
    """
    logger.info(f"Starting SLURM job submission and monitoring for '{job.name}'")

    slurm = Slurm()

    # Step 1: hand the job to the scheduler.
    submitted_job = slurm.submit(job)
    logger.info(f"Job '{submitted_job.name}' submitted with ID {submitted_job.job_id}")

    # Step 2: block until monitoring reports back.
    completed_job = slurm.monitor(submitted_job, poll_interval=poll_interval)
    logger.info(f"Job '{completed_job.name}' (ID: {completed_job.job_id}) completed")

    # Narrow the client's return type to the declared return annotation.
    assert isinstance(completed_job, (Job, ShellJob))
    return completed_job
def submit_job_async(job: Job | ShellJob) -> Job | ShellJob:
    """Hand a SLURM job to the scheduler and return without monitoring it.

    Args:
        job: Job configuration to submit.

    Returns:
        The object returned by ``Slurm.submit``; its ``name`` and ``job_id``
        are written to the log.

    Raises:
        AssertionError: If the submitted result is neither a ``Job`` nor a
            ``ShellJob`` (only checked when assertions are enabled).
    """
    submitted_job = Slurm().submit(job)
    logger.info(f" - '{submitted_job.name}' (ID: {submitted_job.job_id})")

    # Narrow the client's return type to the declared return annotation.
    assert isinstance(submitted_job, (Job, ShellJob))
    return submitted_job
def wait_for_job(job_id: int, poll_interval: int = 5) -> BaseJob:
    """Wait for an already-submitted SLURM job, identified by ID, to complete.

    A new ``Slurm`` client is created and asked to monitor the job; a log
    line is emitted before monitoring starts and after it returns.

    Args:
        job_id: SLURM job ID.
        poll_interval: Polling interval in seconds, forwarded to
            ``Slurm.monitor``.

    Returns:
        The job object returned by ``Slurm.monitor``.
    """
    logger.info(f"Waiting for job {job_id} to complete")
    client = Slurm()
    # Pass the interval by keyword (as submit_and_monitor_job does) so it
    # cannot silently bind to a different positional parameter of monitor().
    completed_job = client.monitor(job_id, poll_interval=poll_interval)
    logger.info(f"Job {job_id} completed")
    return completed_job