src.co_tools.co_utils
"""Helpers for inspecting the compute environment, data folder and sample sheets."""
import os
from glob import glob
from multiprocessing import cpu_count
from pathlib import Path

# CO_LOG may be unset; default to "" so .lower() is never called on None.
if os.getenv("CO_LOG", "").lower() == "true":
    from .get_logger import LOGGER

    log = LOGGER
else:
    import logging

    log = logging.getLogger(__name__)

co_cpus = os.getenv("CO_CPUS")
aws_batch_job_id = os.getenv("AWS_BATCH_JOB_ID")


def _read_cgroup_cpu_quota():
    """Return ``(quota_us, period_us)`` read from the cgroup, or None if unavailable."""
    # cgroup v1 exposes quota and period as two single-integer files.
    try:
        with open("/sys/fs/cgroup/cpu/cpu.cfs_quota_us") as fp:
            quota_us = int(fp.read())
        with open("/sys/fs/cgroup/cpu/cpu.cfs_period_us") as fp:
            period_us = int(fp.read())
        return quota_us, period_us
    except (OSError, ValueError):
        pass
    # cgroup v2 exposes both values in cpu.max as "<quota|max> <period>".
    try:
        with open("/sys/fs/cgroup/cpu.max") as fp:
            quota, period = fp.read().split()
        if quota == "max":
            return None
        return int(quota), int(period)
    except (OSError, ValueError):
        return None


def get_cpu_limit(co_cpus=co_cpus, aws_batch_job_id=aws_batch_job_id) -> int:
    """Return the number of cores available for compute.

    Args:
        co_cpus (str or int, optional): Explicit core count. Defaults to the
            value of the CO_CPUS environment variable read at import time.
        aws_batch_job_id (str, optional): AWS Batch job id; when it is set and
            ``co_cpus`` is not, 1 is returned. Defaults to the value of the
            AWS_BATCH_JOB_ID environment variable read at import time.

    Returns:
        int: number of cores available for compute
    """
    log.debug(f"co_cpus: {co_cpus} aws_batch_job_id: {aws_batch_job_id}")
    if co_cpus:
        # Environment variables are strings; convert so the result is an int.
        try:
            return int(co_cpus)
        except (TypeError, ValueError):
            log.warning(f"Ignoring non-integer co_cpus value: {co_cpus!r}")
    if aws_batch_job_id:
        return 1
    container_cpus = 0
    limits = _read_cgroup_cpu_quota()
    if limits is not None:
        quota_us, period_us = limits
        # Guard against a zero period so the division cannot fail.
        if period_us > 0:
            container_cpus = quota_us // period_us
    host_cpus = cpu_count()
    # For physical machine, the `cfs_quota_us` could be '-1'
    log.debug(f"container_cpus: {container_cpus} cpu_count(): {host_cpus}")
    return host_cpus if container_cpus < 1 else container_cpus


def get_dir_contents(dir: str = "../data"):
    """Find all the files and folders below a directory.

    Args:
        dir (str, optional): The folder you want to search in. Defaults to "../data".

    Returns:
        str: newline separated string of files and folders in the search dir,
        or the integer 0 when nothing was found.
    """
    dir_contents = glob(str(f"{dir}/**/*"), recursive=True)
    if not dir_contents:
        log.warning(f"There are no files or folders in the {dir} folder.")
        return 0
    log.debug(f"Found the following files in {dir} {dir_contents}")
    return "\n".join(dir_contents)


def get_groups(filename: str = "../data/sample_sheet.csv"):
    """Return the distinct groups listed in the first column of a sample sheet.

    Args:
        filename (str, optional): Path to a sample sheet. If it is not an
            existing file, it is used as a glob pattern inside the ../data
            folder. Defaults to "../data/sample_sheet.csv".

    Returns:
        str: comma-separated string of groups in ascending alphabetical order,
        or the integer 0 if no sample sheet was found or it could not be read.
    """
    if Path(filename).is_file():
        log.debug(f"{filename} is a file.")
        sample_sheet = filename
    else:
        log.debug(f"type for {filename}: {type(filename)}")
        # NOTE: the pattern contains no "**", so recursive=True does not
        # descend into sub-folders of ../data.
        files_found = glob(str(f"../data/{filename}"), recursive=True)
        if not files_found:
            log.warning(f"No sample sheet found for '{filename}'")
            return 0
        if len(files_found) > 1:
            log.warning(f"Found multiple sample_sheets. Will use {files_found[0]}")
        log.debug(f"Searching found the following sample sheet(s): {files_found}")
        sample_sheet = files_found[0]
    groups_set = set()
    try:
        with open(f"{sample_sheet}", "r") as infile:
            for line in infile:
                line_group = line.strip().split(",")[0]
                # Blank lines would otherwise add an empty group name.
                if line_group:
                    groups_set.add(line_group)
    except (OSError, UnicodeError) as e:
        log.error(f"Could not open {sample_sheet} due to error {e}.")
        return 0
    groups = sorted(groups_set)
    log.debug(f"Returning the following groups from sample sheet: {groups}")
    return ",".join(groups)


def is_pipeline():
    """Confirm whether code is executing in a pipeline.

    Returns:
        int: Returns 1 if in a pipeline, 0 otherwise.
    """
    return int(bool(os.getenv("AWS_BATCH_JOB_ID")))


def print_log_msg(msg=None, level="WARNING"):
    """Log a message through the module logger at the requested level.

    Args:
        msg (optional): The message to log. Defaults to None.
        level (str, optional): One of DEBUG, INFO, WARNING, ERROR or CRITICAL,
            in any letter case. Defaults to "WARNING".

    Raises:
        Exception: if ``level`` is not one of the five supported names.
    """
    level = level.upper()
    method_names = {
        "DEBUG": "debug",
        "INFO": "info",
        "WARNING": "warning",
        "ERROR": "error",
        "CRITICAL": "critical",
    }
    if level not in method_names:
        raise Exception(
            "logging level is not one of [DEBUG, INFO, WARNING, ERROR, CRITICAL]"
        )
    return getattr(log, method_names[level])(msg)
def _read_cgroup_cpu_quota():
    """Return ``(quota_us, period_us)`` read from the cgroup, or None if unavailable."""
    # cgroup v1 exposes quota and period as two single-integer files.
    try:
        with open("/sys/fs/cgroup/cpu/cpu.cfs_quota_us") as fp:
            quota_us = int(fp.read())
        with open("/sys/fs/cgroup/cpu/cpu.cfs_period_us") as fp:
            period_us = int(fp.read())
        return quota_us, period_us
    except (OSError, ValueError):
        pass
    # cgroup v2 exposes both values in cpu.max as "<quota|max> <period>".
    try:
        with open("/sys/fs/cgroup/cpu.max") as fp:
            quota, period = fp.read().split()
        if quota == "max":
            return None
        return int(quota), int(period)
    except (OSError, ValueError):
        return None


def get_cpu_limit(co_cpus=co_cpus, aws_batch_job_id=aws_batch_job_id) -> int:
    """Return the number of cores available for compute.

    Args:
        co_cpus (str or int, optional): Explicit core count. Defaults to the
            module-level ``co_cpus`` value.
        aws_batch_job_id (str, optional): AWS Batch job id; when it is set and
            ``co_cpus`` is not, 1 is returned. Defaults to the module-level
            ``aws_batch_job_id`` value.

    Returns:
        int: number of cores available for compute
    """
    log.debug(f"co_cpus: {co_cpus} aws_batch_job_id: {aws_batch_job_id}")
    if co_cpus:
        # The value may arrive as a string; convert so the result is an int.
        try:
            return int(co_cpus)
        except (TypeError, ValueError):
            log.warning(f"Ignoring non-integer co_cpus value: {co_cpus!r}")
    if aws_batch_job_id:
        return 1
    container_cpus = 0
    limits = _read_cgroup_cpu_quota()
    if limits is not None:
        quota_us, period_us = limits
        # Guard against a zero period so the division cannot fail.
        if period_us > 0:
            container_cpus = quota_us // period_us
    host_cpus = cpu_count()
    # For physical machine, the `cfs_quota_us` could be '-1'
    log.debug(f"container_cpus: {container_cpus} cpu_count(): {host_cpus}")
    return host_cpus if container_cpus < 1 else container_cpus
This function returns an integer corresponding to the number of cores
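Args: co_cpus (str, optional): Core count to report when set. Defaults to the value of the CO_CPUS environment variable. aws_batch_job_id (str, optional): AWS Batch job id; when it is set and co_cpus is not, 1 is returned. Defaults to the value of the AWS_BATCH_JOB_ID environment variable.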
Returns: int: number of cores available for compute
def get_dir_contents(dir: str = "../data"):
    """Find all the files and folders below a directory.

    Args:
        dir (str, optional): The folder you want to search in. Defaults to "../data".

    Returns:
        str: newline separated string of files and folders in the search dir,
        or the integer 0 when nothing was found.
    """
    search_pattern = str(f"{dir}/**/*")
    matches = glob(search_pattern, recursive=True)
    # Guard clause: report the empty case first and return the 0 sentinel.
    if not matches:
        log.warning(f"There are no files or folders in the {dir} folder.")
        return 0
    log.debug(f"Found the following files in {dir} {matches}")
    return "\n".join(matches)
This function finds all the files and folders in a dir
Args: dir (str, optional): The folder you want to search in. Defaults to "../data".
Returns: str: newline separated string of files and folders in the search dir.
def get_groups(filename: str = "../data/sample_sheet.csv"):
    """Return the distinct groups listed in the first column of a sample sheet.

    Args:
        filename (str, optional): Path to a sample sheet. If it is not an
            existing file, it is used as a glob pattern inside the ../data
            folder. Defaults to "../data/sample_sheet.csv".

    Returns:
        str: comma-separated string of groups in ascending alphabetical order,
        or the integer 0 if no sample sheet was found or it could not be read.
    """
    if Path(filename).is_file():
        log.debug(f"{filename} is a file.")
        sample_sheet = filename
    else:
        log.debug(f"type for {filename}: {type(filename)}")
        # NOTE: the pattern contains no "**", so recursive=True does not
        # descend into sub-folders of ../data.
        files_found = glob(str(f"../data/{filename}"), recursive=True)
        if not files_found:
            log.warning(f"No sample sheet found for '{filename}'")
            return 0
        if len(files_found) > 1:
            log.warning(f"Found multiple sample_sheets. Will use {files_found[0]}")
        log.debug(f"Searching found the following sample sheet(s): {files_found}")
        sample_sheet = files_found[0]
    groups_set = set()
    try:
        with open(f"{sample_sheet}", "r") as infile:
            for line in infile:
                line_group = line.strip().split(",")[0]
                # Blank lines would otherwise add an empty group name.
                if line_group:
                    groups_set.add(line_group)
    except (OSError, UnicodeError) as e:
        log.error(f"Could not open {sample_sheet} due to error {e}.")
        return 0
    groups = sorted(groups_set)
    log.debug(f"Returning the following groups from sample sheet: {groups}")
    return ",".join(groups)
This function returns all the groups in a .csv
Args: filename (str, optional): Path to a sample sheet. Defaults to ../data/sample_sheet.csv if no path is supplied. If the supplied filename is not an existing file, this function will attempt to find it in the ../data folder.
Returns: str: comma-separated string of groups in ascending alphabetical order.
def is_pipeline():
    """Confirm whether code is executing in a pipeline.

    Returns:
        int: Returns 1 if in a pipeline, 0 otherwise.
    """
    # A non-empty AWS_BATCH_JOB_ID marks a pipeline run; unset or empty does not.
    batch_job_id = os.getenv("AWS_BATCH_JOB_ID")
    return int(bool(batch_job_id))
This function confirms whether code is executing in a pipeline
Returns: int: Returns 1 if in a pipeline, 0 otherwise.
def print_log_msg(msg=None, level="WARNING"):
    """Log a message through the module logger at the requested level.

    Args:
        msg (optional): The message to log. Defaults to None.
        level (str, optional): One of DEBUG, INFO, WARNING, ERROR or CRITICAL,
            in any letter case. Defaults to "WARNING".

    Raises:
        Exception: if ``level`` is not one of the five supported names.
    """
    level = level.upper()
    # Map each accepted level name to the logger method that handles it.
    method_names = {
        "DEBUG": "debug",
        "INFO": "info",
        "WARNING": "warning",
        "ERROR": "error",
        "CRITICAL": "critical",
    }
    if level not in method_names:
        raise Exception(
            "logging level is not one of [DEBUG, INFO, WARNING, ERROR, CRITICAL]"
        )
    return getattr(log, method_names[level])(msg)