src.co_tools.co_fasta
"""Helpers for recognising FASTA files by extension and finding one in a directory tree."""
import os
import re
from glob import glob
from pathlib import Path
from pathlib import PurePath

# CO_LOG may be unset; default to "" so the comparison cannot fail on None.
if os.getenv("CO_LOG", "").lower() == "true":
    from .get_logger import LOGGER

    log = LOGGER
else:
    import logging

    log = logging.getLogger(__name__)

# Extensions that identify a FASTA file.
FASTA_EXTENSIONS = [".fa", ".fna", ".ffn", ".frn", ".fasta", ".faa"]
# Every suffix allowed on a FASTA file name: a FASTA extension or a compression suffix.
ALL_SUFFIXES = FASTA_EXTENSIONS + [".gz", ".bgz"]


def find_extension(input_file: str) -> str:
    """Return ``input_file`` if its extension marks it as a FASTA file.

    Everything up to and including the last ``.f`` in the string is collapsed
    to a placeholder stem, so only the suffixes from that point on are
    inspected (``sample.v2.fa.gz`` is judged on ``.fa.gz``).  The file matches
    when every one of those suffixes is in ``ALL_SUFFIXES`` and exactly one of
    them is in ``FASTA_EXTENSIONS``.

    Args:
        input_file: Path to check, as a string.

    Returns:
        ``input_file`` unchanged when it matches, otherwise ``""``.  A
        ``pathlib`` object is rejected with an error log and ``""``.
    """
    if isinstance(input_file, PurePath):
        log.error(f"input_file {input_file} must not be a pathlib.PurePath object")
        return ""
    # "_" is a stand-in stem so that the ".f…" part is parsed as a suffix and
    # not as a dot-file name; unlike a backslash it is never a path separator.
    input_file_ext = re.sub(r".*\.f", "_.f", input_file)
    suffixes = Path(input_file_ext).suffixes
    log.debug(f"Suffixes: {suffixes}")
    if mismatch_suffix := set(suffixes) - set(ALL_SUFFIXES):
        log.info(f"Suffix {mismatch_suffix} not allowed.")
        return ""
    matching_suffix = set(suffixes) & set(FASTA_EXTENSIONS)
    if len(matching_suffix) == 1:
        log.info(f"Matched fasta file {input_file}")
        return input_file
    return ""


def find_fasta_file(input_path: str) -> str:
    """Search ``input_path`` recursively and return the path of one FASTA file.

    Candidates are the regular files under ``input_path`` whose name contains
    ``.f``; each is checked with ``find_extension``.  When several files match,
    a warning is logged and the first one in sorted order is returned.

    Args:
        input_path: Directory to search, as a string.

    Returns:
        The path of the selected FASTA file, or ``""`` when nothing matches or
        when a ``pathlib`` object is passed.
    """
    if isinstance(input_path, PurePath):
        log.error(f"input_path {input_path} must not be a pathlib.PurePath object")
        return ""
    # glob yields results in arbitrary order and the pattern also matches
    # directories, so keep regular files only and sort for a stable choice.
    input_files = sorted(
        candidate
        for candidate in glob(f"{input_path}/**/*.f*", recursive=True)
        if os.path.isfile(candidate)
    )
    if not input_files:
        log.warning(f"No input files found in {input_path}")
        return ""
    log.debug(f"Found possible fasta matches: {input_files}")

    matched_files = []
    for input_file in input_files:
        log.debug(f"Input file: {input_file}")
        if fasta_file := find_extension(input_file):
            matched_files.append(fasta_file)

    if not matched_files:
        log.warning("Unable to find matching fasta file.")
        return ""
    if len(matched_files) > 1:
        log.warning(f"More than one fasta file matched! Returning {matched_files[0]}")
    else:
        log.info(f"Matched {matched_files[0]}")
    return matched_files[0]
def
find_extension(input_file: str):
def find_extension(input_file: str) -> str:
    """Return ``input_file`` if its extension marks it as a FASTA file.

    Everything up to and including the last ``.f`` in the string is collapsed
    to a placeholder stem, so only the suffixes from that point on are
    inspected (``sample.v2.fa.gz`` is judged on ``.fa.gz``).  The file matches
    when every one of those suffixes is in ``ALL_SUFFIXES`` and exactly one of
    them is in ``FASTA_EXTENSIONS``.

    Args:
        input_file: Path to check, as a string.

    Returns:
        ``input_file`` unchanged when it matches, otherwise ``""``.  A
        ``pathlib`` object is rejected with an error log and ``""``.
    """
    if isinstance(input_file, PurePath):
        log.error(f"input_file {input_file} must not be a pathlib.PurePath object")
        return ""
    # "_" is a stand-in stem so that the ".f…" part is parsed as a suffix and
    # not as a dot-file name; unlike a backslash it is never a path separator.
    input_file_ext = re.sub(r".*\.f", "_.f", input_file)
    suffixes = Path(input_file_ext).suffixes
    log.debug(f"Suffixes: {suffixes}")
    if mismatch_suffix := set(suffixes) - set(ALL_SUFFIXES):
        log.info(f"Suffix {mismatch_suffix} not allowed.")
        return ""
    matching_suffix = set(suffixes) & set(FASTA_EXTENSIONS)
    if len(matching_suffix) == 1:
        log.info(f"Matched fasta file {input_file}")
        return input_file
    return ""
def
find_fasta_file(input_path: str):
def find_fasta_file(input_path: str) -> str:
    """Search ``input_path`` recursively and return the path of one FASTA file.

    Candidates are the regular files under ``input_path`` whose name contains
    ``.f``; each is checked with ``find_extension``.  When several files match,
    a warning is logged and the first one in sorted order is returned.

    Args:
        input_path: Directory to search, as a string.

    Returns:
        The path of the selected FASTA file, or ``""`` when nothing matches or
        when a ``pathlib`` object is passed.
    """
    if isinstance(input_path, PurePath):
        log.error(f"input_path {input_path} must not be a pathlib.PurePath object")
        return ""
    # glob yields results in arbitrary order and the pattern also matches
    # directories, so keep regular files only and sort for a stable choice.
    input_files = sorted(
        candidate
        for candidate in glob(f"{input_path}/**/*.f*", recursive=True)
        if os.path.isfile(candidate)
    )
    if not input_files:
        log.warning(f"No input files found in {input_path}")
        return ""
    log.debug(f"Found possible fasta matches: {input_files}")

    matched_files = []
    for input_file in input_files:
        log.debug(f"Input file: {input_file}")
        if fasta_file := find_extension(input_file):
            matched_files.append(fasta_file)

    if not matched_files:
        log.warning("Unable to find matching fasta file.")
        return ""
    if len(matched_files) > 1:
        log.warning(f"More than one fasta file matched! Returning {matched_files[0]}")
    else:
        log.info(f"Matched {matched_files[0]}")
    return matched_files[0]