Source code for pacbio_data_processing.bam_file_filter

#######################################################################
#
# Copyright (C) 2020 David Velázquez
# Copyright (C) 2020, 2021 David Palao
#
# This file is part of PacBio data processing.
#
#  PacBioDataProcessing is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  PacBio data processing is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with PacBioDataProcessing. If not, see <http://www.gnu.org/licenses/>.
#
#######################################################################

"""This module contains the high level functions necessary to apply
some filters to a given input BAM file."""

import logging

from .ui.cl import parse_cl_bam_filter as parse_cl
from .bam import BamFile
from .logs import config_logging
from .filters import (
    filter_seq_len, filter_enough_data_per_molecule, filter_quality,
)
from .parameters import BamFilteringParameters
from .errors import high_level_handler


class BamFilter:
    """Callable that filters the body of a BAM file.

    On construction, a list of filters is assembled from the given
    parameters. Calling the instance reads the input BAM file, passes
    its body through every registered filter in order, and writes the
    original header with the filtered body to the output BAM file.
    """

    def __init__(self, parameters):
        """Store ``parameters`` and register the filters they enable.

        Each entry of ``self.filters`` is a tuple whose first item is
        the filter callable and whose remaining items are the extra
        positional arguments passed to it after the lines.

        The following attributes of ``parameters`` are consulted:

        * ``min_dna_seq_length``: if truthy, enables ``filter_seq_len``.
        * ``min_subreads_per_molecule``: if greater than 1, enables
          ``filter_enough_data_per_molecule``.
        * ``quality_threshold``: if truthy, enables ``filter_quality``.
        * ``limit_mappings``: if truthy, enables
          ``parameters.filter_mappings``, which also receives
          ``parameters.min_relative_mapping_ratio``.
        """
        self.input_parameters = parameters
        self.filters = []
        params = self.input_parameters
        register = self.filters.append

        seq_len_threshold = params.min_dna_seq_length
        if seq_len_threshold:
            register((filter_seq_len, seq_len_threshold))

        subreads_threshold = params.min_subreads_per_molecule
        if subreads_threshold > 1:
            register((filter_enough_data_per_molecule, subreads_threshold))

        quality_threshold = params.quality_threshold
        if quality_threshold:
            register((filter_quality, quality_threshold))

        allowed_mappings = params.limit_mappings
        if allowed_mappings:
            # The mapping filter is provided by the parameters object
            # itself and is only looked up when it is actually needed.
            mapping_filter = params.filter_mappings
            ratio = params.min_relative_mapping_ratio
            register((mapping_filter, allowed_mappings, ratio))

    def _apply_filters(self, lines):
        """Yield the items of ``lines`` that pass all registered filters.

        This is a generator: the filters are chained, in registration
        order, only once iteration starts. Each filter is called with
        the output of the previous one followed by its own extra
        arguments.
        """
        stream = lines
        for spec in self.filters:
            func, *extra_args = spec
            stream = func(stream, *extra_args)
        yield from stream

    def _write_output(self, header, body):
        """Write ``header`` and ``body`` to the output BAM file."""
        destination = self.input_parameters.out_bam_file
        BamFile(destination, mode="w").write(header=header, body=body)

    def __call__(self):
        """Filter the input BAM file and write the output BAM file."""
        source = BamFile(self.input_parameters.input_bam_file)
        surviving_lines = self._apply_filters(source.body)
        self._write_output(source.header, surviving_lines)
@high_level_handler
def main():
    """Entry point: parse the command line and run the BAM filter.

    The command line is parsed, logging is configured from the parsed
    ``verbose`` value, the filtering parameters are built and logged at
    INFO level, and a ``BamFilter`` built from them is executed.
    """
    cli_args = parse_cl()
    config_logging(cli_args.verbose)
    filtering_parameters = BamFilteringParameters(cli_args)
    logging.info(str(filtering_parameters))
    BamFilter(filtering_parameters)()