scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
scran_qc Namespace Reference

Simple quality control for single-cell data. More...

Classes

class  AdtQcBlockedFilters
 Filter on ADT-based QC metrics with blocking. More...
 
class  AdtQcFilters
 Filter for high-quality cells using ADT-based metrics. More...
 
struct  ChooseFilterThresholdsOptions
 Options for choose_filter_thresholds(). More...
 
struct  ChooseFilterThresholdsResults
 Results of choose_filter_thresholds(). More...
 
struct  ComputeAdtQcFiltersOptions
 Options for compute_adt_qc_filters(). More...
 
struct  ComputeAdtQcMetricsBuffers
 Buffers for compute_adt_qc_metrics(). More...
 
struct  ComputeAdtQcMetricsOptions
 Options for compute_adt_qc_metrics(). More...
 
struct  ComputeAdtQcMetricsResults
 Results of compute_adt_qc_metrics(). More...
 
struct  ComputeCrisprQcFiltersOptions
 Options for compute_crispr_qc_filters(). More...
 
struct  ComputeCrisprQcMetricsBuffers
 Buffers for compute_crispr_qc_metrics(). More...
 
struct  ComputeCrisprQcMetricsOptions
 Options for compute_crispr_qc_metrics(). More...
 
struct  ComputeCrisprQcMetricsResults
 Results of compute_crispr_qc_metrics(). More...
 
struct  ComputeRnaQcFiltersOptions
 Options for compute_rna_qc_filters(). More...
 
struct  ComputeRnaQcMetricsBuffers
 Buffers for compute_rna_qc_metrics(). More...
 
struct  ComputeRnaQcMetricsOptions
 Options for compute_rna_qc_metrics(). More...
 
struct  ComputeRnaQcMetricsResults
 Results of compute_rna_qc_metrics(). More...
 
class  CrisprQcBlockedFilters
 Filter on CRISPR-based QC metrics with blocking. More...
 
class  CrisprQcFilters
 Filter for high-quality cells using CRISPR-based metrics. More...
 
struct  FindMedianMadOptions
 Options for find_median_mad(). More...
 
struct  FindMedianMadResults
 Results of find_median_mad(). More...
 
class  FindMedianMadWorkspace
 Temporary data structures for find_median_mad_blocked(). More...
 
struct  PerCellQcMetricsBuffers
 Buffers for per_cell_qc_metrics(). More...
 
struct  PerCellQcMetricsOptions
 Options for per_cell_qc_metrics(). More...
 
struct  PerCellQcMetricsResults
 Result store for QC metric calculations. More...
 
class  RnaQcBlockedFilters
 Filter for high-quality cells using RNA-based metrics with blocking. More...
 
class  RnaQcFilters
 Filter for high-quality cells using RNA-based metrics. More...
 

Functions

template<typename Value_ , typename Index_ , typename Subset_ , typename Sum_ , typename Detected_ >
void compute_adt_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &output, const ComputeAdtQcMetricsOptions &options)
 
template<typename Sum_ = double, typename Detected_ = int, typename Value_ , typename Index_ , typename Subset_ >
ComputeAdtQcMetricsResults< Sum_, Detected_ > compute_adt_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeAdtQcMetricsOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ >
AdtQcFilters< Float_ > compute_adt_qc_filters (const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const ComputeAdtQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ >
AdtQcFilters< Float_ > compute_adt_qc_filters (const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const ComputeAdtQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Block_ >
AdtQcBlockedFilters< Float_ > compute_adt_qc_filters_blocked (const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *const block, const ComputeAdtQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Block_ >
AdtQcBlockedFilters< Float_ > compute_adt_qc_filters_blocked (const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *const block, const ComputeAdtQcFiltersOptions &options)
 
template<typename Float_ >
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds (const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
 
template<typename Float_ >
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds (const std::size_t num, Float_ *const metrics, const ChooseFilterThresholdsOptions &options)
 
template<typename Value_ , typename Float_ >
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds (const std::size_t num, const Value_ *const metrics, Float_ *const buffer, const ChooseFilterThresholdsOptions &options)
 
template<typename Float_ >
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked (const std::vector< FindMedianMadResults< Float_ > > &mms, const ChooseFilterThresholdsOptions &options)
 
template<typename Value_ , typename Block_ , typename Float_ >
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked (const std::size_t num, const Value_ *const metrics, const Block_ *const block, FindMedianMadWorkspace< Float_ > *const workspace, const ChooseFilterThresholdsOptions &options)
 
template<typename Value_ , typename Index_ , typename Sum_ , typename Detected_ >
void compute_crispr_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const ComputeCrisprQcMetricsOptions &options)
 
template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > compute_crispr_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const ComputeCrisprQcMetricsOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ >
CrisprQcFilters< Float_ > compute_crispr_qc_filters (const std::size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const ComputeCrisprQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ >
CrisprQcFilters< Float_ > compute_crispr_qc_filters (const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const ComputeCrisprQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ , typename Block_ >
CrisprQcBlockedFilters< Float_ > compute_crispr_qc_filters_blocked (const std::size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, const ComputeCrisprQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ , typename Block_ >
CrisprQcBlockedFilters< Float_ > compute_crispr_qc_filters_blocked (const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, const ComputeCrisprQcFiltersOptions &options)
 
template<typename Float_ >
FindMedianMadResults< Float_ > find_median_mad (std::size_t num, Float_ *metrics, const FindMedianMadOptions &options)
 
template<typename Float_ = double, typename Value_ >
FindMedianMadResults< Float_ > find_median_mad (const std::size_t num, const Value_ *const metrics, Float_ *buffer, const FindMedianMadOptions &options)
 
template<typename Output_ = double, typename Value_ , typename Block_ >
std::vector< FindMedianMadResults< Output_ > > find_median_mad_blocked (const std::size_t num, const Value_ *const metrics, const Block_ *const block, FindMedianMadWorkspace< Output_ > *workspace, const FindMedianMadOptions &options)
 
template<typename Keep_ , typename Index_ >
void filter_index (const std::size_t num, const Keep_ *const filter, std::vector< Index_ > &output)
 
template<typename Index_ , typename Keep_ >
std::vector< Index_ > filter_index (const std::size_t num, const Keep_ *const filter)
 
template<typename Keep_ , typename Output_ >
void combine_filters (const std::size_t num, const std::vector< Keep_ * > &filters, Output_ *const output)
 
template<typename Output_ = unsigned char, typename Keep_ >
std::vector< Output_ > combine_filters (const std::size_t num, const std::vector< const Keep_ * > &filters)
 
template<typename Index_ , typename Keep_ >
void combine_filters_index (const Index_ num, const std::vector< const Keep_ * > &filters, std::vector< Index_ > &output)
 
template<typename Index_ , typename Keep_ >
std::vector< Index_ > combine_filters_index (const Index_ num, const std::vector< const Keep_ * > &filters)
 
template<typename Value_ , typename Index_ , typename Subset_ , typename Sum_ , typename Detected_ >
void per_cell_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
 
template<typename Sum_ = double, typename Detected_ = int, typename Value_ , typename Index_ , typename Subset_ >
PerCellQcMetricsResults< Sum_, Detected_, Value_, Index_ > per_cell_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsOptions &options)
 
template<typename Value_ , typename Index_ , typename Subset_ , typename Sum_ , typename Detected_ , typename Proportion_ >
void compute_rna_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &output, const ComputeRnaQcMetricsOptions &options)
 
template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Value_ , typename Index_ , typename Subset_ >
ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > compute_rna_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeRnaQcMetricsOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ >
RnaQcFilters< Float_ > compute_rna_qc_filters (const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const ComputeRnaQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ >
RnaQcFilters< Float_ > compute_rna_qc_filters (const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const ComputeRnaQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ , typename Block_ >
RnaQcBlockedFilters< Float_ > compute_rna_qc_filters_blocked (const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, const ComputeRnaQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ , typename Block_ >
RnaQcBlockedFilters< Float_ > compute_rna_qc_filters_blocked (const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, const ComputeRnaQcFiltersOptions &options)
 

Detailed Description

Simple quality control for single-cell data.

Function Documentation

◆ compute_adt_qc_metrics() [1/2]

template<typename Value_ , typename Index_ , typename Subset_ , typename Sum_ , typename Detected_ >
void scran_qc::compute_adt_qc_metrics ( const tatami::Matrix< Value_, Index_ > & mat,
const std::vector< Subset_ > & subsets,
const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > & output,
const ComputeAdtQcMetricsOptions & options )

Given a feature-by-cell ADT count matrix, this function uses per_cell_qc_metrics() to compute several ADT-relevant QC metrics:

  • The sum of counts for each cell, which (in theory) represents the efficiency of library preparation and sequencing. Compared to RNA data, the sum is less useful as a QC metric for ADT data as it is strongly influenced by biological variation in the abundance of the targeted features. Nonetheless, we compute it for diagnostic purposes.
  • The number of detected tags per cell. Even though ADTs are typically used in situations where few features are highly abundant (e.g., cell type-specific markers), we still expect detectable coverage of most features due to ambient contamination, non-specific binding or some background expression. Low numbers of detected tags indicate that library preparation or sequencing depth was suboptimal.
  • The sum of counts in pre-defined feature subsets. The exact interpretation depends on the nature of the feature subset but the most common use case involves isotype control (IgG) features. IgG antibodies should not bind to anything so a high subset sum suggests that non-specific binding is a problem, e.g., due to antibody conjugates. (Unlike RNA quality control, we do not use proportions here as it is entirely possible for a cell to have low counts for other tags due to the absence of their targeted features; this would result in a high proportion even if the cell has a "normal" level of non-specific binding.)

We use these metrics to define thresholds for filtering in compute_adt_qc_filters().

Template Parameters
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a vector of indices.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected tags per cell.
Parameters
matA matrix of non-negative counts. Rows correspond to ADT features while columns correspond to cells.
[in]subsetsVector of feature subsets, typically IgG controls. See per_cell_qc_metrics() for more details on the expected format.
[out]outputComputeAdtQcMetricsBuffers object in which to store the output.
optionsFurther options.

◆ compute_adt_qc_metrics() [2/2]

template<typename Sum_ = double, typename Detected_ = int, typename Value_ , typename Index_ , typename Subset_ >
ComputeAdtQcMetricsResults< Sum_, Detected_ > scran_qc::compute_adt_qc_metrics ( const tatami::Matrix< Value_, Index_ > & mat,
const std::vector< Subset_ > & subsets,
const ComputeAdtQcMetricsOptions & options )

Overload of compute_adt_qc_metrics() that allocates memory for the results.

Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected tags per cell.
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a vector of indices.
Parameters
matA matrix of non-negative counts. Rows correspond to ADT features while columns correspond to cells.
[in]subsetsVector of feature subsets, typically IgG controls. See per_cell_qc_metrics() for more details on the expected format.
optionsFurther options.
Returns
An object containing the QC metrics.

◆ compute_adt_qc_filters() [1/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ >
AdtQcFilters< Float_ > scran_qc::compute_adt_qc_filters ( const std::size_t num,
const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > & metrics,
const ComputeAdtQcFiltersOptions & options )

Given the ADT-relevant QC metrics from compute_adt_qc_metrics(), we consider low-quality cells to be those with a low number of detected tags and high subset sums.

For each subset's sum, we define the upper threshold using the MAD-based outlier approach implemented in choose_filter_thresholds(). This is done using the specified ComputeAdtQcFiltersOptions::num_mads and after log-transformation of the sums.

For the number of detected features, we define a lower threshold as the lower of:

  • The MAD-based outlier threshold from choose_filter_thresholds(), computed using the specified ComputeAdtQcFiltersOptions::num_mads and after log-transformation.
  • The product of the median number across all cells and 1 - ComputeAdtQcFiltersOptions::min_detected_drop.

So by default, cells are only considered to be low quality if the number of detected features drops 10% or more below the median. This avoids overly aggressive filtering when the MAD is zero due to the discrete nature of this statistic in datasets with few tags.

Template Parameters
Float_Floating-point type of the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected tags per cell.
Parameters
numNumber of cells.
metricsA collection of arrays containing ADT-based QC metrics, filled by compute_adt_qc_metrics().
optionsFurther options for filtering.
Returns
An object containing the filter thresholds.

◆ compute_adt_qc_filters() [2/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ >
AdtQcFilters< Float_ > scran_qc::compute_adt_qc_filters ( const ComputeAdtQcMetricsResults< Sum_, Detected_ > & metrics,
const ComputeAdtQcFiltersOptions & options )
Template Parameters
Float_Floating-point type of the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected tags per cell.
Parameters
metricsADT-based QC metrics from compute_adt_qc_metrics().
optionsFurther options for filtering.
Returns
An object containing the filter thresholds.

◆ compute_adt_qc_filters_blocked() [1/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Block_ >
AdtQcBlockedFilters< Float_ > scran_qc::compute_adt_qc_filters_blocked ( const std::size_t num,
const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > & metrics,
const Block_ *const block,
const ComputeAdtQcFiltersOptions & options )

This function computes filter thresholds for ADT-derived QC metrics in blocked datasets (e.g., cells from multiple batches or samples). Each blocking level has its own thresholds, equivalent to calling compute_adt_qc_filters() on the cells from each block. This ensures that uninteresting inter-block differences do not inflate the MAD, see choose_filter_thresholds_blocked() for more details.

Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected tags per cell.
Block_Integer type of the block assignments.
Parameters
numNumber of cells.
metricsA collection of arrays containing ADT-based QC metrics, filled by compute_adt_qc_metrics().
[in]blockPointer to an array of length num containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks.
optionsFurther options for filtering.
Returns
Object containing filter thresholds for each block.

◆ compute_adt_qc_filters_blocked() [2/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Block_ >
AdtQcBlockedFilters< Float_ > scran_qc::compute_adt_qc_filters_blocked ( const ComputeAdtQcMetricsResults< Sum_, Detected_ > & metrics,
const Block_ *const block,
const ComputeAdtQcFiltersOptions & options )
Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected tags per cell.
Block_Integer type of the block assignments.
Parameters
metricsADT-based QC metrics computed by compute_adt_qc_metrics().
[in]blockPointer to an array of length equal to the number of cells, containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks.
optionsFurther options for filtering.
Returns
Object containing filter thresholds for each block.

◆ choose_filter_thresholds() [1/3]

template<typename Float_ >
ChooseFilterThresholdsResults< Float_ > scran_qc::choose_filter_thresholds ( const FindMedianMadResults< Float_ > & mm,
const ChooseFilterThresholdsOptions & options )

We define filter thresholds on the QC metrics by assuming that most cells in the experiment are of high (or at least acceptable) quality. Any outlier values are indicative of low-quality cells that should be filtered out. Given an array of values, outliers are defined as those that are more than some number of median absolute deviations (MADs) from the median value. Outliers can be defined in both directions or just a single direction, depending on the interpretation of the QC metric. We can also apply a log-transformation to the metrics to identify outliers with respect to their fold-change from the median.

Template Parameters
Float_Floating-point type for the thresholds.
Parameters
mmMedian and MAD computed by find_median_mad(). If ChooseFilterThresholdsOptions::log = true, it is expected that the median and MAD are computed on the log-transformed metrics (i.e., FindMedianMadOptions::log = true).
optionsFurther options.
Returns
The upper and lower thresholds derived from mm.

◆ choose_filter_thresholds() [2/3]

template<typename Float_ >
ChooseFilterThresholdsResults< Float_ > scran_qc::choose_filter_thresholds ( const std::size_t num,
Float_ *const metrics,
const ChooseFilterThresholdsOptions & options )

This overload computes the median and MAD via find_median_mad() before deriving thresholds with choose_filter_thresholds().

Template Parameters
Float_Floating-point type for the metrics and thresholds.
Parameters
numNumber of cells.
[in]metricsPointer to an array of length num, containing a QC metric for each cell. This is modified arbitrarily on output.
optionsFurther options.
Returns
The upper and lower thresholds derived from metrics.

◆ choose_filter_thresholds() [3/3]

template<typename Value_ , typename Float_ >
ChooseFilterThresholdsResults< Float_ > scran_qc::choose_filter_thresholds ( const std::size_t num,
const Value_ *const metrics,
Float_ *const buffer,
const ChooseFilterThresholdsOptions & options )

Overload of choose_filter_thresholds() that uses an auxiliary buffer to avoid mutating metrics.

Template Parameters
Value_Type for the input data.
Float_Floating-point type for the metrics and thresholds.
Parameters
numNumber of cells.
[in]metricsPointer to an array of length num, containing a QC metric for each cell.
bufferPointer to an array of length num in which to store intermediate results. Alternatively NULL, in which case a buffer is automatically allocated.
optionsFurther options.
Returns
The upper and lower thresholds derived from metrics.

◆ choose_filter_thresholds_blocked() [1/2]

template<typename Float_ >
std::vector< ChooseFilterThresholdsResults< Float_ > > scran_qc::choose_filter_thresholds_blocked ( const std::vector< FindMedianMadResults< Float_ > > & mms,
const ChooseFilterThresholdsOptions & options )

For datasets with multiple blocks, we can compute block-specific thresholds for each metric. This is equivalent to calling choose_filter_thresholds() on the cells for each block. Our assumption is that differences in the metric distributions between blocks are driven by uninteresting causes (e.g., differences in sequencing depth); variable thresholds can adapt to each block's distribution for effective removal of outliers.

That said, if the differences in the distributions between blocks are interesting, it may be preferable to ignore the blocking factor and just use choose_filter_thresholds() instead. This ensures that the MADs are increased appropriately to avoid filtering out interesting variation.

Template Parameters
Float_Floating-point type for the thresholds.
Parameters
mmsVector of medians and MADs for each block.
optionsFurther options.
Returns
A vector containing the upper and lower thresholds for each block.

◆ choose_filter_thresholds_blocked() [2/2]

template<typename Value_ , typename Block_ , typename Float_ >
std::vector< ChooseFilterThresholdsResults< Float_ > > scran_qc::choose_filter_thresholds_blocked ( const std::size_t num,
const Value_ *const metrics,
const Block_ *const block,
FindMedianMadWorkspace< Float_ > *const workspace,
const ChooseFilterThresholdsOptions & options )

This overload computes the median and MAD for each block via find_median_mad_blocked() before deriving thresholds in each block with choose_filter_thresholds_blocked().

Template Parameters
Value_Type for the input data.
Float_Floating-point type for the metrics and thresholds.
Parameters
numNumber of cells.
[in]metricsPointer to an array of length num, containing a QC metric for each cell.
[in]blockOptional pointer to an array of block identifiers, see find_median_mad_blocked() for details.
workspacePointer to a workspace object, see find_median_mad_blocked() for details.
optionsFurther options.
Returns
A vector containing the upper and lower thresholds for each block.

◆ compute_crispr_qc_metrics() [1/2]

template<typename Value_ , typename Index_ , typename Sum_ , typename Detected_ >
void scran_qc::compute_crispr_qc_metrics ( const tatami::Matrix< Value_, Index_ > & mat,
const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > & output,
const ComputeCrisprQcMetricsOptions & options )

Given a guide-by-cell count matrix, this function uses per_cell_qc_metrics() to compute several CRISPR-relevant QC metrics:

  • The sum of counts for each cell. Low counts indicate that the cell was not successfully transfected with a construct or that library preparation and sequencing failed.
  • The number of detected guides per cell. In theory, this should be 1, as each cell should express no more than one guide construct. However, ambient contamination may introduce non-zero counts for multiple guides, without necessarily interfering with downstream analyses. As such, this metric is less useful for guide data, though we compute it anyway.
  • The maximum count in the most abundant guide construct. Low values indicate that the cell was not successfully transfected or that library preparation and sequencing failed. The identity of the most abundant guide is also reported.

We use these metrics to define thresholds for filtering in compute_crispr_qc_filters().

Template Parameters
Value_Type of matrix value.
Index_Type of the matrix indices.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected guides per cell.
Parameters
matA matrix of non-negative counts. Rows correspond to CRISPR guides while columns correspond to cells.
[out]outputComputeCrisprQcMetricsBuffers object in which to store the output.
optionsFurther options.

◆ compute_crispr_qc_metrics() [2/2]

template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > scran_qc::compute_crispr_qc_metrics ( const tatami::Matrix< Value_, Index_ > & mat,
const ComputeCrisprQcMetricsOptions & options )

Overload of compute_crispr_qc_metrics() that allocates memory for the results.

Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected guides per cell.
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a vector of indices.
Parameters
matA matrix of non-negative counts. Each row should correspond to a CRISPR guide while each column should correspond to a cell.
optionsFurther options.
Returns
An object containing the QC metrics.

◆ compute_crispr_qc_filters() [1/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ >
CrisprQcFilters< Float_ > scran_qc::compute_crispr_qc_filters ( const std::size_t num,
const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > & metrics,
const ComputeCrisprQcFiltersOptions & options )

In CRISPR data, a cell is considered to be of low quality if it has a low count for its most abundant guide. However, directly applying choose_filter_thresholds() on the maximum count is somewhat tricky as unsuccessful transfection can be common. This results in a large subpopulation with low maximum counts, inflating the MAD and compromising the threshold calculation. Instead, we use the following approach:

  1. Compute the proportion of counts in the most abundant guide (i.e., the maximum proportion) in each cell. Cells that were successfully transfected should have high maximum proportions. In contrast, unsuccessfully transfected cells will be dominated by ambient contamination and have low proportions.
  2. Subset the dataset to only retain those cells with maximum proportions above the median. This assumes that at least 50% of cells were successfully transfected. Thus, we remove all of the unsuccessful transfections and enrich for mostly-high-quality cells.
  3. Define a MAD-based threshold for low outliers on the log-transformed maximum count within the subset (see choose_filter_thresholds() for details). This is now possible as we can assume that most of the remaining cells are of high quality.

Note that the maximum proportion is only used to define the subset for threshold calculation. Once the maximum count threshold is computed, it is applied to all cells regardless of their maximum proportions. This ensures that we correctly remove cells with low coverage, even if the proportion is high. It also allows us to retain cells transfected with multiple guides, as long as the maximum is high enough - such cells are not necessarily uninteresting, e.g., for examining interaction effects, so we will err on the side of caution and leave them in.

Template Parameters
Float_Floating-point type of the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected guides per cell.
Value_Type of matrix value.
Index_Type of the matrix indices.
Parameters
numNumber of cells.
metricsA collection of arrays containing CRISPR-based QC metrics, filled by compute_crispr_qc_metrics().
optionsFurther options for filtering.
Returns
Object containing filter thresholds.

◆ compute_crispr_qc_filters() [2/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ >
CrisprQcFilters< Float_ > scran_qc::compute_crispr_qc_filters ( const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > & metrics,
const ComputeCrisprQcFiltersOptions & options )
Template Parameters
Float_Floating-point type of the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected guides per cell.
Value_Type of matrix value.
Index_Type of the matrix indices.
Parameters
metricsCRISPR-based QC metrics from compute_crispr_qc_metrics().
optionsFurther options for filtering.
Returns
Object containing filter thresholds.

◆ compute_crispr_qc_filters_blocked() [1/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ , typename Block_ >
CrisprQcBlockedFilters< Float_ > scran_qc::compute_crispr_qc_filters_blocked ( const std::size_t num,
const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > & metrics,
const Block_ *const block,
const ComputeCrisprQcFiltersOptions & options )

This function computes filter thresholds for CRISPR-derived QC metrics in blocked datasets (e.g., cells from multiple batches or samples). Each blocking level has its own thresholds, equivalent to calling compute_crispr_qc_filters() on the cells from each block. This ensures that uninteresting inter-block differences do not inflate the MAD, see choose_filter_thresholds_blocked() for more details.

Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected guides per cell.
Value_Type of matrix value.
Index_Type of the matrix indices.
Block_Integer type of the block assignments.
Parameters
numNumber of cells.
metricsA collection of arrays containing CRISPR-based QC metrics, filled by compute_crispr_qc_metrics().
[in]blockPointer to an array of length num containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks.
optionsFurther options for filtering.
Returns
Object containing filter thresholds for each block.

◆ compute_crispr_qc_filters_blocked() [2/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ , typename Block_ >
CrisprQcBlockedFilters< Float_ > scran_qc::compute_crispr_qc_filters_blocked ( const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > & metrics,
const Block_ *const block,
const ComputeCrisprQcFiltersOptions & options )
Template Parameters
Float_Floating-point type of the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of cells.
Value_Type of matrix value.
Index_Type of the matrix indices.
Block_Integer type of the block assignments.
Parameters
metricsCRISPR-based QC metrics computed by compute_crispr_qc_metrics().
[in]blockPointer to an array of length equal to the number of cells in metrics, containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks.
optionsFurther options for filtering.
Returns
Object containing filter thresholds for each block.

◆ find_median_mad() [1/2]

template<typename Float_ >
FindMedianMadResults< Float_ > scran_qc::find_median_mad ( std::size_t num,
Float_ * metrics,
const FindMedianMadOptions & options )

Calculates the median of an array of values, then uses that median to compute the median absolute deviation (MAD) of the same array.

Template Parameters
Float_Floating-point type of the input and output.
Parameters
numNumber of observations.
[in]metricsPointer to an array of observations of length num. NaNs are ignored. Array contents are arbitrarily modified on function return and should not be used afterwards.
optionsFurther options.
Returns
Median and MAD for metrics, possibly after log-transformation.

◆ find_median_mad() [2/2]

template<typename Float_ = double, typename Value_ >
FindMedianMadResults< Float_ > scran_qc::find_median_mad ( const std::size_t num,
const Value_ *const metrics,
Float_ * buffer,
const FindMedianMadOptions & options )

Overload of find_median_mad() that uses an auxiliary buffer to avoid mutating the input array of values.

Template Parameters
Value_Type for the input.
Float_Floating-point type of the output.
Parameters
numNumber of observations.
[in]metricsPointer to an array of observations of length num. NaNs are ignored.
[out]bufferPointer to an array of length num, containing a buffer to use for storing intermediate results. Array contents are arbitrarily modified on function return and should not be used afterwards. This can also be NULL in which case a buffer is allocated.
optionsFurther options.
Returns
Median and MAD for metrics, possibly after log-transformation.

◆ find_median_mad_blocked()

template<typename Output_ = double, typename Value_ , typename Block_ >
std::vector< FindMedianMadResults< Output_ > > scran_qc::find_median_mad_blocked ( const std::size_t num,
const Value_ *const metrics,
const Block_ *const block,
FindMedianMadWorkspace< Output_ > * workspace,
const FindMedianMadOptions & options )

For blocked datasets, this function computes the median and MAD for each block. It is equivalent to calling find_median_mad() separately on all observations from each block.

Template Parameters
Output_Floating-point type of the output.
Block_Integer type, containing the block IDs.
Value_Numeric type of the input.
Parameters
numNumber of observations.
[in]metricsPointer to an array of observations of length num. NaNs are ignored.
[in]blockOptional pointer to an array of block identifiers. If provided, the array should be of length equal to num. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks. If a null pointer is supplied, all observations are assumed to belong to the same block.
workspacePointer to a workspace object, either (i) constructed on num and block or (ii) configured using FindMedianMadWorkspace::set() on num and block. The same object can be re-used across multiple calls to find_median_mad_blocked() with the same num and block. This can also be NULL in which case a new workspace is allocated.
optionsFurther options.
Returns
Vector of length \(N\), where each entry contains the median and MAD for each block in block.

◆ filter_index() [1/2]

template<typename Keep_ , typename Index_ >
void scran_qc::filter_index ( const std::size_t num,
const Keep_ *const filter,
std::vector< Index_ > & output )

Convert the filtering vectors produced by compute_rna_qc_filters() and related functions into formats that can be used for downstream analysis. In particular, we want to slice the original feature-by-cell matrix so that only the high-quality subset of cells is retained. This is most easily done by using tatami::make_DelayedSubset() to subset the tatami::Matrix with the indices of the high-quality cells. For this purpose, we can use filter_index() to convert the boolean filtering vector into a vector of sorted and unique column indices.

Template Parameters
Keep_Boolean type of the filter.
Index_Integer type of array indices.
Parameters
numNumber of cells in the dataset.
[in]filterPointer to an array of length num, indicating whether a cell is of high quality.
[out]outputOn output, a vector of sorted and unique indices of the cells considered to be high quality.

◆ filter_index() [2/2]

template<typename Index_ , typename Keep_ >
std::vector< Index_ > scran_qc::filter_index ( const std::size_t num,
const Keep_ *const filter )

Overload of filter_index() that returns a vector directly.

Template Parameters
Index_Integer type of array indices.
Keep_Boolean type of the filter.
Parameters
numNumber of cells in the dataset.
[in]filterPointer to an array of length num, indicating whether a cell is of high quality.
Returns
Vector of sorted and unique indices of the cells considered to be high quality.

◆ combine_filters() [1/2]

template<typename Keep_ , typename Output_ >
void scran_qc::combine_filters ( const std::size_t num,
const std::vector< Keep_ * > & filters,
Output_ *const output )

When dealing with multiple filters from different modalities (e.g., CrisprQcFilters::filter(), AdtQcFilters::filter()), our default strategy is to take the intersection, i.e., we only retain cells that are considered to be high quality in all modalities. This ensures that downstream analyses can be safely performed on each modality in the filtered dataset.

Template Parameters
Keep_Boolean type of each filter modality.
Output_Boolean type of the output.
Parameters
numNumber of cells in the dataset.
[in]filtersVector of pointers to arrays of length num. Each array corresponds to a modality and indicates whether each cell is high quality (truthy) or not (falsey) for that modality.
[out]outputPointer to an array of length num. On output, this is filled with truthy values only for cells that are high quality in all modalities.

◆ combine_filters() [2/2]

template<typename Output_ = unsigned char, typename Keep_ >
std::vector< Output_ > scran_qc::combine_filters ( const std::size_t num,
const std::vector< const Keep_ * > & filters )

Overload of combine_filters() that returns a vector directly.

Template Parameters
Output_Boolean type of the output.
Keep_Boolean type of each filter modality.
Parameters
numNumber of cells in the dataset.
[in]filtersVector of pointers to arrays of length num. Each array corresponds to a modality and indicates whether each cell is high quality (truthy) or not (falsey) for that modality.
Returns
Vector of length num, indicating which cells are high quality in all modalities.

◆ combine_filters_index() [1/2]

template<typename Index_ , typename Keep_ >
void scran_qc::combine_filters_index ( const Index_ num,
const std::vector< const Keep_ * > & filters,
std::vector< Index_ > & output )

This has the same behavior as combine_filters() followed by filter_index().

Template Parameters
Index_Integer type of array indices.
Keep_Boolean type of each filter modality.
Parameters
numNumber of cells in the dataset.
[in]filtersVector of pointers to arrays of length num. Each array corresponds to a modality and indicates whether each cell is high quality (truthy) or not (falsey) for that modality.
[out]outputOn output, a vector of sorted and unique indices of the cells considered to be high quality in all modalities.

◆ combine_filters_index() [2/2]

template<typename Index_ , typename Keep_ >
std::vector< Index_ > scran_qc::combine_filters_index ( const Index_ num,
const std::vector< const Keep_ * > & filters )

Overload of combine_filters_index() that returns a vector directly.

Template Parameters
Index_Integer type of array indices.
Keep_Boolean type of each filter modality.
Parameters
numNumber of cells in the dataset.
[in]filtersVector of pointers to arrays of length num. Each array corresponds to a modality and indicates whether each cell is high quality (truthy) or not (falsey) for that modality.
Returns
Vector of sorted and unique indices of the cells considered to be high quality in all modalities.

◆ per_cell_qc_metrics() [1/2]

template<typename Value_ , typename Index_ , typename Subset_ , typename Sum_ , typename Detected_ >
void scran_qc::per_cell_qc_metrics ( const tatami::Matrix< Value_, Index_ > & mat,
const std::vector< Subset_ > & subsets,
const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > & output,
const PerCellQcMetricsOptions & options )

Given a feature-by-cell expression matrix (usually containing non-negative counts), we compute several QC metrics:

  • The sum of expression values for each cell, which represents the efficiency of library preparation and sequencing. Low sums indicate that the library was not successfully captured.
  • The number of detected features, i.e., with non-zero counts. This also quantifies the library preparation efficiency, but with a greater focus on capturing the transcriptional complexity.
  • The maximum value across all features. This is useful in situations where only one feature is expected to be present, e.g., CRISPR guides, hash tags.
  • The row index of the feature with the maximum count. If multiple features are tied for the maximum count, the earliest feature is reported.
  • The sum of expression values in pre-defined feature subsets. The exact interpretation depends on the nature of the subset - for example, one subset for RNA data will typically contain all genes on the mitochondrial chromosome, where higher proportions of counts in the mitochondrial subset indicate cell damage due to loss of cytoplasmic transcripts. Spike-in proportions can be interpreted in a similar manner.
  • The number of detected features in pre-defined feature subsets. Analogous to the number of detected features for the entire feature space.
Template Parameters
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a std::vector of indices.
Sum_Floating point type to store the sums.
Detected_Integer type to store the number of detected features.
Parameters
matA matrix of non-negative counts. Rows should correspond to features (e.g., genes) while columns should correspond to cells.
[in]subsetsVector of feature subsets, where each entry represents a feature subset and may be either:
  • A pointer to an array of length equal to mat.nrow() where each entry is interpretable as a boolean. This indicates whether each row in mat belongs to the subset.
  • A std::vector containing sorted and unique row indices. This specifies the rows in mat that belong to the subset.
[out]outputCollection of buffers in which the computed statistics are to be stored.
optionsFurther options.

◆ per_cell_qc_metrics() [2/2]

template<typename Sum_ = double, typename Detected_ = int, typename Value_ , typename Index_ , typename Subset_ >
PerCellQcMetricsResults< Sum_, Detected_, Value_, Index_ > scran_qc::per_cell_qc_metrics ( const tatami::Matrix< Value_, Index_ > & mat,
const std::vector< Subset_ > & subsets,
const PerCellQcMetricsOptions & options )
Template Parameters
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a std::vector of indices.
Sum_Floating point type to store the sums.
Detected_Integer type to store the number of detected features.
Parameters
matA matrix of non-negative counts. Rows should correspond to features (e.g., genes) while columns should correspond to cells.
[in]subsetsVector of feature subsets, where each entry represents a feature subset and may be either:
  • A pointer to an array of length equal to mat.nrow() where each entry is interpretable as a boolean. This indicates whether each row in mat belongs to the subset.
  • A std::vector containing sorted and unique row indices. This specifies the rows in mat that belong to the subset.
optionsFurther options.
Returns
Object containing the QC metrics. Not all metrics may be computed depending on options.

◆ compute_rna_qc_metrics() [1/2]

template<typename Value_ , typename Index_ , typename Subset_ , typename Sum_ , typename Detected_ , typename Proportion_ >
void scran_qc::compute_rna_qc_metrics ( const tatami::Matrix< Value_, Index_ > & mat,
const std::vector< Subset_ > & subsets,
const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > & output,
const ComputeRnaQcMetricsOptions & options )

Given a feature-by-cell RNA count matrix, we compute several metrics for filtering high-quality cells:

  • The total sum of counts for each cell, which represents the efficiency of library preparation and sequencing. Low totals indicate that the library was not successfully captured.
  • The number of detected features. This also quantifies library preparation efficiency but with greater focus on capturing transcriptional complexity.
  • The proportion of counts in pre-defined feature subsets, the exact interpretation of which depends on the nature of the subset. Typically, one subset contains all genes on the mitochondrial chromosome, where higher proportions are representative of cell damage; the assumption is that cytoplasmic transcripts leak through tears in the cell membrane while the mitochondria are still trapped inside. The proportion of spike-in transcripts can be interpreted in a similar manner, where the loss of endogenous transcripts results in higher spike-in proportions.

We use these metrics to define thresholds for filtering in compute_rna_qc_filters().

Template Parameters
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a vector of indices.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected features.
Proportion_Floating-point type to store the proportions.
Parameters
matA matrix of non-negative counts. Rows should correspond to genes while columns should correspond to cells.
[in]subsetsVector of feature subsets, typically mitochondrial genes or spike-in transcripts. See the argument of the same name in per_cell_qc_metrics() for more details on the expected format.
[out]outputCollection of buffers in which to store the output.
optionsFurther options.

◆ compute_rna_qc_metrics() [2/2]

template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Value_ , typename Index_ , typename Subset_ >
ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > scran_qc::compute_rna_qc_metrics ( const tatami::Matrix< Value_, Index_ > & mat,
const std::vector< Subset_ > & subsets,
const ComputeRnaQcMetricsOptions & options )

Overload of compute_rna_qc_metrics() that allocates memory for the results.

Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected features.
Proportion_Floating-point type to store the proportions.
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a std::vector of indices.
Parameters
matA matrix of non-negative counts. Rows should correspond to genes while columns should correspond to cells.
[in]subsetsVector of feature subsets, typically mitochondrial genes or spike-in transcripts. See the argument of the same name in per_cell_qc_metrics() for more details on the expected format.
optionsFurther options.
Returns
An object containing the QC metrics. Subset proportions are returned depending on the subsets.

◆ compute_rna_qc_filters() [1/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ >
RnaQcFilters< Float_ > scran_qc::compute_rna_qc_filters ( const std::size_t num,
const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > & metrics,
const ComputeRnaQcFiltersOptions & options )

Given the RNA-relevant QC metrics from compute_rna_qc_metrics(), we consider low-quality cells to be those with a low sum, a low number of detected genes, or high subset proportions. We define thresholds for each metric using the MAD-based outlier approach implemented in choose_filter_thresholds(). For the total counts and number of detected features, the outliers are defined after log-transformation of the metrics.

Template Parameters
Float_Floating-point type of the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected features.
Proportion_Floating-point type to store the proportions.
Parameters
numNumber of cells.
metricsA collection of buffers containing RNA-based QC metrics, filled by compute_rna_qc_metrics().
optionsFurther options for filtering.
Returns
An object containing the filter thresholds.

◆ compute_rna_qc_filters() [2/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ >
RnaQcFilters< Float_ > scran_qc::compute_rna_qc_filters ( const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > & metrics,
const ComputeRnaQcFiltersOptions & options )

Overload of compute_rna_qc_filters() that accepts the RNA-derived QC metrics as a ComputeRnaQcMetricsResults object. For blocked datasets (e.g., cells from multiple batches or samples), use compute_rna_qc_filters_blocked() instead: each blocking level then has its own thresholds, equivalent to calling compute_rna_qc_filters() on the cells from each block. This ensures that uninteresting inter-block differences do not inflate the MAD; see choose_filter_thresholds_blocked() for more details.

Template Parameters
Float_Floating-point type of the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected features.
Proportion_Floating-point type to store the proportions.
Parameters
metricsRNA-based QC metrics from compute_rna_qc_metrics().
optionsFurther options for filtering.
Returns
An object containing the filter thresholds.

◆ compute_rna_qc_filters_blocked() [1/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ , typename Block_ >
RnaQcBlockedFilters< Float_ > scran_qc::compute_rna_qc_filters_blocked ( const std::size_t num,
const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > & metrics,
const Block_ *const block,
const ComputeRnaQcFiltersOptions & options )
Template Parameters
Float_Floating-point type of the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected features.
Proportion_Floating-point type to store the proportions.
Block_Integer type of the block assignments.
Parameters
numNumber of cells.
metricsA collection of buffers containing RNA-based QC metrics, filled by compute_rna_qc_metrics().
[in]blockPointer to an array of length num containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks.
optionsFurther options for filtering.
Returns
Object containing filter thresholds for each block.

◆ compute_rna_qc_filters_blocked() [2/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ , typename Block_ >
RnaQcBlockedFilters< Float_ > scran_qc::compute_rna_qc_filters_blocked ( const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > & metrics,
const Block_ *const block,
const ComputeRnaQcFiltersOptions & options )
Template Parameters
Float_Floating-point type of the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected features.
Proportion_Floating-point type to store the proportions.
Block_Integer type of the block assignments.
Parameters
metricsRNA-based QC metrics computed by compute_rna_qc_metrics().
[in]blockPointer to an array of length equal to the number of cells in metrics, containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks.
optionsFurther options for filtering.
Returns
Object containing filter thresholds for each block.