scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
Classes | Functions
scran_qc Namespace Reference

Simple quality control for single-cell data. More...

Classes

class  AdtQcBlockedFilters
 Filter on ADT-based QC metrics with blocking. More...
 
class  AdtQcFilters
 Filter for high-quality cells using ADT-based metrics. More...
 
struct  ChooseFilterThresholdsOptions
 Options for choose_filter_thresholds(). More...
 
struct  ChooseFilterThresholdsResults
 Results of choose_filter_thresholds(). More...
 
struct  ComputeAdtQcFiltersOptions
 Options for compute_adt_qc_filters(). More...
 
struct  ComputeAdtQcMetricsBuffers
 Buffers for compute_adt_qc_metrics(). More...
 
struct  ComputeAdtQcMetricsOptions
 Options for compute_adt_qc_metrics(). More...
 
struct  ComputeAdtQcMetricsResults
 Results of compute_adt_qc_metrics(). More...
 
struct  ComputeCrisprQcFiltersOptions
 Options for compute_crispr_qc_filters(). More...
 
struct  ComputeCrisprQcMetricsBuffers
 Buffers for compute_crispr_qc_metrics(). More...
 
struct  ComputeCrisprQcMetricsOptions
 Options for compute_crispr_qc_metrics(). More...
 
struct  ComputeCrisprQcMetricsResults
 Results of compute_crispr_qc_metrics(). More...
 
struct  ComputeRnaQcFiltersOptions
 Options for compute_rna_qc_filters(). More...
 
struct  ComputeRnaQcMetricsBuffers
 Buffers for compute_rna_qc_metrics(). More...
 
struct  ComputeRnaQcMetricsOptions
 Options for compute_rna_qc_metrics(). More...
 
struct  ComputeRnaQcMetricsResults
 Results of compute_rna_qc_metrics(). More...
 
class  CrisprQcBlockedFilters
 Filter on CRISPR-based QC metrics with blocking. More...
 
class  CrisprQcFilters
 Filter for high-quality cells using CRISPR-based metrics. More...
 
struct  FindMedianMadOptions
 Options for find_median_mad(). More...
 
struct  FindMedianMadResults
 Results of find_median_mad(). More...
 
class  FindMedianMadWorkspace
 Temporary data structures for find_median_mad_blocked(). More...
 
struct  PerCellQcMetricsBuffers
 Buffers for per_cell_qc_metrics(). More...
 
struct  PerCellQcMetricsOptions
 Options for per_cell_qc_metrics(). More...
 
struct  PerCellQcMetricsResults
 Result store for QC metric calculations. More...
 
class  RnaQcBlockedFilters
 Filter for high-quality cells using RNA-based metrics with blocking. More...
 
class  RnaQcFilters
 Filter for high-quality cells using RNA-based metrics. More...
 

Functions

template<typename Value_ , typename Index_ , typename Subset_ , typename Sum_ , typename Detected_ >
void compute_adt_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &output, const ComputeAdtQcMetricsOptions &options)
 
template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int, typename Subset_ = const uint8_t*>
ComputeAdtQcMetricsResults< Sum_, Detected_ > compute_adt_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeAdtQcMetricsOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ >
AdtQcFilters< Float_ > compute_adt_qc_filters (size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const ComputeAdtQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ >
AdtQcFilters< Float_ > compute_adt_qc_filters (const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const ComputeAdtQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Block_ >
AdtQcBlockedFilters< Float_ > compute_adt_qc_filters_blocked (size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *block, const ComputeAdtQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Block_ >
AdtQcBlockedFilters< Float_ > compute_adt_qc_filters_blocked (const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *block, const ComputeAdtQcFiltersOptions &options)
 
template<typename Float_ >
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds (const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
 
template<typename Index_ , typename Float_ >
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds (Index_ num, Float_ *metrics, const ChooseFilterThresholdsOptions &options)
 
template<typename Index_ , typename Value_ , typename Float_ >
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds (Index_ num, const Value_ *metrics, Float_ *buffer, const ChooseFilterThresholdsOptions &options)
 
template<typename Float_ >
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked (const std::vector< FindMedianMadResults< Float_ > > mms, const ChooseFilterThresholdsOptions &options)
 
template<typename Index_ , typename Value_ , typename Block_ , typename Float_ >
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked (Index_ num, const Value_ *metrics, const Block_ *block, FindMedianMadWorkspace< Float_, Index_ > *workspace, const ChooseFilterThresholdsOptions &options)
 
template<typename Value_ , typename Index_ , typename Sum_ , typename Detected_ >
void compute_crispr_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const ComputeCrisprQcMetricsOptions &options)
 
template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > compute_crispr_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const ComputeCrisprQcMetricsOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ >
CrisprQcFilters< Float_ > compute_crispr_qc_filters (size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const ComputeCrisprQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
CrisprQcFilters< Float_ > compute_crispr_qc_filters (const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const ComputeCrisprQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ , typename Block_ >
CrisprQcBlockedFilters< Float_ > compute_crispr_qc_filters_blocked (size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *block, const ComputeCrisprQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ , typename Block_ >
CrisprQcBlockedFilters< Float_ > compute_crispr_qc_filters_blocked (const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *block, const ComputeCrisprQcFiltersOptions &options)
 
template<typename Index_ , typename Float_ >
FindMedianMadResults< Float_ > find_median_mad (Index_ num, Float_ *metrics, const FindMedianMadOptions &options)
 
template<typename Float_ = double, typename Index_ , typename Value_ >
FindMedianMadResults< Float_ > find_median_mad (Index_ num, const Value_ *metrics, Float_ *buffer, const FindMedianMadOptions &options)
 
template<typename Output_ = double, typename Index_ , typename Value_ , typename Block_ >
std::vector< FindMedianMadResults< Output_ > > find_median_mad_blocked (Index_ num, const Value_ *metrics, const Block_ *block, FindMedianMadWorkspace< Output_, Index_ > *workspace, const FindMedianMadOptions &options)
 
template<typename Index_ , typename Keep_ >
void filter_index (Index_ num, const Keep_ *filter, std::vector< Index_ > &output)
 
template<typename Index_ , typename Keep_ >
std::vector< Index_ > filter_index (Index_ num, const Keep_ *filter)
 
template<typename Keep_ , typename Output_ >
void combine_filters (size_t num, const std::vector< Keep_ * > &filters, Output_ *output)
 
template<typename Output_ = uint8_t, typename Keep_ = uint8_t>
std::vector< Output_ > combine_filters (size_t num, const std::vector< const Keep_ * > &filters)
 
template<typename Index_ , typename Keep_ >
void combine_filters_index (Index_ num, const std::vector< const Keep_ * > &filters, std::vector< Index_ > &output)
 
template<typename Index_ , typename Keep_ >
std::vector< Index_ > combine_filters_index (Index_ num, const std::vector< const Keep_ * > &filters)
 
template<typename Value_ , typename Index_ , typename Subset_ , typename Sum_ , typename Detected_ >
void per_cell_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
 
template<typename Sum_ = double, typename Detected_ = int, typename Value_ , typename Index_ , typename Subset_ >
PerCellQcMetricsResults< Sum_, Detected_, Value_, Index_ > per_cell_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsOptions &options)
 
template<typename Value_ , typename Index_ , typename Subset_ , typename Sum_ , typename Detected_ , typename Proportion_ >
void compute_rna_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &output, const ComputeRnaQcMetricsOptions &options)
 
template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Value_ = double, typename Index_ = int, typename Subset_ = const uint8_t*>
ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > compute_rna_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeRnaQcMetricsOptions &options)
 
template<typename Float_ = double, typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
RnaQcFilters< Float_ > compute_rna_qc_filters (size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const ComputeRnaQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
RnaQcFilters< Float_ > compute_rna_qc_filters (const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const ComputeRnaQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ , typename Block_ >
RnaQcBlockedFilters< Float_ > compute_rna_qc_filters_blocked (size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *block, const ComputeRnaQcFiltersOptions &options)
 
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ , typename Block_ >
RnaQcBlockedFilters< Float_ > compute_rna_qc_filters_blocked (const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *block, const ComputeRnaQcFiltersOptions &options)
 

Detailed Description

Simple quality control for single-cell data.

Function Documentation

◆ compute_adt_qc_metrics() [1/2]

void scran_qc::compute_adt_qc_metrics ( const tatami::Matrix< Value_, Index_ > &  mat,
const std::vector< Subset_ > &  subsets,
const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &  output,
const ComputeAdtQcMetricsOptions &  options 
)

Given a feature-by-cell ADT count matrix, this function uses per_cell_qc_metrics() to compute several ADT-relevant QC metrics:

  • The sum of counts for each cell, which (in theory) represents the efficiency of library preparation and sequencing. This is less useful as a QC metric for ADT data given that the sum is strongly influenced by biological variation in the abundance of the targeted features. Nonetheless, we compute it for diagnostic purposes.
  • The number of detected tags per cell. Even though ADTs are commonly applied in situations where few features are highly abundant, we still expect detectable coverage of most features due to ambient contamination, non-specific binding or some background expression. The absence of detectable coverage indicates that library preparation or sequencing depth was suboptimal.
  • The sum of counts in pre-defined feature subsets. While the exact interpretation depends on the nature of the subset, the most common use case involves isotype control (IgG) features. IgG antibodies should not bind to anything, so high coverage suggests that non-specific binding is a problem, e.g., due to antibody conjugates. (We do not use proportions here, as it is entirely possible for a cell to have no counts for other tags due to the absence of their targeted features; this would result in a high proportion even if the cell has a "normal" level of non-specific binding.)

We use these metrics to define thresholds for filtering in compute_adt_qc_filters().

Template Parameters
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a vector of indices.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected tags.
Parameters
matA tatami matrix containing count data. Rows correspond to ADT features while columns correspond to cells.
[in]subsetsVector of feature subsets, typically IgG controls. See per_cell_qc_metrics() for more details on the expected format.
[out]outputComputeAdtQcMetricsBuffers object in which to store the output.
optionsFurther options.

◆ compute_adt_qc_metrics() [2/2]

template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int, typename Subset_ = const uint8_t*>
ComputeAdtQcMetricsResults< Sum_, Detected_ > scran_qc::compute_adt_qc_metrics ( const tatami::Matrix< Value_, Index_ > &  mat,
const std::vector< Subset_ > &  subsets,
const ComputeAdtQcMetricsOptions &  options 
)

Overload of compute_adt_qc_metrics() that allocates memory for the results.

Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected tags.
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a vector of indices.
Parameters
matA tatami matrix containing count data. Rows correspond to ADT features while columns correspond to cells.
[in]subsetsVector of feature subsets, typically IgG controls. See per_cell_qc_metrics() for more details on the expected format.
optionsFurther options.
Returns
An object containing the QC metrics.

◆ compute_adt_qc_filters() [1/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ >
AdtQcFilters< Float_ > scran_qc::compute_adt_qc_filters ( size_t  num,
const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &  metrics,
const ComputeAdtQcFiltersOptions &  options 
)

Using the ADT-relevant QC metrics from compute_adt_qc_metrics(), we consider low-quality cells to be those with a low number of detected tags and high subset sums. We define thresholds for each metric using an MAD-based outlier approach (see choose_filter_thresholds() for details). For the number of detected features and the subset sums, the outliers are defined after log-transformation of the metrics.

For the number of detected features, we supplement the MAD-based threshold with a minimum drop in the proportion from the median. That is, cells are only considered to be low quality if the difference in the number of detected features from the median is greater than a certain percentage. By default, the number must drop by at least 10% from the median. This avoids overly aggressive filtering when the MAD is zero due to the discrete nature of this statistic in datasets with few tags.

Template Parameters
Float_Floating-point type for the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected tags.
Parameters
numNumber of cells.
metricsA collection of arrays containing ADT-based QC metrics, filled by compute_adt_qc_metrics().
optionsFurther options for filtering.
Returns
An object containing the filter thresholds.

◆ compute_adt_qc_filters() [2/2]

template<typename Float_ = double, typename Sum_ , typename Detected_ >
AdtQcFilters< Float_ > scran_qc::compute_adt_qc_filters ( const ComputeAdtQcMetricsResults< Sum_, Detected_ > &  metrics,
const ComputeAdtQcFiltersOptions &  options 
)
Template Parameters
Float_Floating-point type for the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected tags.
Parameters
metricsADT-based QC metrics from compute_adt_qc_metrics().
optionsFurther options for filtering.
Returns
An object containing the filter thresholds.

◆ compute_adt_qc_filters_blocked() [1/2]

AdtQcBlockedFilters< Float_ > scran_qc::compute_adt_qc_filters_blocked ( size_t  num,
const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &  metrics,
const Block_ *  block,
const ComputeAdtQcFiltersOptions &  options 
)

This function computes filter thresholds for ADT-derived QC metrics in blocked datasets (e.g., cells from multiple batches or samples). Each blocking level has its own thresholds, equivalent to calling compute_adt_qc_filters() on the cells from each block. This ensures that uninteresting inter-block differences do not inflate the MAD, see choose_filter_thresholds_blocked() for more details.

Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected tags.
Block_Integer type for the block assignments.
Parameters
numNumber of cells.
metricsA collection of arrays containing ADT-based QC metrics, filled by compute_adt_qc_metrics().
[in]blockPointer to an array of length num containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks.
optionsFurther options for filtering.
Returns
Object containing filter thresholds for each block.

◆ compute_adt_qc_filters_blocked() [2/2]

AdtQcBlockedFilters< Float_ > scran_qc::compute_adt_qc_filters_blocked ( const ComputeAdtQcMetricsResults< Sum_, Detected_ > &  metrics,
const Block_ *  block,
const ComputeAdtQcFiltersOptions &  options 
)
Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected tags.
Block_Integer type for the block assignments.
Parameters
metricsADT-based QC metrics computed by compute_adt_qc_metrics().
[in]blockPointer to an array of length equal to the number of cells, containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks.
optionsFurther options for filtering.
Returns
Object containing filter thresholds for each block.

◆ choose_filter_thresholds() [1/3]

template<typename Float_ >
ChooseFilterThresholdsResults< Float_ > scran_qc::choose_filter_thresholds ( const FindMedianMadResults< Float_ > &  mm,
const ChooseFilterThresholdsOptions &  options 
)

We define filter thresholds on the QC metrics by assuming that most cells in the experiment are of high (or at least acceptable) quality. Any outlier values are indicative of low-quality cells that should be filtered out. Given an array of values, outliers are defined as those that are more than some number of median absolute deviations (MADs) from the median value. Outliers can be defined in both directions or just a single direction, depending on the interpretation of the QC metric. We can also apply a log-transformation to the metrics to identify outliers with respect to their fold-change from the median.

Template Parameters
Float_Floating-point type for the thresholds.
Parameters
mmMedian and MAD computed by find_median_mad(). If ChooseFilterThresholdsOptions::log = true, it is expected that the median and MAD are computed on the log-transformed metrics (i.e., FindMedianMadOptions::log = true).
optionsFurther options.
Returns
The upper and lower thresholds derived from mm.

◆ choose_filter_thresholds() [2/3]

ChooseFilterThresholdsResults< Float_ > scran_qc::choose_filter_thresholds ( Index_  num,
Float_ *  metrics,
const ChooseFilterThresholdsOptions &  options 
)

This overload computes the median and MAD via find_median_mad() before deriving thresholds with choose_filter_thresholds().

Template Parameters
Index_Integer type for the array indices.
Float_Floating-point type for the metrics and thresholds.
Parameters
numNumber of cells.
[in]metricsPointer to an array of length num, containing a QC metric for each cell. This is modified arbitrarily on output.
optionsFurther options.
Returns
The upper and lower thresholds derived from metrics.

◆ choose_filter_thresholds() [3/3]

ChooseFilterThresholdsResults< Float_ > scran_qc::choose_filter_thresholds ( Index_  num,
const Value_ *  metrics,
Float_ *  buffer,
const ChooseFilterThresholdsOptions &  options 
)

Overload of choose_filter_thresholds() that uses an auxiliary buffer to avoid mutating metrics.

Template Parameters
Index_Integer type for the array indices.
Value_Type for the input data.
Float_Floating-point type for the metrics and thresholds.
Parameters
numNumber of cells.
[in]metricsPointer to an array of length num, containing a QC metric for each cell.
bufferPointer to an array of length num in which to store intermediate results. Alternatively NULL, in which case a buffer is automatically allocated.
optionsFurther options.
Returns
The upper and lower thresholds derived from metrics.

◆ choose_filter_thresholds_blocked() [1/2]

template<typename Float_ >
std::vector< ChooseFilterThresholdsResults< Float_ > > scran_qc::choose_filter_thresholds_blocked ( const std::vector< FindMedianMadResults< Float_ > >  mms,
const ChooseFilterThresholdsOptions &  options 
)

For datasets with multiple blocks, we can compute block-specific thresholds for each metric. This is equivalent to calling choose_filter_thresholds() on the cells for each block. Our assumption is that differences in the metric distributions between blocks are driven by uninteresting causes (e.g., differences in sequencing depth); variable thresholds can adapt to each block's distribution for effective removal of outliers.

That said, if the differences in the distributions between blocks are interesting, it may be preferable to ignore the blocking factor and just use choose_filter_thresholds() instead. This ensures that the MADs are increased appropriately to avoid filtering out interesting variation.

Template Parameters
Float_Floating-point type for the thresholds.
Parameters
mmsVector of medians and MADs for each block.
optionsFurther options.
Returns
A vector containing the upper and lower thresholds for each block.

◆ choose_filter_thresholds_blocked() [2/2]

std::vector< ChooseFilterThresholdsResults< Float_ > > scran_qc::choose_filter_thresholds_blocked ( Index_  num,
const Value_ *  metrics,
const Block_ *  block,
FindMedianMadWorkspace< Float_, Index_ > *  workspace,
const ChooseFilterThresholdsOptions &  options 
)

This overload computes the median and MAD for each block via find_median_mad_blocked() before deriving thresholds in each block with choose_filter_thresholds_blocked().

Template Parameters
Index_Integer type for the array indices.
Value_Type for the input data.
Float_Floating-point type for the metrics and thresholds.
Parameters
numNumber of cells.
[in]metricsPointer to an array of length num, containing a QC metric for each cell.
[in]blockOptional pointer to an array of block identifiers, see find_median_mad_blocked() for details.
workspacePointer to a workspace object, see find_median_mad_blocked() for details.
optionsFurther options.
Returns
A vector containing the upper and lower thresholds for each block.

◆ compute_crispr_qc_metrics() [1/2]

void scran_qc::compute_crispr_qc_metrics ( const tatami::Matrix< Value_, Index_ > &  mat,
const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &  output,
const ComputeCrisprQcMetricsOptions &  options 
)

Given a feature-by-cell guide count matrix, this function uses per_cell_qc_metrics() to compute several CRISPR-relevant QC metrics:

  • The sum of counts for each cell. Low counts indicate that the cell was not successfully transfected with a construct, or that library preparation and sequencing failed.
  • The number of detected guides per cell. In theory, this should be 1, as each cell should express no more than one guide construct. However, ambient contamination may introduce non-zero counts for multiple guides, without necessarily interfering with downstream analyses. As such, this metric is less useful for guide data, though we compute it anyway.
  • The maximum count in the most abundant guide construct. Low values indicate that the cell was not successfully transfected, or that library preparation and sequencing failed. The identity of the most abundant guide is also reported.

We use these metrics to define thresholds for filtering in compute_crispr_qc_filters().

Template Parameters
Value_Type of matrix value.
Index_Type of the matrix indices.
Sum_Numeric type to store the summed expression.

Meaningful instances of this object should generally be constructed by calling the compute_crispr_qc_metrics() function.

Template Parameters
Detected_Integer type to store the number of detected guides.
Parameters
matA tatami matrix containing count data. Rows correspond to CRISPR guides while columns correspond to cells.
[out]outputComputeCrisprQcMetricsBuffers object in which to store the output.
optionsFurther options.

◆ compute_crispr_qc_metrics() [2/2]

template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > scran_qc::compute_crispr_qc_metrics ( const tatami::Matrix< Value_, Index_ > &  mat,
const ComputeCrisprQcMetricsOptions &  options 
)

Overload of compute_crispr_qc_metrics() that allocates memory for the results.

Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected guides.
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a vector of indices.
Parameters
matA tatami matrix containing counts. Each row should correspond to a guide while each column should correspond to a cell.
optionsFurther options.
Returns
An object containing the QC metrics.

◆ compute_crispr_qc_filters() [1/2]

CrisprQcFilters< Float_ > scran_qc::compute_crispr_qc_filters ( size_t  num,
const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &  metrics,
const ComputeCrisprQcFiltersOptions &  options 
)

In CRISPR data, low-quality cells are defined as those with a low count for the most abundant guide. However, directly defining a threshold on the maximum count is somewhat tricky as unsuccessful transfection is not uncommon. This often results in a large subpopulation with low maximum counts, inflating the MAD and compromising the threshold calculation. Instead, we use the following approach:

  1. Compute the median of the proportion of counts in the most abundant guide (i.e., the maximum proportion).
  2. Subset the cells to only those with maximum proportions above the median.
  3. Define a threshold for low outliers on the log-transformed maximum count within the subset (see choose_filter_thresholds() for details).

This assumes that over 50% of cells were successfully transfected with a single guide construct and have high maximum proportions. In contrast, unsuccessful transfections will be dominated by ambient contamination and have low proportions. By taking the subset above the median proportion, we remove all of the unsuccessful transfections and enrich for mostly-high-quality cells. From there, we can apply the usual outlier detection methods on the maximum count, with log-transformation to avoid a negative threshold.

Keep in mind that the maximum proportion is only used to define the subset for threshold calculation. Once the maximum count threshold is computed, it is applied to all cells, regardless of their maximum proportions. This allows us to recover good cells that would have been filtered out by our aggressive median subset. It also ensures that we do not remove cells transfected with multiple guides - such cells are not necessarily uninteresting, e.g., for examining interaction effects, so we will err on the side of caution and leave them in.

Template Parameters
Float_Floating-point type for the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected guides.
Value_Type of matrix value.
Index_Type of the matrix indices.
Parameters
numNumber of cells.
metricsA collection of arrays containing CRISPR-based QC metrics, filled by compute_crispr_qc_metrics().
optionsFurther options for filtering.
Returns
Object containing filter thresholds.

◆ compute_crispr_qc_filters() [2/2]

template<typename Float_ = double, typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
CrisprQcFilters< Float_ > scran_qc::compute_crispr_qc_filters ( const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &  metrics,
const ComputeCrisprQcFiltersOptions &  options 
)
Template Parameters
Float_Floating-point type for the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected guides.
Value_Type of matrix value.
Index_Type of the matrix indices.
Parameters
metricsCRISPR-based QC metrics from compute_crispr_qc_metrics().
optionsFurther options for filtering.
Returns
Object containing filter thresholds.

◆ compute_crispr_qc_filters_blocked() [1/2]

CrisprQcBlockedFilters< Float_ > scran_qc::compute_crispr_qc_filters_blocked ( size_t  num,
const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &  metrics,
const Block_ *  block,
const ComputeCrisprQcFiltersOptions &  options 
)

This function computes filter thresholds for CRISPR-derived QC metrics in blocked datasets (e.g., cells from multiple batches or samples). Each blocking level has its own thresholds, equivalent to calling compute_crispr_qc_filters() on the cells from each block. This ensures that uninteresting inter-block differences do not inflate the MAD, see choose_filter_thresholds_blocked() for more details.

Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected guides.
Value_Type of matrix value.
Index_Type of the matrix indices.
Block_Integer type for the block assignments.
Parameters
numNumber of cells.
metricsA collection of arrays containing CRISPR-based QC metrics, filled by compute_crispr_qc_metrics().
[in]blockPointer to an array of length num containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks.
optionsFurther options for filtering.
Returns
Object containing filter thresholds for each block.

◆ compute_crispr_qc_filters_blocked() [2/2]

CrisprQcBlockedFilters< Float_ > scran_qc::compute_crispr_qc_filters_blocked ( const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &  metrics,
const Block_ *  block,
const ComputeCrisprQcFiltersOptions &  options 
)
Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected guides.
Value_Type of matrix value.
Index_Type of the matrix indices.
Block_Integer type for the block assignments.
Parameters
metricsCRISPR-based QC metrics computed by compute_crispr_qc_metrics().
[in]blockPointer to an array of length equal to the number of cells, containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks.
optionsFurther options for filtering.
Returns
Object containing filter thresholds for each block.

◆ find_median_mad() [1/2]

FindMedianMadResults< Float_ > scran_qc::find_median_mad ( Index_  num,
Float_ *  metrics,
const FindMedianMadOptions &  options 
)

Calculates the median of an array of values, then uses that median to compute the median absolute deviation (MAD) of the array.

Template Parameters
Index_Integer type for array indices.
Float_Floating-point type for input and output.
Parameters
numNumber of observations.
[in]metricsPointer to an array of observations of length num. NaNs are ignored. Array contents are arbitrarily modified on function return and should not be used afterwards.
optionsFurther options.
Returns
Median and MAD for metrics, possibly after log-transformation.

◆ find_median_mad() [2/2]

template<typename Float_ = double, typename Index_ , typename Value_ >
FindMedianMadResults< Float_ > scran_qc::find_median_mad ( Index_  num,
const Value_ metrics,
Float_ buffer,
const FindMedianMadOptions options 
)

Overload of find_median_mad() that uses an auxiliary buffer to avoid mutating the input array of values.

Template Parameters
Index_Integer type for array indices.
Value_Type for the input.
Float_Floating-point type for output.
Parameters
numNumber of observations.
[in]metricsPointer to an array of observations of length num. NaNs are ignored. The array is not modified; intermediate results are written to buffer instead.
[out]bufferPointer to an array of length num, containing a buffer to use for storing intermediate results. This can also be NULL in which case a buffer is allocated.
optionsFurther options.
Returns
Median and MAD for metrics, possibly after log-transformation.

◆ find_median_mad_blocked()

std::vector< FindMedianMadResults< Output_ > > scran_qc::find_median_mad_blocked ( Index_  num,
const Value_ metrics,
const Block_ block,
FindMedianMadWorkspace< Output_, Index_ > *  workspace,
const FindMedianMadOptions options 
)

For blocked datasets, this function computes the median and MAD for each block. It is equivalent to calling find_median_mad() separately on all observations from each block.

Template Parameters
Output_Floating-point type for the output.
Index_Integer type for array indices.
Block_Integer type, containing the block IDs.
Value_Numeric type for the input.
Parameters
numNumber of observations.
[in]metricsPointer to an array of observations of length num. NaNs are ignored.
[in]blockOptional pointer to an array of block identifiers. If provided, the array should be of length equal to num. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks. If a null pointer is supplied, all observations are assumed to belong to the same block.
workspacePointer to a workspace object, either (i) constructed on num and block or (ii) configured using FindMedianMadWorkspace::set() on num and block. The same object can be re-used across multiple calls to find_median_mad_blocked() with the same num and block. This can also be NULL in which case a new workspace is allocated.
optionsFurther options.
Returns
Vector of length \(N\), where each entry contains the median and MAD for each block in block.

◆ filter_index() [1/2]

template<typename Index_ , typename Keep_ >
void scran_qc::filter_index ( Index_  num,
const Keep_ filter,
std::vector< Index_ > &  output 
)

Convert the filtering vectors produced by compute_rna_qc_filters() and friends into formats that can be used for downstream analysis. In particular, we want to slice the original feature-by-cell matrix so that only the high-quality subset of cells is retained. This is most easily done by using tatami::make_DelayedSubset() to subset the tatami::Matrix with the indices of the high-quality cells. For this purpose, we can use filter_index() to convert the boolean filtering vector into a vector of sorted and unique column indices.

Template Parameters
Index_Integer type for array indices.
Keep_Boolean type for the filter.
Parameters
numNumber of cells in the dataset.
[in]filterPointer to an array of length num, indicating whether a cell is of high quality.
[out]outputOn output, a vector of sorted and unique indices of the cells considered to be high quality.

◆ filter_index() [2/2]

template<typename Index_ , typename Keep_ >
std::vector< Index_ > scran_qc::filter_index ( Index_  num,
const Keep_ filter 
)

Overload of filter_index() that returns a vector directly.

Template Parameters
Index_Integer type for array indices.
Keep_Boolean type for the filter.
Parameters
numNumber of cells in the dataset.
[in]filterPointer to an array of length num, indicating whether a cell is of high quality.
Returns
Vector of sorted and unique indices of the cells considered to be high quality.

◆ combine_filters() [1/2]

void scran_qc::combine_filters ( size_t  num,
const std::vector< Keep_ * > &  filters,
Output_ output 
)

When dealing with multiple filters from different modalities (e.g., CrisprQcFilters::filter(), AdtQcFilters::filter()), our default strategy is to take the intersection, i.e., we only retain cells that are considered to be high quality in all modalities. This ensures that downstream analyses can be safely performed on each modality in the filtered dataset.

Template Parameters
Keep_Boolean type for each filter modality.
Output_Boolean type for the output.
Parameters
numNumber of cells in the dataset.
[in]filtersVector of pointers to arrays of length num. Each array corresponds to a modality and indicates whether each cell is high quality (truthy) or not (falsey) for that modality.
[out]outputPointer to an array of length num. On output, this is filled with truthy values only for cells that are high quality in all modalities.

◆ combine_filters() [2/2]

template<typename Output_ = uint8_t, typename Keep_ = uint8_t>
std::vector< Output_ > scran_qc::combine_filters ( size_t  num,
const std::vector< const Keep_ * > &  filters 
)

Overload of combine_filters() that returns a vector directly.

Template Parameters
Output_Boolean type for the output.
Keep_Boolean type for each filter modality.
Parameters
numNumber of cells in the dataset.
[in]filtersVector of pointers to arrays of length num. Each array corresponds to a modality and indicates whether each cell is high quality (truthy) or not (falsey) for that modality.
Returns
Vector of length num, indicating which cells are high quality in all modalities.

◆ combine_filters_index() [1/2]

template<typename Index_ , typename Keep_ >
void scran_qc::combine_filters_index ( Index_  num,
const std::vector< const Keep_ * > &  filters,
std::vector< Index_ > &  output 
)

This has the same behavior as combine_filters() followed by filter_index().

Template Parameters
Index_Integer type for array indices.
Keep_Boolean type for each filter modality.
Parameters
numNumber of cells in the dataset.
[in]filtersVector of pointers to arrays of length num. Each array corresponds to a modality and indicates whether each cell is high quality (truthy) or not (falsey) for that modality.
[out]outputOn output, a vector of sorted and unique indices of the cells considered to be high quality in all modalities.

◆ combine_filters_index() [2/2]

template<typename Index_ , typename Keep_ >
std::vector< Index_ > scran_qc::combine_filters_index ( Index_  num,
const std::vector< const Keep_ * > &  filters 
)

Overload of combine_filters_index() that returns a vector directly.

Template Parameters
Index_Integer type for array indices.
Keep_Boolean type for each filter modality.
Parameters
numNumber of cells in the dataset.
[in]filtersVector of pointers to arrays of length num. Each array corresponds to a modality and indicates whether each cell is high quality (truthy) or not (falsey) for that modality.
Returns
Vector of sorted and unique indices of the cells considered to be high quality in all modalities.

◆ per_cell_qc_metrics() [1/2]

void scran_qc::per_cell_qc_metrics ( const tatami::Matrix< Value_, Index_ > &  mat,
const std::vector< Subset_ > &  subsets,
const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &  output,
const PerCellQcMetricsOptions options 
)

Given a feature-by-cell expression matrix (usually containing counts), we compute several QC metrics:

  • The sum of expression values for each cell, which represents the efficiency of library preparation and sequencing. Low sums indicate that the library was not successfully captured.
  • The number of detected features (i.e., with non-zero counts). This also quantifies the library preparation efficiency, but with a greater focus on capturing the transcriptional complexity.
  • The maximum value across all features. This is useful in situations where only one feature is expected to be present, e.g., CRISPR guides, hash tags.
  • The row index of the feature with the maximum count. If multiple features are tied for the maximum count, the earliest feature is reported.
  • The sum of expression values in pre-defined feature subsets. The exact interpretation depends on the nature of the subset - most commonly, one subset will contain all genes on the mitochondrial chromosome, where higher proportions of counts in the mitochondrial subset indicate cell damage due to loss of cytoplasmic transcripts. Spike-in proportions can be interpreted in a similar manner.
  • The number of detected features in pre-defined feature subsets. Analogous to the number of detected features for the entire feature space.
Template Parameters
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a vector of indices.
Sum_Floating point type to store the sums.
Detected_Integer type to store the number of detected features.
Parameters
matA tatami matrix, typically containing count data. Rows should correspond to features (e.g., genes) while columns should correspond to cells.
[in]subsetsVector of feature subsets, where each entry represents a feature subset and may be either:
  • A pointer to an array of length equal to mat.nrow() where each entry is interpretable as a boolean. This indicates whether each row in mat belongs to the subset.
  • A std::vector containing sorted and unique row indices. This specifies the rows in mat that belong to the subset.
[out]outputCollection of buffers in which the computed statistics are to be stored.
optionsFurther options.

◆ per_cell_qc_metrics() [2/2]

PerCellQcMetricsResults< Sum_, Detected_, Value_, Index_ > scran_qc::per_cell_qc_metrics ( const tatami::Matrix< Value_, Index_ > &  mat,
const std::vector< Subset_ > &  subsets,
const PerCellQcMetricsOptions options 
)
Template Parameters
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a vector of indices.
Sum_Floating point type to store the sums.
Detected_Integer type to store the number of detected features.
Parameters
matA tatami matrix, typically containing count data. Rows should correspond to features (e.g., genes) while columns should correspond to cells.
[in]subsetsVector of feature subsets, where each entry represents a feature subset and may be either:
  • A pointer to an array of length equal to mat.nrow() where each entry is interpretable as a boolean. This indicates whether each row in mat belongs to the subset.
  • A std::vector containing sorted and unique row indices. This specifies the rows in mat that belong to the subset.
optionsFurther options.
Returns
Object containing the QC metrics. Not all metrics may be computed depending on options.

◆ compute_rna_qc_metrics() [1/2]

void scran_qc::compute_rna_qc_metrics ( const tatami::Matrix< Value_, Index_ > &  mat,
const std::vector< Subset_ > &  subsets,
const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &  output,
const ComputeRnaQcMetricsOptions options 
)

Given a feature-by-cell RNA count matrix, we compute several metrics for filtering high-quality cells:

  • The total sum of counts for each cell, which represents the efficiency of library preparation and sequencing. Low totals indicate that the library was not successfully captured.
  • The number of detected features. This also quantifies the library preparation efficiency, but with a greater focus on capturing the transcriptional complexity.
  • The proportion of counts in pre-defined feature subsets, the exact interpretation of which depends on the nature of the subset. Typically, one subset contains all genes on the mitochondrial chromosome, where higher proportions are representative of cell damage; the assumption is that cytoplasmic transcripts leak through tears in the cell membrane while the mitochondria are still trapped inside. The proportion of spike-in transcripts can be interpreted in a similar manner, where the loss of endogenous transcripts results in higher spike-in proportions.

We use these metrics to define thresholds for filtering in compute_rna_qc_filters().

Template Parameters
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a vector of indices.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected features.
Proportion_Floating-point type to store the proportions.
Parameters
matA tatami matrix containing counts. Rows should correspond to genes while columns should correspond to cells.
[in]subsetsVector of feature subsets, typically mitochondrial genes or spike-in transcripts. See per_cell_qc_metrics() for more details on the expected format.
[out]outputCollection of buffers in which to store the output.
optionsFurther options.

◆ compute_rna_qc_metrics() [2/2]

template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Value_ = double, typename Index_ = int, typename Subset_ = const uint8_t*>
ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > scran_qc::compute_rna_qc_metrics ( const tatami::Matrix< Value_, Index_ > &  mat,
const std::vector< Subset_ > &  subsets,
const ComputeRnaQcMetricsOptions options 
)

Overload of compute_rna_qc_metrics() that allocates memory for the results.

Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected features.
Proportion_Floating-point type to store the proportions.
Value_Type of matrix value.
Index_Type of the matrix indices.
Subset_Either a pointer to an array of booleans or a vector of indices.
Parameters
matA tatami matrix containing counts. Rows should correspond to genes while columns should correspond to cells.
[in]subsetsVector of feature subsets, typically mitochondrial genes or spike-in transcripts. See per_cell_qc_metrics() for more details on the expected format.
optionsFurther options.
Returns
An object containing the QC metrics. Subset proportions are returned depending on the subsets.

◆ compute_rna_qc_filters() [1/2]

template<typename Float_ = double, typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
RnaQcFilters< Float_ > scran_qc::compute_rna_qc_filters ( size_t  num,
const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &  metrics,
const ComputeRnaQcFiltersOptions options 
)

Using the RNA-relevant QC metrics from compute_rna_qc_metrics(), we consider low-quality cells to be those with a low sum, a low number of detected genes, and high subset proportions. We define thresholds for each metric using an MAD-based outlier approach. For the total counts and number of detected features, the outliers are defined after log-transformation of the metrics.

Template Parameters
Float_Floating-point type for the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected features.
Proportion_Floating-point type to store the proportions.
Parameters
numNumber of cells.
metricsA collection of buffers containing RNA-based QC metrics, filled by compute_rna_qc_metrics().
optionsFurther options for filtering.
Returns
An object containing the filter thresholds.

◆ compute_rna_qc_filters() [2/2]

template<typename Float_ = double, typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
RnaQcFilters< Float_ > scran_qc::compute_rna_qc_filters ( const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &  metrics,
const ComputeRnaQcFiltersOptions options 
)

Overload of compute_rna_qc_filters() that takes the results object from compute_rna_qc_metrics() directly. For blocked datasets (e.g., cells from multiple batches or samples), use compute_rna_qc_filters_blocked() instead, which computes filter thresholds for RNA-derived QC metrics such that each blocking level has its own thresholds, equivalent to calling compute_rna_qc_filters() on the cells from each block. This ensures that uninteresting inter-block differences do not inflate the MAD, see choose_filter_thresholds_blocked() for more details.

Template Parameters
Float_Floating-point type for the thresholds.
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected features.
Proportion_Floating-point type to store the proportions.
Parameters
metricsRNA-based QC metrics from compute_rna_qc_metrics().
optionsFurther options for filtering.
Returns
An object containing the filter thresholds.

◆ compute_rna_qc_filters_blocked() [1/2]

RnaQcBlockedFilters< Float_ > scran_qc::compute_rna_qc_filters_blocked ( size_t  num,
const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &  metrics,
const Block_ block,
const ComputeRnaQcFiltersOptions options 
)
Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected features.
Proportion_Floating-point type to store the proportions.
Block_Integer type for the block assignments.
Parameters
numNumber of cells.
metricsA collection of buffers containing RNA-based QC metrics, filled by compute_rna_qc_metrics().
[in]blockPointer to an array of length num containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks.
optionsFurther options for filtering.
Returns
Object containing filter thresholds for each block.

◆ compute_rna_qc_filters_blocked() [2/2]

RnaQcBlockedFilters< Float_ > scran_qc::compute_rna_qc_filters_blocked ( const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &  metrics,
const Block_ block,
const ComputeRnaQcFiltersOptions options 
)
Template Parameters
Sum_Numeric type to store the summed expression.
Detected_Integer type to store the number of detected features.
Proportion_Floating-point type to store the proportions.
Block_Integer type for the block assignments.
Parameters
metricsRNA-based QC metrics computed by compute_rna_qc_metrics().
[in]blockPointer to an array of length equal to the number of cells in metrics, containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks.
optionsFurther options for filtering.
Returns
Object containing filter thresholds for each block.