scran_qc
Simple quality control on single-cell data
|
Simple quality control for single-cell data. More...
Functions | |
template<typename Value_ , typename Index_ , typename Subset_ , typename Sum_ , typename Detected_ > | |
void | compute_adt_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &output, const ComputeAdtQcMetricsOptions &options) |
template<typename Sum_ = double, typename Detected_ = int, typename Value_ , typename Index_ , typename Subset_ > | |
ComputeAdtQcMetricsResults< Sum_, Detected_ > | compute_adt_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeAdtQcMetricsOptions &options) |
template<typename Float_ = double, typename Sum_ , typename Detected_ > | |
AdtQcFilters< Float_ > | compute_adt_qc_filters (const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const ComputeAdtQcFiltersOptions &options) |
template<typename Float_ = double, typename Sum_ , typename Detected_ > | |
AdtQcFilters< Float_ > | compute_adt_qc_filters (const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const ComputeAdtQcFiltersOptions &options) |
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Block_ > | |
AdtQcBlockedFilters< Float_ > | compute_adt_qc_filters_blocked (const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *const block, const ComputeAdtQcFiltersOptions &options) |
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Block_ > | |
AdtQcBlockedFilters< Float_ > | compute_adt_qc_filters_blocked (const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *const block, const ComputeAdtQcFiltersOptions &options) |
template<typename Float_ > | |
ChooseFilterThresholdsResults< Float_ > | choose_filter_thresholds (const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options) |
template<typename Float_ > | |
ChooseFilterThresholdsResults< Float_ > | choose_filter_thresholds (const std::size_t num, Float_ *const metrics, const ChooseFilterThresholdsOptions &options) |
template<typename Value_ , typename Float_ > | |
ChooseFilterThresholdsResults< Float_ > | choose_filter_thresholds (const std::size_t num, const Value_ *const metrics, Float_ *const buffer, const ChooseFilterThresholdsOptions &options) |
template<typename Float_ > | |
std::vector< ChooseFilterThresholdsResults< Float_ > > | choose_filter_thresholds_blocked (const std::vector< FindMedianMadResults< Float_ > > &mms, const ChooseFilterThresholdsOptions &options) |
template<typename Value_ , typename Block_ , typename Float_ > | |
std::vector< ChooseFilterThresholdsResults< Float_ > > | choose_filter_thresholds_blocked (const std::size_t num, const Value_ *const metrics, const Block_ *const block, FindMedianMadWorkspace< Float_ > *const workspace, const ChooseFilterThresholdsOptions &options) |
template<typename Value_ , typename Index_ , typename Sum_ , typename Detected_ > | |
void | compute_crispr_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const ComputeCrisprQcMetricsOptions &options) |
template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int> | |
ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > | compute_crispr_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const ComputeCrisprQcMetricsOptions &options) |
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ > | |
CrisprQcFilters< Float_ > | compute_crispr_qc_filters (const std::size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const ComputeCrisprQcFiltersOptions &options) |
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ > | |
CrisprQcFilters< Float_ > | compute_crispr_qc_filters (const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const ComputeCrisprQcFiltersOptions &options) |
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ , typename Block_ > | |
CrisprQcBlockedFilters< Float_ > | compute_crispr_qc_filters_blocked (const std::size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, const ComputeCrisprQcFiltersOptions &options) |
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Value_ , typename Index_ , typename Block_ > | |
CrisprQcBlockedFilters< Float_ > | compute_crispr_qc_filters_blocked (const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, const ComputeCrisprQcFiltersOptions &options) |
template<typename Float_ > | |
FindMedianMadResults< Float_ > | find_median_mad (std::size_t num, Float_ *metrics, const FindMedianMadOptions &options) |
template<typename Float_ = double, typename Value_ > | |
FindMedianMadResults< Float_ > | find_median_mad (const std::size_t num, const Value_ *const metrics, Float_ *buffer, const FindMedianMadOptions &options) |
template<typename Output_ = double, typename Value_ , typename Block_ > | |
std::vector< FindMedianMadResults< Output_ > > | find_median_mad_blocked (const std::size_t num, const Value_ *const metrics, const Block_ *const block, FindMedianMadWorkspace< Output_ > *workspace, const FindMedianMadOptions &options) |
template<typename Keep_ , typename Index_ > | |
void | filter_index (const std::size_t num, const Keep_ *const filter, std::vector< Index_ > &output) |
template<typename Index_ , typename Keep_ > | |
std::vector< Index_ > | filter_index (const std::size_t num, const Keep_ *const filter) |
template<typename Keep_ , typename Output_ > | |
void | combine_filters (const std::size_t num, const std::vector< Keep_ * > &filters, Output_ *const output) |
template<typename Output_ = unsigned char, typename Keep_ > | |
std::vector< Output_ > | combine_filters (const std::size_t num, const std::vector< const Keep_ * > &filters) |
template<typename Index_ , typename Keep_ > | |
void | combine_filters_index (const Index_ num, const std::vector< const Keep_ * > &filters, std::vector< Index_ > &output) |
template<typename Index_ , typename Keep_ > | |
std::vector< Index_ > | combine_filters_index (const Index_ num, const std::vector< const Keep_ * > &filters) |
template<typename Value_ , typename Index_ , typename Subset_ , typename Sum_ , typename Detected_ > | |
void | per_cell_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options) |
template<typename Sum_ = double, typename Detected_ = int, typename Value_ , typename Index_ , typename Subset_ > | |
PerCellQcMetricsResults< Sum_, Detected_, Value_, Index_ > | per_cell_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsOptions &options) |
template<typename Value_ , typename Index_ , typename Subset_ , typename Sum_ , typename Detected_ , typename Proportion_ > | |
void | compute_rna_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &output, const ComputeRnaQcMetricsOptions &options) |
template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Value_ , typename Index_ , typename Subset_ > | |
ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > | compute_rna_qc_metrics (const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeRnaQcMetricsOptions &options) |
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ > | |
RnaQcFilters< Float_ > | compute_rna_qc_filters (const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const ComputeRnaQcFiltersOptions &options) |
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ > | |
RnaQcFilters< Float_ > | compute_rna_qc_filters (const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const ComputeRnaQcFiltersOptions &options) |
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ , typename Block_ > | |
RnaQcBlockedFilters< Float_ > | compute_rna_qc_filters_blocked (const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, const ComputeRnaQcFiltersOptions &options) |
template<typename Float_ = double, typename Sum_ , typename Detected_ , typename Proportion_ , typename Block_ > | |
RnaQcBlockedFilters< Float_ > | compute_rna_qc_filters_blocked (const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, const ComputeRnaQcFiltersOptions &options) |
Simple quality control for single-cell data.
void scran_qc::compute_adt_qc_metrics | ( | const tatami::Matrix< Value_, Index_ > & | mat, |
const std::vector< Subset_ > & | subsets, | ||
const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > & | output, | ||
const ComputeAdtQcMetricsOptions & | options ) |
Given a feature-by-cell ADT count matrix, this function uses per_cell_qc_metrics()
to compute several ADT-relevant QC metrics:
We use these metrics to define thresholds for filtering in compute_adt_qc_filters()
.
Value_ | Type of matrix value. |
Index_ | Type of the matrix indices. |
Subset_ | Either a pointer to an array of booleans or a vector of indices. |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of cells. |
mat | A matrix of non-negative counts. Rows correspond to ADT features while columns correspond to cells. | |
[in] | subsets | Vector of feature subsets, typically IgG controls. See per_cell_qc_metrics() for more details on the expected format. |
[out] | output | ComputeAdtQcMetricsBuffers object in which to store the output. |
options | Further options. |
ComputeAdtQcMetricsResults< Sum_, Detected_ > scran_qc::compute_adt_qc_metrics | ( | const tatami::Matrix< Value_, Index_ > & | mat, |
const std::vector< Subset_ > & | subsets, | ||
const ComputeAdtQcMetricsOptions & | options ) |
Overload of compute_adt_qc_metrics()
that allocates memory for the results.
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of cells. |
Value_ | Type of matrix value. |
Index_ | Type of the matrix indices. |
Subset_ | Either a pointer to an array of booleans or a vector of indices. |
mat | A matrix of non-negative counts. Rows correspond to ADT features while columns correspond to cells. | |
[in] | subsets | Vector of feature subsets, typically IgG controls. See per_cell_qc_metrics() for more details on the expected format. |
options | Further options. |
AdtQcFilters< Float_ > scran_qc::compute_adt_qc_filters | ( | const std::size_t | num, |
const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > & | metrics, | ||
const ComputeAdtQcFiltersOptions & | options ) |
Given the ADT-relevant QC metrics from compute_adt_qc_metrics()
, we consider low-quality cells to be those with a low number of detected tags and high subset sums.
For each subset's sum, we define the upper threshold using the MAD-based outlier approach implemented in choose_filter_thresholds()
. This is done using the specified ComputeAdtQcFiltersOptions::num_mads
and after log-transformation of the sums.
For the number of detected features, we define a lower threshold as the lower of:
The MAD-based outlier threshold defined by choose_filter_thresholds()
, computed using the specified ComputeAdtQcFiltersOptions::num_mads
and after log-transformation.
The product of the median number of detected features and 1 - ComputeAdtQcFiltersOptions::min_detected_drop
.
So by default, cells are only considered to be low quality if the number of detected features drops 10% or more below the median. This avoids overly aggressive filtering when the MAD is zero due to the discrete nature of this statistic in datasets with few tags.
Float_ | Floating-point type of the thresholds. |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of cells. |
num | Number of cells. |
metrics | A collection of arrays containing ADT-based QC metrics, filled by compute_adt_qc_metrics() . |
options | Further options for filtering. |
AdtQcFilters< Float_ > scran_qc::compute_adt_qc_filters | ( | const ComputeAdtQcMetricsResults< Sum_, Detected_ > & | metrics, |
const ComputeAdtQcFiltersOptions & | options ) |
Float_ | Floating-point type of the thresholds. |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of cells. |
metrics | ADT-based QC metrics from compute_adt_qc_metrics() . |
options | Further options for filtering. |
AdtQcBlockedFilters< Float_ > scran_qc::compute_adt_qc_filters_blocked | ( | const std::size_t | num, |
const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > & | metrics, | ||
const Block_ *const | block, | ||
const ComputeAdtQcFiltersOptions & | options ) |
This function computes filter thresholds for ADT-derived QC metrics in blocked datasets (e.g., cells from multiple batches or samples). Each blocking level has its own thresholds, equivalent to calling compute_adt_qc_filters()
on the cells from each block. This ensures that uninteresting inter-block differences do not inflate the MAD, see choose_filter_thresholds_blocked()
for more details.
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of cells. |
Block_ | Integer type of the block assignments. |
num | Number of cells. | |
metrics | A collection of arrays containing ADT-based QC metrics, filled by compute_adt_qc_metrics() . | |
[in] | block | Pointer to an array of length num containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks. |
options | Further options for filtering. |
AdtQcBlockedFilters< Float_ > scran_qc::compute_adt_qc_filters_blocked | ( | const ComputeAdtQcMetricsResults< Sum_, Detected_ > & | metrics, |
const Block_ *const | block, | ||
const ComputeAdtQcFiltersOptions & | options ) |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of cells. |
Block_ | Integer type of the block assignments. |
metrics | ADT-based QC metrics computed by compute_adt_qc_metrics() . | |
[in] | block | Pointer to an array of length num containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks. |
options | Further options for filtering. |
ChooseFilterThresholdsResults< Float_ > scran_qc::choose_filter_thresholds | ( | const FindMedianMadResults< Float_ > & | mm, |
const ChooseFilterThresholdsOptions & | options ) |
We define filter thresholds on the QC metrics by assuming that most cells in the experiment are of high (or at least acceptable) quality. Any outlier values are indicative of low-quality cells that should be filtered out. Given an array of values, outliers are defined as those that are more than some number of median absolute deviations (MADs) from the median value. Outliers can be defined in both directions or just a single direction, depending on the interpretation of the QC metric. We can also apply a log-transformation to the metrics to identify outliers with respect to their fold-change from the median.
Float_ | Floating-point type for the thresholds. |
mm | Median and MAD computed by find_median_mad() . If ChooseFilterThresholdsOptions::log = true , it is expected that the median and MAD are computed on the log-transformed metrics (i.e., FindMedianMadOptions::log = true ). |
options | Further options. |
mm
. ChooseFilterThresholdsResults< Float_ > scran_qc::choose_filter_thresholds | ( | const std::size_t | num, |
Float_ *const | metrics, | ||
const ChooseFilterThresholdsOptions & | options ) |
This overload computes the median and MAD via find_median_mad()
before deriving thresholds with choose_filter_thresholds()
.
Float_ | Floating-point type for the metrics and thresholds. |
num | Number of cells. | |
[in] | metrics | Pointer to an array of length num , containing a QC metric for each cell. This is modified arbitrarily on output. |
options | Further options. |
metrics
. ChooseFilterThresholdsResults< Float_ > scran_qc::choose_filter_thresholds | ( | const std::size_t | num, |
const Value_ *const | metrics, | ||
Float_ *const | buffer, | ||
const ChooseFilterThresholdsOptions & | options ) |
Overload of choose_filter_thresholds()
that uses an auxiliary buffer to avoid mutating metrics
.
Value_ | Type for the input data. |
Float_ | Floating-point type for the metrics and thresholds. |
num | Number of cells. | |
[in] | metrics | Pointer to an array of length num , containing a QC metric for each cell. |
buffer | Pointer to an array of length num in which to store intermediate results. Alternatively NULL, in which case a buffer is automatically allocated. | |
options | Further options. |
metrics
. std::vector< ChooseFilterThresholdsResults< Float_ > > scran_qc::choose_filter_thresholds_blocked | ( | const std::vector< FindMedianMadResults< Float_ > > & | mms, |
const ChooseFilterThresholdsOptions & | options ) |
For datasets with multiple blocks, we can compute block-specific thresholds for each metric. This is equivalent to calling choose_filter_thresholds()
on the cells for each block. Our assumption is that differences in the metric distributions between blocks are driven by uninteresting causes (e.g., differences in sequencing depth); variable thresholds can adapt to each block's distribution for effective removal of outliers.
That said, if the differences in the distributions between blocks are interesting, it may be preferable to ignore the blocking factor and just use choose_filter_thresholds()
instead. This ensures that the MADs are increased appropriately to avoid filtering out interesting variation.
Float_ | Floating-point type for the thresholds. |
mms | Vector of medians and MADs for each block. |
options | Further options. |
std::vector< ChooseFilterThresholdsResults< Float_ > > scran_qc::choose_filter_thresholds_blocked | ( | const std::size_t | num, |
const Value_ *const | metrics, | ||
const Block_ *const | block, | ||
FindMedianMadWorkspace< Float_ > *const | workspace, | ||
const ChooseFilterThresholdsOptions & | options ) |
This overload computes the median and MAD for each block via find_median_mad_blocked()
before deriving thresholds in each block with choose_filter_thresholds_blocked()
.
Value_ | Type for the input data. |
Float_ | Floating-point type for the metrics and thresholds. |
num | Number of cells. | |
[in] | metrics | Pointer to an array of length num , containing a QC metric for each cell. |
[in] | block | Optional pointer to an array of block identifiers, see find_median_mad_blocked() for details. |
workspace | Pointer to a workspace object, see find_median_mad_blocked() for details. | |
options | Further options. |
void scran_qc::compute_crispr_qc_metrics | ( | const tatami::Matrix< Value_, Index_ > & | mat, |
const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > & | output, | ||
const ComputeCrisprQcMetricsOptions & | options ) |
Given a guide-by-cell count matrix, this function uses per_cell_qc_metrics()
to compute several CRISPR-relevant QC metrics:
We use these metrics to define thresholds for filtering in compute_crispr_qc_filters()
.
Value_ | Type of matrix value. |
Index_ | Type of the matrix indices. |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of cells. |
mat | A matrix of non-negative counts. Rows correspond to CRISPR guides while columns correspond to cells. | |
[out] | output | ComputeCrisprQcMetricsBuffers object in which to store the output. |
options | Further options. |
ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > scran_qc::compute_crispr_qc_metrics | ( | const tatami::Matrix< Value_, Index_ > & | mat, |
const ComputeCrisprQcMetricsOptions & | options ) |
Overload of compute_crispr_qc_metrics()
that allocates memory for the results.
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of cells. |
Value_ | Type of matrix value. |
Index_ | Type of the matrix indices. |
Subset_ | Either a pointer to an array of booleans or a vector of indices. |
mat | A matrix of non-negative counts. Each row should correspond to a CRISPR guide while each column should correspond to a cell. |
options | Further options. |
CrisprQcFilters< Float_ > scran_qc::compute_crispr_qc_filters | ( | const std::size_t | num, |
const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > & | metrics, | ||
const ComputeCrisprQcFiltersOptions & | options ) |
In CRISPR data, a cell is considered to be of low quality if it has a low count for its most abundant guide. However, directly applying choose_filter_thresholds()
on the maximum count is somewhat tricky as unsuccessful transfection can be common. This results in a large subpopulation with low maximum counts, inflating the MAD and compromising the threshold calculation. Instead, we use the following approach:
choose_filter_thresholds()
for details). This is now possible as we can assume that most of the remaining cells are of high quality.
Note that the maximum proportion is only used to define the subset for threshold calculation. Once the maximum count threshold is computed, it is applied to all cells regardless of their maximum proportions. This ensures that we correctly remove cells with low coverage, even if the proportion is high. It also allows us to retain cells transfected with multiple guides, as long as the maximum is high enough - such cells are not necessarily uninteresting, e.g., for examining interaction effects, so we will err on the side of caution and leave them in.
Float_ | Floating-point type of the thresholds. |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of cells. |
Value_ | Type of matrix value. |
Index_ | Type of the matrix indices. |
num | Number of cells. |
metrics | A collection of arrays containing CRISPR-based QC metrics, filled by compute_crispr_qc_metrics() . |
options | Further options for filtering. |
CrisprQcFilters< Float_ > scran_qc::compute_crispr_qc_filters | ( | const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > & | metrics, |
const ComputeCrisprQcFiltersOptions & | options ) |
Float_ | Floating-point type of the thresholds. |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of cells. |
Value_ | Type of matrix value. |
Index_ | Type of the matrix indices. |
metrics | CRISPR-based QC metrics from compute_crispr_qc_metrics() . |
options | Further options for filtering. |
CrisprQcBlockedFilters< Float_ > scran_qc::compute_crispr_qc_filters_blocked | ( | const std::size_t | num, |
const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > & | metrics, | ||
const Block_ *const | block, | ||
const ComputeCrisprQcFiltersOptions & | options ) |
This function computes filter thresholds for CRISPR-derived QC metrics in blocked datasets (e.g., cells from multiple batches or samples). Each blocking level has its own thresholds, equivalent to calling compute_crispr_qc_filters()
on the cells from each block. This ensures that uninteresting inter-block differences do not inflate the MAD, see choose_filter_thresholds_blocked()
for more details.
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of cells. |
Value_ | Type of matrix value. |
Index_ | Type of the matrix indices. |
Block_ | Integer type of the block assignments. |
num | Number of cells. | |
metrics | A collection of arrays containing CRISPR-based QC metrics, filled by compute_crispr_qc_metrics() . | |
[in] | block | Pointer to an array of length num containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks. |
options | Further options for filtering. |
CrisprQcBlockedFilters< Float_ > scran_qc::compute_crispr_qc_filters_blocked | ( | const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > & | metrics, |
const Block_ *const | block, | ||
const ComputeCrisprQcFiltersOptions & | options ) |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of cells. |
Value_ | Type of matrix value. |
Index_ | Type of the matrix indices. |
Block_ | Integer type of the block assignments. |
metrics | CRISPR-based QC metrics computed by compute_crispr_qc_metrics() . | |
[in] | block | Pointer to an array of length num containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks. |
options | Further options for filtering. |
FindMedianMadResults< Float_ > scran_qc::find_median_mad | ( | std::size_t | num, |
Float_ * | metrics, | ||
const FindMedianMadOptions & | options ) |
Pretty much as it says on the can; calculates the median of an array of values first, and uses the median to then compute the median absolute deviation (MAD) from that array.
Float_ | Floating-point type of the input and output. |
num | Number of observations. | |
[in] | metrics | Pointer to an array of observations of length num . NaNs are ignored. Array contents are arbitrarily modified on function return and should not be used afterwards. |
options | Further options. |
Returns the median and MAD computed from metrics
, possibly after log-transformation.
FindMedianMadResults< Float_ > scran_qc::find_median_mad | ( | const std::size_t | num, |
const Value_ *const | metrics, | ||
Float_ * | buffer, | ||
const FindMedianMadOptions & | options ) |
Overload of find_median_mad()
that uses an auxiliary buffer to avoid mutating the input array of values.
Value_ | Type for the input. |
Float_ | Floating-point type of the output. |
num | Number of observations. | |
[in] | metrics | Pointer to an array of observations of length num . NaNs are ignored. |
[out] | buffer | Pointer to an array of length num , containing a buffer to use for storing intermediate results. Array contents are arbitrarily modified on function return and should not be used afterwards. This can also be NULL in which case a buffer is allocated. |
options | Further options. |
Returns the median and MAD computed from metrics
, possibly after log-transformation.
std::vector< FindMedianMadResults< Output_ > > scran_qc::find_median_mad_blocked | ( | const std::size_t | num, |
const Value_ *const | metrics, | ||
const Block_ *const | block, | ||
FindMedianMadWorkspace< Output_ > * | workspace, | ||
const FindMedianMadOptions & | options ) |
For blocked datasets, this function computes the median and MAD for each block. It is equivalent to calling find_median_mad()
separately on all observations from each block.
Output_ | Floating-point type of the output. |
Block_ | Integer type, containing the block IDs. |
Value_ | Numeric type of the input. |
num | Number of observations. | |
[in] | metrics | Pointer to an array of observations of length num . NaNs are ignored. |
[in] | block | Optional pointer to an array of block identifiers. If provided, the array should be of length equal to num . Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks. If a null pointer is supplied, all observations are assumed to belong to the same block. |
workspace | Pointer to a workspace object, either (i) constructed on num and block or (ii) configured using FindMedianMadWorkspace::set() on num and block . The same object can be re-used across multiple calls to find_median_mad_blocked() with the same num and block . This can also be NULL in which case a new workspace is allocated. | |
options | Further options. |
block
. void scran_qc::filter_index | ( | const std::size_t | num, |
const Keep_ *const | filter, | ||
std::vector< Index_ > & | output ) |
Convert the filtering vectors produced by compute_rna_qc_filters()
and friends into formats that can be used for downstream analysis. In particular, we want to slice the original feature-by-cell matrix so that only the high-quality subset of cells is retained. This is most easily done by using tatami::make_DelayedSubset()
to subset the tatami::Matrix
with the indices of the high-quality cells. For this purpose, we can use filter_index()
to convert the boolean filtering vector into a vector of sorted and unique column indices.
Keep_ | Boolean type of the filter. |
Index_ | Integer type of array indices. |
num | Number of cells in the dataset. | |
[in] | filter | Pointer to an array of length num , indicating whether a cell is of high quality. |
[out] | output | On output, a vector of sorted and unique indices of the cells considered to be high quality. |
std::vector< Index_ > scran_qc::filter_index | ( | const std::size_t | num, |
const Keep_ *const | filter ) |
Overload of filter_index()
that returns a vector directly.
Index_ | Integer type of array indices. |
Keep_ | Boolean type of each filter modality. |
num | Number of cells in the dataset. | |
[in] | filter | Pointer to an array of length num , indicating whether a cell is of high quality. |
void scran_qc::combine_filters | ( | const std::size_t | num, |
const std::vector< Keep_ * > & | filters, | ||
Output_ *const | output ) |
When dealing with multiple filters from different modalities (e.g., CrisprQcFilters::filter()
, AdtQcFilters::filter()
), our default strategy is to take the intersection, i.e., we only retain cells that are considered to be high quality in all modalities. This ensures that downstream analyses can be safely performed on each modality in the filtered dataset.
Keep_ | Boolean type of each filter modality. |
Output_ | Boolean type of the output. |
num | Number of cells in the dataset. | |
[in] | filters | Vector of pointers to arrays of length num . Each array corresponds to a modality and indicates whether each cell is high quality (truthy) or not (falsey) for that modality. |
[out] | output | Pointer to an array of length num . On output, this is filled with truthy values only for cells that are high quality in all modalities. |
std::vector< Output_ > scran_qc::combine_filters | ( | const std::size_t | num, |
const std::vector< const Keep_ * > & | filters ) |
Overload of combine_filters()
that returns a vector directly.
Output_ | Boolean type of the output. |
Keep_ | Boolean type of each filter modality. |
num | Number of cells in the dataset. | |
[in] | filters | Vector of pointers to arrays of length num . Each array corresponds to a modality and indicates whether each cell is high quality (truthy) or not (falsey) for that modality. |
Returns a vector of length num
, indicating which cells are high quality in all modalities.
void scran_qc::combine_filters_index | ( | const Index_ | num, |
const std::vector< const Keep_ * > & | filters, | ||
std::vector< Index_ > & | output ) |
This has the same behavior as combine_filters()
followed by filter_index()
.
Keep_ | Boolean type of each filter modality. |
num | Number of cells in the dataset. | |
[in] | filters | Vector of pointers to arrays of length num . Each array corresponds to a modality and indicates whether each cell is high quality (truthy) or not (falsey) for that modality. |
[out] | output | On output, a vector of sorted and unique indices of the cells considered to be high quality in all modalities. |
std::vector< Index_ > scran_qc::combine_filters_index | ( | const Index_ | num, |
const std::vector< const Keep_ * > & | filters ) |
Overload of combine_filters_index()
that returns a vector directly.
Index_ | Integer type of array indices. |
Keep_ | Boolean type of each filter modality. |
num | Number of cells in the dataset. | |
[in] | filters | Vector of pointers to arrays of length num . Each array corresponds to a modality and indicates whether each cell is high quality (truthy) or not (falsey) for that modality. |
void scran_qc::per_cell_qc_metrics | ( | const tatami::Matrix< Value_, Index_ > & | mat, |
const std::vector< Subset_ > & | subsets, | ||
const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > & | output, | ||
const PerCellQcMetricsOptions & | options ) |
Given a feature-by-cell expression matrix (usually containing non-negative counts), we compute several QC metrics:
Value_ | Type of matrix value. |
Index_ | Type of the matrix indices. |
Subset_ | Either a pointer to an array of booleans or a std::vector of indices. |
Sum_ | Floating point type to store the sums. |
Detected_ | Integer type to store the number of detected features. |
mat | A matrix of non-negative counts. Rows should correspond to features (e.g., genes) while columns should correspond to cells. | |
[in] | subsets | Vector of feature subsets, where each entry represents a feature subset and may be either a pointer to an array of booleans or a std::vector of indices.
|
[out] | output | Collection of buffers in which the computed statistics are to be stored. |
options | Further options. |
PerCellQcMetricsResults< Sum_, Detected_, Value_, Index_ > scran_qc::per_cell_qc_metrics | ( | const tatami::Matrix< Value_, Index_ > & | mat, |
const std::vector< Subset_ > & | subsets, | ||
const PerCellQcMetricsOptions & | options ) |
Value_ | Type of matrix value. |
Index_ | Type of the matrix indices. |
Subset_ | Either a pointer to an array of booleans or a std::vector of indices. |
Sum_ | Floating point type to store the sums. |
Detected_ | Integer type to store the number of detected features. |
mat | A matrix of non-negative counts. Rows should correspond to features (e.g., genes) while columns should correspond to cells. | |
[in] | subsets | Vector of feature subsets, where each entry represents a feature subset and may be either a pointer to an array of booleans or a std::vector of indices.
|
options | Further options. |
Returns an object containing the computed QC metrics; which metrics are computed depends on options.
void scran_qc::compute_rna_qc_metrics | ( | const tatami::Matrix< Value_, Index_ > & | mat, |
const std::vector< Subset_ > & | subsets, | ||
const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > & | output, | ||
const ComputeRnaQcMetricsOptions & | options ) |
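Given a feature-by-cell RNA count matrix, we compute several metrics for filtering high-quality cells: the sum of counts for each cell, the number of detected genes for each cell, and the proportion of counts in each feature subset (e.g., mitochondrial genes).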
We use these metrics to define thresholds for filtering in compute_rna_qc_filters().
Value_ | Type of matrix value. |
Index_ | Type of the matrix indices. |
Subset_ | Either a pointer to an array of booleans or a vector of indices. |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of detected genes. |
Proportion_ | Floating-point type to store the proportions. |
mat | A matrix of non-negative counts. Rows should correspond to genes while columns should correspond to cells. | |
[in] | subsets | Vector of feature subsets, typically mitochondrial genes or spike-in transcripts. See the argument of the same name in per_cell_qc_metrics() for more details on the expected format. |
[out] | output | Collection of buffers in which to store the output. |
options | Further options. |
ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > scran_qc::compute_rna_qc_metrics | ( | const tatami::Matrix< Value_, Index_ > & | mat, |
const std::vector< Subset_ > & | subsets, | ||
const ComputeRnaQcMetricsOptions & | options ) |
Overload of compute_rna_qc_metrics()
that allocates memory for the results.
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of detected genes. |
Proportion_ | Floating-point type to store the proportions. |
Value_ | Type of matrix value. |
Index_ | Type of the matrix indices. |
Subset_ | Either a pointer to an array of booleans or a std::vector of indices. |
mat | A matrix of non-negative counts. Rows should correspond to genes while columns should correspond to cells. | |
[in] | subsets | Vector of feature subsets, typically mitochondrial genes or spike-in transcripts. See the argument of the same name in per_cell_qc_metrics() for more details on the expected format. |
options | Further options. |
Returns an object containing the QC metrics, with subset proportions computed for each entry of subsets.
RnaQcFilters< Float_ > scran_qc::compute_rna_qc_filters | ( | const std::size_t | num, |
const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > & | metrics, | ||
const ComputeRnaQcFiltersOptions & | options ) |
Given the RNA-relevant QC metrics from compute_rna_qc_metrics(), we consider low-quality cells to be those with a low sum, a low number of detected genes, or high subset proportions. We define thresholds for each metric using the MAD-based outlier approach implemented in choose_filter_thresholds(). For the total counts and number of detected features, the outliers are defined after log-transformation of the metrics.
Float_ | Floating-point type of the thresholds. |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of detected genes. |
Proportion_ | Floating-point type to store the proportions. |
num | Number of cells. |
metrics | A collection of buffers containing RNA-based QC metrics, filled by compute_rna_qc_metrics() . |
options | Further options for filtering. |
RnaQcFilters< Float_ > scran_qc::compute_rna_qc_filters | ( | const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > & | metrics, |
const ComputeRnaQcFiltersOptions & | options ) |
This function computes filter thresholds for RNA-derived QC metrics in blocked datasets (e.g., cells from multiple batches or samples). Each blocking level has its own thresholds, equivalent to calling compute_rna_qc_filters() on the cells from each block. This ensures that uninteresting inter-block differences do not inflate the MAD; see choose_filter_thresholds_blocked() for more details.
Float_ | Floating-point type of the thresholds. |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of detected genes. |
Proportion_ | Floating-point type to store the proportions. |
metrics | RNA-based QC metrics from compute_rna_qc_metrics() . |
options | Further options for filtering. |
RnaQcBlockedFilters< Float_ > scran_qc::compute_rna_qc_filters_blocked | ( | const std::size_t | num, |
const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > & | metrics, | ||
const Block_ *const | block, | ||
const ComputeRnaQcFiltersOptions & | options ) |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of detected genes. |
Proportion_ | Floating-point type to store the proportions. |
Block_ | Integer type of the block assignments. |
num | Number of cells. | |
metrics | A collection of buffers containing RNA-based QC metrics, filled by compute_rna_qc_metrics() . | |
[in] | block | Pointer to an array of length num containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks. |
options | Further options for filtering. |
RnaQcBlockedFilters< Float_ > scran_qc::compute_rna_qc_filters_blocked | ( | const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > & | metrics, |
const Block_ *const | block, | ||
const ComputeRnaQcFiltersOptions & | options ) |
Sum_ | Numeric type to store the summed expression. |
Detected_ | Integer type to store the number of detected genes. |
Proportion_ | Floating-point type to store the proportions. |
Block_ | Integer type of the block assignments. |
metrics | RNA-based QC metrics computed by compute_rna_qc_metrics() . | |
[in] | block | Pointer to an array of length num containing block identifiers. Values should be integer IDs in \([0, N)\) where \(N\) is the number of blocks. |
options | Further options for filtering. |