scran_pca
Principal component analysis for single-cell data
blocked_pca.hpp
#ifndef SCRAN_PCA_BLOCKED_PCA_HPP
#define SCRAN_PCA_BLOCKED_PCA_HPP

#include <vector>
#include <cmath>
#include <algorithm>
#include <type_traits>
#include <cstddef>
#include <functional>
#include <numeric>

#include "tatami/tatami.hpp"
#include "irlba/irlba.hpp"
#include "irlba/parallel.hpp"
#include "irlba_tatami/irlba_tatami.hpp"
#include "Eigen/Dense"
#include "scran_blocks/scran_blocks.hpp"
#include "tatami_stats/tatami_stats.hpp"
#include "sanisizer/sanisizer.hpp"

#include "utils.hpp"

namespace scran_pca {

template<typename EigenVector_ = Eigen::VectorXd>
struct BlockedPcaOptions {
    BlockedPcaOptions() {
        // Avoid throwing an error if too many PCs are requested.
        irlba_options.cap_number = true;
    }

    // Number of principal components to compute.
    int number = 25;

    // Whether to scale each gene to unit variance.
    bool scale = false;

    // Whether to transpose the output components so that dimensions are in
    // the rows and cells are in the columns.
    bool transpose = true;

    // Policy for weighting the contribution of each block.
    scran_blocks::WeightPolicy block_weight_policy = scran_blocks::WeightPolicy::VARIABLE;

    // Parameters for variable block weighting.
    scran_blocks::VariableWeightParameters variable_block_weight_parameters;

    // Whether to center the PC scores of each cell using its block's means,
    // i.e., compute the scores from the residuals.
    bool center_scores_by_block = true;

    // Whether to realize the matrix into an in-memory representation before the PCA.
    bool realize_matrix = true;

    // Number of threads to use.
    int num_threads = 1;

    // Further options for the IRLBA calculations.
    irlba::Options<EigenVector_> irlba_options;
};
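
// A minimal usage sketch for these options (illustrative, not part of the
// original header): callers typically tweak a few fields and leave the rest
// at their defaults, e.g.,
//
//     scran_pca::BlockedPcaOptions<> opt;
//     opt.number = 20;      // keep the first 20 PCs.
//     opt.scale = true;     // standardize each gene.
//     opt.num_threads = 4;
//
// 'opt' would then be passed to blocked_pca() at the bottom of this file.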

/*****************************************************
 ************* Blocking data structures **************
 *****************************************************/

template<typename Index_, class EigenVector_>
struct BlockingDetails {
    std::vector<Index_> block_size;

    bool weighted = false;
    typedef typename EigenVector_::Scalar Weight;

    // The below should only be used if weighted = true.
    std::vector<Weight> per_element_weight;
    Weight total_block_weight = 0;
    EigenVector_ expanded_weights;
};

template<class EigenVector_, typename Index_, typename Block_>
BlockingDetails<Index_, EigenVector_> compute_blocking_details(
    const Index_ ncells,
    const Block_* block,
    const scran_blocks::WeightPolicy block_weight_policy,
    const scran_blocks::VariableWeightParameters& variable_block_weight_parameters)
{
    BlockingDetails<Index_, EigenVector_> output;
    output.block_size = tatami_stats::tabulate_groups(block, ncells);
    if (block_weight_policy == scran_blocks::WeightPolicy::NONE) {
        return output;
    }

    const auto& block_size = output.block_size;
    const auto nblocks = block_size.size();
    output.weighted = true;
    auto& total_weight = output.total_block_weight;
    auto& element_weight = output.per_element_weight;
    sanisizer::resize(element_weight, nblocks);

    for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
        const auto bsize = block_size[b];

        // Computing effective block weights that also incorporate division by the
        // block size. This avoids having to do the division by block size in the
        // 'compute_blockwise_mean_and_variance*()' functions.
        if (bsize) {
            typename EigenVector_::Scalar block_weight = 1;
            if (block_weight_policy == scran_blocks::WeightPolicy::VARIABLE) {
                block_weight = scran_blocks::compute_variable_weight(bsize, variable_block_weight_parameters);
            }

            element_weight[b] = block_weight / bsize;
            total_weight += block_weight;
        } else {
            element_weight[b] = 0;
        }
    }

    // Setting a placeholder value to avoid problems with division by zero.
    if (total_weight == 0) {
        total_weight = 1;
    }

    // Expanding them for multiplication in the IRLBA wrappers.
    auto sqrt_weights = element_weight;
    for (auto& s : sqrt_weights) {
        s = std::sqrt(s);
    }

    auto& expanded = output.expanded_weights;
    sanisizer::resize(expanded, ncells);
    for (Index_ c = 0; c < ncells; ++c) {
        expanded.coeffRef(c) = sqrt_weights[block[c]];
    }

    return output;
}
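
// To make the weighting concrete (an illustrative sketch, not part of the
// original header): with two blocks of sizes 100 and 10 under the VARIABLE
// policy, each block receives a total weight from compute_variable_weight(),
// which is then divided by the block size to give the per-element weight. If
// both blocks were assigned a total weight of 1, every cell in the first
// block would contribute with weight 1/100 and every cell in the second with
// weight 1/10, so the two blocks contribute equally overall despite the
// tenfold difference in size. expanded_weights stores the square roots so
// that the weighting can be applied via row scaling in the IRLBA wrappers.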

/*****************************************************************
 ************ Computing the blockwise mean and variance **********
 *****************************************************************/

template<typename Num_, typename Value_, typename Index_, typename Block_, typename EigenVector_, typename Float_>
void compute_sparse_mean_and_variance_blocked(
    const Num_ num_nonzero,
    const Value_* values,
    const Index_* indices,
    const Block_* block,
    const BlockingDetails<Index_, EigenVector_>& block_details,
    Float_* centers,
    Float_& variance,
    std::vector<Index_>& block_copy,
    const Num_ num_all)
{
    const auto& block_size = block_details.block_size;
    const auto nblocks = block_size.size();

    std::fill_n(centers, nblocks, 0);
    for (Num_ i = 0; i < num_nonzero; ++i) {
        centers[block[indices[i]]] += values[i];
    }
    for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
        auto bsize = block_size[b];
        if (bsize) {
            centers[b] /= bsize;
        }
    }

    // Computing the variance from the sum of squared differences.
    // This is technically not the correct variance estimate if we
    // were to consider the loss of residual d.f. from estimating
    // the block means, but it's what the PCA sees, so whatever.
    variance = 0;
    std::copy(block_size.begin(), block_size.end(), block_copy.begin());

    if (block_details.weighted) {
        for (Num_ i = 0; i < num_nonzero; ++i) {
            const Block_ curb = block[indices[i]];
            const auto diff = values[i] - centers[curb];
            variance += diff * diff * block_details.per_element_weight[curb];
            --block_copy[curb];
        }
        for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
            const auto val = centers[b];
            variance += val * val * block_copy[b] * block_details.per_element_weight[b];
        }
    } else {
        for (Num_ i = 0; i < num_nonzero; ++i) {
            const Block_ curb = block[indices[i]];
            const auto diff = values[i] - centers[curb];
            variance += diff * diff;
            --block_copy[curb];
        }
        for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
            const auto val = centers[b];
            variance += val * val * block_copy[b];
        }
    }

    // COMMENT ON DENOMINATOR:
    // If we're not dealing with weights, we compute the actual sample
    // variance for easy interpretation (and to match up with the
    // per-PC calculations in clean_up).
    //
    // If we're dealing with weights, the concept of the sample variance
    // becomes somewhat weird, but we just use the same denominator for
    // consistency in clean_up_projected. Magnitude doesn't matter when
    // scaling for process_scale_vector anyway.
    variance /= num_all - 1;
}
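
// How the zeros are handled above (an illustrative note, not part of the
// original header): only non-zero entries are visited, so block_copy starts
// at each block's size and is decremented once per non-zero. Whatever remains
// in block_copy[b] is the number of structural zeros in block b, each
// contributing (0 - centers[b])^2 = centers[b]^2 to the sum of squares. For
// example, with a single block of 5 cells and non-zero values {3, 2}, the
// mean is 1 and the sum of squares is (3-1)^2 + (2-1)^2 + 3 * 1^2 = 8,
// giving 8 / (5 - 1) = 2 as the reported variance.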

template<class IrlbaSparseMatrix_, typename Block_, class Index_, class EigenVector_, class EigenMatrix_>
void compute_blockwise_mean_and_variance_realized_sparse(
    const IrlbaSparseMatrix_& emat, // this should be column-major with genes in the columns.
    const Block_* block,
    const BlockingDetails<Index_, EigenVector_>& block_details,
    EigenMatrix_& centers,
    EigenVector_& variances,
    const int nthreads)
{
    const auto ngenes = emat.cols();
    tatami::parallelize([&](const int, const I<decltype(ngenes)> start, const I<decltype(ngenes)> length) -> void {
        const auto ncells = emat.rows();
        const auto& values = emat.get_values();
        const auto& indices = emat.get_indices();
        const auto& pointers = emat.get_pointers();

        const auto nblocks = block_details.block_size.size();
        static_assert(!EigenMatrix_::IsRowMajor);
        auto block_copy = sanisizer::create<std::vector<Index_> >(nblocks);

        for (I<decltype(start)> g = start, end = start + length; g < end; ++g) {
            const auto offset = pointers[g];
            const auto next_offset = pointers[g + 1]; // increment won't overflow as 'g < end' and 'end' is of the same type.
            compute_sparse_mean_and_variance_blocked(
                static_cast<I<decltype(ncells)> >(next_offset - offset),
                values.data() + offset,
                indices.data() + offset,
                block,
                block_details,
                centers.data() + sanisizer::product_unsafe<std::size_t>(g, nblocks),
                variances[g],
                block_copy,
                ncells
            );
        }
    }, ngenes, nthreads);
}

template<typename Num_, typename Value_, typename Block_, typename Index_, typename EigenVector_, typename Float_>
void compute_dense_mean_and_variance_blocked(
    const Num_ number,
    const Value_* values,
    const Block_* block,
    const BlockingDetails<Index_, EigenVector_>& block_details,
    Float_* centers,
    Float_& variance)
{
    const auto& block_size = block_details.block_size;
    const auto nblocks = block_size.size();
    std::fill_n(centers, nblocks, 0);
    for (Num_ i = 0; i < number; ++i) {
        centers[block[i]] += values[i];
    }
    for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
        const auto& bsize = block_size[b];
        if (bsize) {
            centers[b] /= bsize;
        }
    }

    variance = 0;

    if (block_details.weighted) {
        for (Num_ i = 0; i < number; ++i) {
            const auto curb = block[i];
            const auto delta = values[i] - centers[curb];
            variance += delta * delta * block_details.per_element_weight[curb];
        }
    } else {
        for (Num_ i = 0; i < number; ++i) {
            const auto curb = block[i];
            const auto delta = values[i] - centers[curb];
            variance += delta * delta;
        }
    }

    variance /= number - 1; // See COMMENT ON DENOMINATOR above.
}
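
// A small numeric check of the denominator convention (illustrative, not part
// of the original header): for values {1, 2, 3, 4} split into blocks
// {0, 0, 1, 1}, the block means are 1.5 and 3.5, so the unweighted sum of
// squared differences is 4 * 0.25 = 1 and the reported variance is
// 1 / (4 - 1) = 1/3. The denominator stays at (N - 1) even though two means
// were estimated; as the COMMENT ON DENOMINATOR explains, this matches the
// per-PC calculations rather than the residual degrees of freedom.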

template<class EigenMatrix_, typename Block_, class Index_, class EigenVector_>
void compute_blockwise_mean_and_variance_realized_dense(
    const EigenMatrix_& emat, // this should be column-major with genes in the columns.
    const Block_* block,
    const BlockingDetails<Index_, EigenVector_>& block_details,
    EigenMatrix_& centers,
    EigenVector_& variances,
    const int nthreads)
{
    const auto ngenes = emat.cols();
    tatami::parallelize([&](const int, const I<decltype(ngenes)> start, const I<decltype(ngenes)> length) -> void {
        const auto ncells = emat.rows();
        static_assert(!EigenMatrix_::IsRowMajor);
        const auto nblocks = block_details.block_size.size();
        for (I<decltype(start)> g = start, end = start + length; g < end; ++g) {
            compute_dense_mean_and_variance_blocked(
                ncells,
                emat.data() + sanisizer::product_unsafe<std::size_t>(g, ncells),
                block,
                block_details,
                centers.data() + sanisizer::product_unsafe<std::size_t>(g, nblocks),
                variances[g]
            );
        }
    }, ngenes, nthreads);
}

template<typename Value_, typename Index_, typename Block_, class EigenMatrix_, class EigenVector_>
void compute_blockwise_mean_and_variance_tatami(
    const tatami::Matrix<Value_, Index_>& mat, // this should have genes in the rows!
    const Block_* block,
    const BlockingDetails<Index_, EigenVector_>& block_details,
    EigenMatrix_& centers,
    EigenVector_& variances,
    const int nthreads)
{
    const auto& block_size = block_details.block_size;
    const auto nblocks = block_size.size();
    const Index_ ngenes = mat.nrow();
    const Index_ ncells = mat.ncol();

    if (mat.prefer_rows()) {
        tatami::parallelize([&](const int, const Index_ start, const Index_ length) -> void {
            static_assert(!EigenMatrix_::IsRowMajor);
            auto block_copy = sanisizer::create<std::vector<Index_> >(nblocks);
            auto vbuffer = tatami::create_container_of_Index_size<std::vector<Value_> >(ncells);

            if (mat.is_sparse()) {
                auto ibuffer = tatami::create_container_of_Index_size<std::vector<Index_> >(ncells);
                auto ext = tatami::consecutive_extractor<true>(mat, true, start, length);
                for (Index_ g = start, end = start + length; g < end; ++g) {
                    auto range = ext->fetch(vbuffer.data(), ibuffer.data());
                    compute_sparse_mean_and_variance_blocked(
                        range.number,
                        range.value,
                        range.index,
                        block,
                        block_details,
                        centers.data() + sanisizer::product_unsafe<std::size_t>(g, nblocks),
                        variances[g],
                        block_copy,
                        ncells
                    );
                }

            } else {
                auto ext = tatami::consecutive_extractor<false>(mat, true, start, length);
                for (Index_ g = start, end = start + length; g < end; ++g) {
                    auto ptr = ext->fetch(vbuffer.data());
                    compute_dense_mean_and_variance_blocked(
                        ncells,
                        ptr,
                        block,
                        block_details,
                        centers.data() + sanisizer::product_unsafe<std::size_t>(g, nblocks),
                        variances[g]
                    );
                }
            }
        }, ngenes, nthreads);

    } else {
        typedef typename EigenVector_::Scalar Scalar;
        std::vector<std::pair<I<decltype(nblocks)>, Scalar> > block_multipliers;
        block_multipliers.reserve(nblocks);

        for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
            const auto bsize = block_size[b];
            if (bsize > 1) { // skipping blocks with NaN variances.
                Scalar mult = bsize - 1; // need to convert variances back into sum of squared differences.
                if (block_details.weighted) {
                    mult *= block_details.per_element_weight[b];
                }
                block_multipliers.emplace_back(b, mult);
            }
        }

        tatami::parallelize([&](const int, const Index_ start, const Index_ length) -> void {
            std::vector<std::vector<Scalar> > re_centers, re_variances;
            re_centers.reserve(nblocks);
            re_variances.reserve(nblocks);
            for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
                re_centers.emplace_back(length);
                re_variances.emplace_back(length);
            }

            auto vbuffer = tatami::create_container_of_Index_size<std::vector<Value_> >(length);

            if (mat.is_sparse()) {
                std::vector<tatami_stats::variances::RunningSparse<Scalar, Value_, Index_> > running;
                running.reserve(nblocks);
                for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
                    running.emplace_back(length, re_centers[b].data(), re_variances[b].data(), /* skip_nan = */ false, /* subtract = */ start);
                }

                auto ibuffer = tatami::create_container_of_Index_size<std::vector<Index_> >(length);
                auto ext = tatami::consecutive_extractor<true>(mat, false, static_cast<Index_>(0), ncells, start, length);
                for (Index_ c = 0; c < ncells; ++c) {
                    const auto range = ext->fetch(vbuffer.data(), ibuffer.data());
                    running[block[c]].add(range.value, range.index, range.number);
                }

                for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
                    running[b].finish();
                }

            } else {
                std::vector<tatami_stats::variances::RunningDense<Scalar, Value_, Index_> > running;
                running.reserve(nblocks);
                for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
                    running.emplace_back(length, re_centers[b].data(), re_variances[b].data(), /* skip_nan = */ false);
                }

                auto ext = tatami::consecutive_extractor<false>(mat, false, static_cast<Index_>(0), ncells, start, length);
                for (Index_ c = 0; c < ncells; ++c) {
                    auto ptr = ext->fetch(vbuffer.data());
                    running[block[c]].add(ptr);
                }

                for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
                    running[b].finish();
                }
            }

            static_assert(!EigenMatrix_::IsRowMajor);
            for (Index_ i = 0; i < length; ++i) {
                auto mptr = centers.data() + sanisizer::product_unsafe<std::size_t>(start + i, nblocks);
                for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
                    mptr[b] = re_centers[b][i];
                }

                auto& my_var = variances[start + i];
                my_var = 0;
                for (const auto& bm : block_multipliers) {
                    my_var += re_variances[bm.first][i] * bm.second;
                }
                my_var /= ncells - 1; // See COMMENT ON DENOMINATOR above.
            }
        }, ngenes, nthreads);
    }
}
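
// Reassembly of the variance in the running-statistics path (an illustrative
// note, not part of the original header): the running calculators produce the
// per-block sample variance for each gene, so each is multiplied by
// (block size - 1) to recover that block's sum of squared differences (times
// the per-element weight, if any), summed across blocks, and divided by
// (ncells - 1). This reproduces what the direct per-gene passes above compute
// in a single sweep.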

/******************************************************************
 ************ Project matrices on their rotation vectors **********
 ******************************************************************/

template<class EigenMatrix_, class EigenVector_>
const EigenMatrix_& scale_rotation_matrix(const EigenMatrix_& rotation, bool scale, const EigenVector_& scale_v, EigenMatrix_& tmp) {
    if (scale) {
        tmp = (rotation.array().colwise() / scale_v.array()).matrix();
        return tmp;
    } else {
        return rotation;
    }
}
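
// Why the division works (an illustrative note, not part of the original
// header): when scaling is enabled, the PCA operates on the matrix with each
// gene divided by its scale_v entry. Projecting the *unscaled* matrix must
// fold that division into the rotation instead, as (X * diag(1/s)) * R
// equals X * (diag(1/s) * R), i.e., each gene's row of the rotation is
// divided by its scaling factor. Returning a reference avoids a copy when no
// scaling is requested.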

template<class EigenVector_, class IrlbaSparseMatrix_, class EigenMatrix_>
inline void project_matrix_realized_sparse(
    const IrlbaSparseMatrix_& emat, // cells in rows, genes in the columns, CSC.
    EigenMatrix_& components, // dims in rows, cells in columns
    const EigenMatrix_& scaled_rotation, // genes in rows, dims in columns
    int nthreads
) {
    const auto rank = scaled_rotation.cols();
    const auto ncells = emat.rows();
    const auto ngenes = emat.cols();

    // Store as transposed for more cache efficiency.
    components.resize(
        sanisizer::cast<I<decltype(components.rows())> >(rank),
        sanisizer::cast<I<decltype(components.cols())> >(ncells)
    );
    components.setZero();

    const auto& values = emat.get_values();
    const auto& indices = emat.get_indices();
    const auto& pointers = emat.get_pointers();

    if (nthreads == 1) {
        auto multipliers = sanisizer::create<EigenVector_>(rank);
        for (I<decltype(ngenes)> g = 0; g < ngenes; ++g) {
            multipliers.noalias() = scaled_rotation.row(g);
            const auto start = pointers[g], end = pointers[g + 1]; // increment is safe as 'g + 1 <= ngenes'.
            for (auto i = start; i < end; ++i) {
                components.col(indices[i]).noalias() += values[i] * multipliers;
            }
        }

    } else {
        // Here, the general strategy is to split the matrix into chunks of genes,
        // perform the matrix multiplication for each chunk,
        // and then sum the per-chunk products to obtain the final product.
        // The exact result of the reduction depends on the number of threads,
        // but this is an acceptable annoyance for greater speed.
        const auto& primary_bounds = emat.get_primary_boundaries();
        auto working = sanisizer::create<std::vector<EigenMatrix_> >(nthreads - 1);

        irlba::parallelize(nthreads, [&](const int t) -> void {
            EigenMatrix_* ptr;
            if (t == 0) {
                ptr = &components;
            } else {
                auto& mat = working[t - 1];
                mat.resize(components.rows(), components.cols());
                mat.setZero();
                ptr = &mat;
            }

            const auto gstart = primary_bounds[t];
            const auto gend = primary_bounds[t + 1]; // increment is safe as 't + 1 <= nthreads'.
            auto multipliers = sanisizer::create<EigenVector_>(rank);
            for (I<decltype(ngenes)> g = gstart; g < gend; ++g) {
                multipliers.noalias() = scaled_rotation.row(g);
                const auto start = pointers[g], end = pointers[g + 1]; // increment is safe as 'g + 1 <= ngenes'.
                for (auto i = start; i < end; ++i) {
                    ptr->col(indices[i]).noalias() += values[i] * multipliers;
                }
            }
        });

        for (auto& w : working) {
            components += w;
        }
    }
}
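
// The reduction above in miniature (an illustrative note, not part of the
// original header): the projection is a sum of rank-1 updates, one per gene,
// where gene g adds values[i] * scaled_rotation.row(g) to the column of each
// cell with a non-zero entry. Partitioning genes across threads and summing
// the per-thread partial products therefore yields the same result up to
// floating-point rounding; thread 0 writes directly into 'components' while
// every other thread t uses working[t - 1], so no two threads ever touch the
// same output matrix.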

template<typename Value_, typename Index_, class EigenMatrix_>
void project_matrix_transposed_tatami(
    const tatami::Matrix<Value_, Index_>& mat, // genes in rows, cells in columns
    EigenMatrix_& components,
    const EigenMatrix_& scaled_rotation, // genes in rows, dims in columns
    const int nthreads)
{
    const auto rank = scaled_rotation.cols();
    const auto ngenes = mat.nrow();
    const auto ncells = mat.ncol();
    typedef typename EigenMatrix_::Scalar Scalar;

    // Store as transposed for more cache efficiency.
    components.resize(
        sanisizer::cast<I<decltype(components.rows())> >(rank),
        sanisizer::cast<I<decltype(components.cols())> >(ncells)
    );

    if (mat.prefer_rows()) {
        tatami::parallelize([&](const int, const Index_ start, const Index_ length) -> void {
            static_assert(!EigenMatrix_::IsRowMajor);
            const auto vptr = scaled_rotation.data();
            auto vbuffer = tatami::create_container_of_Index_size<std::vector<Value_> >(length);

            std::vector<std::vector<Scalar> > local_buffers; // create separate buffers to avoid false sharing.
            local_buffers.reserve(rank);
            for (I<decltype(rank)> r = 0; r < rank; ++r) {
                local_buffers.emplace_back(tatami::cast_Index_to_container_size<I<decltype(local_buffers.front())> >(length));
            }

            if (mat.is_sparse()) {
                auto ibuffer = tatami::create_container_of_Index_size<std::vector<Index_> >(length);
                auto ext = tatami::consecutive_extractor<true>(mat, true, static_cast<Index_>(0), ngenes, start, length);
                for (Index_ g = 0; g < ngenes; ++g) {
                    const auto range = ext->fetch(vbuffer.data(), ibuffer.data());
                    for (I<decltype(rank)> r = 0; r < rank; ++r) {
                        const auto mult = vptr[sanisizer::nd_offset<std::size_t>(g, ngenes, r)];
                        auto& local_buffer = local_buffers[r];
                        for (Index_ i = 0; i < range.number; ++i) {
                            local_buffer[range.index[i] - start] += range.value[i] * mult;
                        }
                    }
                }

            } else {
                auto ext = tatami::consecutive_extractor<false>(mat, true, static_cast<Index_>(0), ngenes, start, length);
                for (Index_ g = 0; g < ngenes; ++g) {
                    const auto ptr = ext->fetch(vbuffer.data());
                    for (I<decltype(rank)> r = 0; r < rank; ++r) {
                        const auto mult = vptr[sanisizer::nd_offset<std::size_t>(g, ngenes, r)];
                        auto& local_buffer = local_buffers[r];
                        for (Index_ i = 0; i < length; ++i) {
                            local_buffer[i] += ptr[i] * mult;
                        }
                    }
                }
            }

            for (I<decltype(rank)> r = 0; r < rank; ++r) {
                for (Index_ c = 0; c < length; ++c) {
                    components.coeffRef(r, c + start) = local_buffers[r][c];
                }
            }

        }, ncells, nthreads);

    } else {
        tatami::parallelize([&](const int, const Index_ start, const Index_ length) -> void {
            static_assert(!EigenMatrix_::IsRowMajor);
            std::vector<Value_> vbuffer(ngenes);

            if (mat.is_sparse()) {
                std::vector<Index_> ibuffer(ngenes);
                auto ext = tatami::consecutive_extractor<true>(mat, false, start, length);

                for (Index_ c = start, end = start + length; c < end; ++c) {
                    const auto range = ext->fetch(vbuffer.data(), ibuffer.data());
                    for (I<decltype(rank)> r = 0; r < rank; ++r) {
                        auto& output = components.coeffRef(r, c);
                        output = 0;
                        const auto rotptr = scaled_rotation.data() + sanisizer::product_unsafe<std::size_t>(r, ngenes);
                        for (Index_ i = 0; i < range.number; ++i) {
                            output += rotptr[range.index[i]] * range.value[i];
                        }
                    }
                }

            } else {
                auto ext = tatami::consecutive_extractor<false>(mat, false, start, length);
                for (Index_ c = start, end = start + length; c < end; ++c) {
                    const auto ptr = ext->fetch(vbuffer.data());
                    for (I<decltype(rank)> r = 0; r < rank; ++r) {
                        const auto rotptr = scaled_rotation.data() + sanisizer::product_unsafe<std::size_t>(r, ngenes);
                        components.coeffRef(r, c) = std::inner_product(rotptr, rotptr + ngenes, ptr, static_cast<Scalar>(0));
                    }
                }
            }
        }, ncells, nthreads);
    }
}

template<class EigenMatrix_, class EigenVector_>
void clean_up_projected(EigenMatrix_& projected, EigenVector_& D) {
    // Empirically centering to give nice centered PCs, because we can't
    // guarantee that the projection is centered in this manner.
    for (I<decltype(projected.rows())> i = 0, prows = projected.rows(); i < prows; ++i) {
        projected.row(i).array() -= projected.row(i).sum() / projected.cols();
    }

    // Just dividing by the number of observations - 1 regardless of weighting.
    const typename EigenMatrix_::Scalar denom = projected.cols() - 1;
    for (auto& d : D) {
        d = d * d / denom;
    }
}
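
// Relation between singular values and variance explained (an illustrative
// note, not part of the original header): for a centered matrix with N cells,
// the variance explained by component k is D[k]^2 / (N - 1), since the
// singular value D[k] is the norm of the data projected onto the k-th
// rotation vector. This is why clean_up_projected squares each entry of D and
// divides by (projected.cols() - 1) after re-centering the projected scores.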

/*******************************
 ***** Residual wrapper ********
 *******************************/

template<class EigenVector_, class IrlbaMatrix_, typename Block_, class CenterMatrix_>
class ResidualWorkspace final : public irlba::Workspace<EigenVector_> {
public:
    ResidualWorkspace(const IrlbaMatrix_& matrix, const Block_* block, const CenterMatrix_& means) :
        my_work(matrix.new_known_workspace()),
        my_block(block),
        my_means(means),
        my_sub(sanisizer::cast<I<decltype(my_sub.size())> >(my_means.rows()))
    {}

private:
    I<decltype(std::declval<IrlbaMatrix_>().new_known_workspace())> my_work;
    const Block_* my_block;
    const CenterMatrix_& my_means;
    EigenVector_ my_sub;

public:
    void multiply(const EigenVector_& right, EigenVector_& output) {
        my_work->multiply(right, output);

        my_sub.noalias() = my_means * right;
        for (I<decltype(output.size())> i = 0, end = output.size(); i < end; ++i) {
            auto& val = output.coeffRef(i);
            val -= my_sub.coeff(my_block[i]);
        }
    }
};

template<class EigenVector_, class IrlbaMatrix_, typename Block_, class CenterMatrix_>
class ResidualAdjointWorkspace final : public irlba::AdjointWorkspace<EigenVector_> {
public:
    ResidualAdjointWorkspace(const IrlbaMatrix_& matrix, const Block_* block, const CenterMatrix_& means) :
        my_work(matrix.new_known_adjoint_workspace()),
        my_block(block),
        my_means(means),
        my_aggr(sanisizer::cast<I<decltype(my_aggr.size())> >(my_means.rows()))
    {}

private:
    I<decltype(std::declval<IrlbaMatrix_>().new_known_adjoint_workspace())> my_work;
    const Block_* my_block;
    const CenterMatrix_& my_means;
    EigenVector_ my_aggr;

public:
    void multiply(const EigenVector_& right, EigenVector_& output) {
        my_work->multiply(right, output);

        my_aggr.setZero();
        for (I<decltype(right.size())> i = 0, end = right.size(); i < end; ++i) {
            my_aggr.coeffRef(my_block[i]) += right.coeff(i);
        }

        output.noalias() -= my_means.adjoint() * my_aggr;
    }
};

template<class EigenMatrix_, class IrlbaMatrix_, typename Block_, class CenterMatrix_>
class ResidualRealizeWorkspace final : public irlba::RealizeWorkspace<EigenMatrix_> {
public:
    ResidualRealizeWorkspace(const IrlbaMatrix_& matrix, const Block_* block, const CenterMatrix_& means) :
        my_work(matrix.new_known_realize_workspace()),
        my_block(block),
        my_means(means)
    {}

private:
    I<decltype(std::declval<IrlbaMatrix_>().new_known_realize_workspace())> my_work;
    const Block_* my_block;
    const CenterMatrix_& my_means;

public:
    const EigenMatrix_& realize(EigenMatrix_& buffer) {
        my_work->realize_copy(buffer);
        for (I<decltype(buffer.rows())> i = 0, end = buffer.rows(); i < end; ++i) {
            buffer.row(i) -= my_means.row(my_block[i]);
        }
        return buffer;
    }
};

// This wrapper class mimics multiplication with the residuals,
// i.e., after subtracting the per-block mean from each cell.
template<class EigenVector_, class EigenMatrix_, class IrlbaMatrixPointer_, class Block_, class CenterMatrixPointer_>
class ResidualMatrix final : public irlba::Matrix<EigenVector_, EigenMatrix_> {
public:
    ResidualMatrix(IrlbaMatrixPointer_ mat, const Block_* block, CenterMatrixPointer_ means) :
        my_matrix(std::move(mat)),
        my_block(block),
        my_means(std::move(means))
    {}

public:
    Eigen::Index rows() const {
        return my_matrix->rows();
    }

    Eigen::Index cols() const {
        return my_matrix->cols();
    }

private:
    IrlbaMatrixPointer_ my_matrix;
    const Block_* my_block;
    CenterMatrixPointer_ my_means;

public:
    std::unique_ptr<irlba::Workspace<EigenVector_> > new_workspace() const {
        return new_known_workspace();
    }

    std::unique_ptr<irlba::AdjointWorkspace<EigenVector_> > new_adjoint_workspace() const {
        return new_known_adjoint_workspace();
    }

    std::unique_ptr<irlba::RealizeWorkspace<EigenMatrix_> > new_realize_workspace() const {
        return new_known_realize_workspace();
    }

public:
    std::unique_ptr<ResidualWorkspace<EigenVector_, decltype(*my_matrix), Block_, decltype(*my_means)> > new_known_workspace() const {
        return std::make_unique<ResidualWorkspace<EigenVector_, decltype(*my_matrix), Block_, decltype(*my_means)> >(*my_matrix, my_block, *my_means);
    }

    std::unique_ptr<ResidualAdjointWorkspace<EigenVector_, decltype(*my_matrix), Block_, decltype(*my_means)> > new_known_adjoint_workspace() const {
        return std::make_unique<ResidualAdjointWorkspace<EigenVector_, decltype(*my_matrix), Block_, decltype(*my_means)> >(*my_matrix, my_block, *my_means);
    }

    std::unique_ptr<ResidualRealizeWorkspace<EigenMatrix_, decltype(*my_matrix), Block_, decltype(*my_means)> > new_known_realize_workspace() const {
        return std::make_unique<ResidualRealizeWorkspace<EigenMatrix_, decltype(*my_matrix), Block_, decltype(*my_means)> >(*my_matrix, my_block, *my_means);
    }
};
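
// What the wrapper computes (an illustrative note, not part of the original
// header): if X is the cells-by-genes matrix, B is the cells-by-blocks
// indicator matrix and M is the blocks-by-genes matrix of block means, then
// ResidualMatrix represents (X - B * M) without materializing it. The forward
// workspace computes X * v - B * (M * v), the adjoint workspace computes
// X^T * v - M^T * (B^T * v), and realize() subtracts each cell's block mean
// row explicitly. This keeps sparse inputs sparse throughout the IRLBA
// iterations.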

template<typename EigenMatrix_, typename EigenVector_>
struct BlockedPcaResults {
    // Matrix of principal component scores.
    EigenMatrix_ components;

    // Variance explained by each component.
    EigenVector_ variance_explained;

    // Total variance in the dataset.
    typename EigenVector_::Scalar total_variance = 0;

    // Rotation matrix with genes in the rows and components in the columns.
    EigenMatrix_ rotation;

    // Matrix of means with blocks in the rows and genes in the columns.
    EigenMatrix_ center;

    // Per-gene scaling factors, if scaling was requested.
    EigenVector_ scale;

    // Convergence statistics from the IRLBA run.
    irlba::Metrics metrics;
};

template<typename Value_, typename Index_, typename Block_, typename EigenMatrix_, class EigenVector_, class SubsetFunction_>
void blocked_pca_internal(
    const tatami::Matrix<Value_, Index_>& mat,
    const Block_* block,
    const BlockedPcaOptions<EigenVector_>& options,
    BlockedPcaResults<EigenMatrix_, EigenVector_>& output,
    SubsetFunction_ subset_fun
) {
    const auto block_details = compute_blocking_details<EigenVector_>(mat.ncol(), block, options.block_weight_policy, options.variable_block_weight_parameters);

    const Index_ ngenes = mat.nrow(), ncells = mat.ncol();
    const auto nblocks = block_details.block_size.size();
    output.center.resize(
        sanisizer::cast<I<decltype(output.center.rows())> >(nblocks),
        sanisizer::cast<I<decltype(output.center.cols())> >(ngenes)
    );
    sanisizer::resize(output.scale, ngenes);

    std::unique_ptr<irlba::Matrix<EigenVector_, EigenMatrix_> > ptr;
    std::function<void(const EigenMatrix_&)> projector;

    if (!options.realize_matrix) {
        ptr.reset(new irlba_tatami::Transposed<EigenVector_, EigenMatrix_, Value_, Index_, decltype(&mat)>(&mat, options.num_threads));
        compute_blockwise_mean_and_variance_tatami(mat, block, block_details, output.center, output.scale, options.num_threads);

        projector = [&](const EigenMatrix_& scaled_rotation) -> void {
            project_matrix_transposed_tatami(mat, output.components, scaled_rotation, options.num_threads);
        };

    } else if (mat.is_sparse()) {
        // 'extracted' contains row-major contents... but we implicitly transpose it to CSC with genes in columns.
        auto extracted = tatami::retrieve_compressed_sparse_contents<Value_, Index_>(
            mat,
            /* row = */ true,
            [&]{
                tatami::RetrieveCompressedSparseContentsOptions opt;
                opt.two_pass = false;
                opt.num_threads = options.num_threads;
                return opt;
            }()
        );

        // Storing sparse_ptr in the unique pointer should not invalidate the former,
        // based on a reading of the C++ specification w.r.t. reset(),
        // so we can continue to use it for projection.
        const auto sparse_ptr = new irlba::ParallelSparseMatrix<
            EigenVector_,
            EigenMatrix_,
            I<decltype(extracted.value)>,
            I<decltype(extracted.index)>,
            I<decltype(extracted.pointers)>
        >(
            ncells,
            ngenes,
            std::move(extracted.value),
            std::move(extracted.index),
            std::move(extracted.pointers),
            true,
            options.num_threads
        );
        ptr.reset(sparse_ptr);

        compute_blockwise_mean_and_variance_realized_sparse(*sparse_ptr, block, block_details, output.center, output.scale, options.num_threads);

        // Make sure to copy sparse_ptr because it doesn't exist outside of this scope.
        projector = [&,sparse_ptr](const EigenMatrix_& scaled_rotation) -> void {
            project_matrix_realized_sparse<EigenVector_>(*sparse_ptr, output.components, scaled_rotation, options.num_threads);
        };

    } else {
        // Perform an implicit transposition by performing a row-major extraction into a column-major transposed matrix.
        auto tmp_ptr = std::make_unique<EigenMatrix_>(
            sanisizer::cast<I<decltype(std::declval<EigenMatrix_>().rows())> >(ncells),
            sanisizer::cast<I<decltype(std::declval<EigenMatrix_>().cols())> >(ngenes)
        );
        static_assert(!EigenMatrix_::IsRowMajor);

        tatami::convert_to_dense(
            mat,
            /* row_major = */ true,
            tmp_ptr->data(),
            [&]{
                tatami::ConvertToDenseOptions opt;
                opt.num_threads = options.num_threads;
                return opt;
            }()
        );

        compute_blockwise_mean_and_variance_realized_dense(*tmp_ptr, block, block_details, output.center, output.scale, options.num_threads);
        const auto dense_ptr = tmp_ptr.get(); // do this before the move.
        ptr.reset(new irlba::SimpleMatrix<EigenVector_, EigenMatrix_, decltype(tmp_ptr)>(std::move(tmp_ptr)));

        // Make sure to copy dense_ptr because it doesn't exist outside of this scope.
        projector = [&,dense_ptr](const EigenMatrix_& scaled_rotation) -> void {
            output.components.noalias() = (*dense_ptr * scaled_rotation).adjoint();
        };
    }

    output.total_variance = process_scale_vector(options.scale, output.scale);

    std::unique_ptr<irlba::Matrix<EigenVector_, EigenMatrix_> > alt;
    alt.reset(
        new ResidualMatrix<
            EigenVector_,
            EigenMatrix_,
            I<decltype(ptr)>,
            Block_,
            I<decltype(&(output.center))>
        >(
            std::move(ptr),
            block,
            &(output.center)
        )
    );
    ptr.swap(alt);

    if (options.scale) {
        alt.reset(
            new irlba::Scaled<
                EigenVector_,
                EigenMatrix_,
                I<decltype(ptr)>,
                I<decltype(&(output.scale))>
            >(
                std::move(ptr),
                &(output.scale),
                /* column = */ true,
                /* divide = */ true
            )
        );
        ptr.swap(alt);
    }

    if (block_details.weighted) {
        alt.reset(
            new irlba::Scaled<
                EigenVector_,
                EigenMatrix_,
                I<decltype(ptr)>,
                I<decltype(&(block_details.expanded_weights))>
            >(
                std::move(ptr),
                &(block_details.expanded_weights),
                /* column = */ false,
                /* divide = */ false
            )
        );
        ptr.swap(alt);

        output.metrics = irlba::compute(*ptr, options.number, output.components, output.rotation, output.variance_explained, options.irlba_options);

        subset_fun(block_details, output.components, output.variance_explained);

        EigenMatrix_ tmp;
        const auto& scaled_rotation = scale_rotation_matrix(output.rotation, options.scale, output.scale, tmp);
        projector(scaled_rotation);

        // Subtracting each block's mean from the PCs.
        if (options.center_scores_by_block) {
            EigenMatrix_ centering = (output.center * scaled_rotation).adjoint();
            for (I<decltype(ncells)> c = 0; c < ncells; ++c) {
                output.components.col(c) -= centering.col(block[c]);
            }
        }

        clean_up_projected(output.components, output.variance_explained);
        if (!options.transpose) {
            output.components.adjointInPlace();
        }

    } else {
        output.metrics = irlba::compute(*ptr, options.number, output.components, output.rotation, output.variance_explained, options.irlba_options);

        subset_fun(block_details, output.components, output.variance_explained);

        if (options.center_scores_by_block) {
            clean_up(mat.ncol(), output.components, output.variance_explained);
            if (options.transpose) {
                output.components.adjointInPlace();
            }

        } else {
            EigenMatrix_ tmp;
            const auto& scaled_rotation = scale_rotation_matrix(output.rotation, options.scale, output.scale, tmp);
            projector(scaled_rotation);

            clean_up_projected(output.components, output.variance_explained);
            if (!options.transpose) {
                output.components.adjointInPlace();
            }
        }
    }

    if (!options.scale) {
        output.scale = EigenVector_();
    }
}

template<typename Value_, typename Index_, typename Block_, typename EigenMatrix_, class EigenVector_>
void blocked_pca(
    const tatami::Matrix<Value_, Index_>& mat,
    const Block_* block,
    const BlockedPcaOptions<EigenVector_>& options,
    BlockedPcaResults<EigenMatrix_, EigenVector_>& output
) {
    blocked_pca_internal<Value_, Index_, Block_, EigenMatrix_, EigenVector_>(
        mat,
        block,
        options,
        output,
        [&](const BlockingDetails<Index_, EigenVector_>&, const EigenMatrix_&, const EigenVector_&) -> void {}
    );
}

template<typename EigenMatrix_ = Eigen::MatrixXd, class EigenVector_ = Eigen::VectorXd, typename Value_, typename Index_, typename Block_>
BlockedPcaResults<EigenMatrix_, EigenVector_> blocked_pca(const tatami::Matrix<Value_, Index_>& mat, const Block_* block, const BlockedPcaOptions<EigenVector_>& options) {
    BlockedPcaResults<EigenMatrix_, EigenVector_> output;
    blocked_pca(mat, block, options, output);
    return output;
}
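
// End-to-end usage (an illustrative sketch, not part of the original header;
// the construction of 'mat' and 'block' is assumed):
//
//     // 'mat' is a tatami::Matrix<double, int> of log-expression values
//     // with genes in rows, and 'block' assigns each cell to a batch.
//     scran_pca::BlockedPcaOptions<> opt;
//     opt.number = 20;
//     auto res = scran_pca::blocked_pca(mat, block.data(), opt);
//
//     // With transpose = true (the default), res.components has one row per
//     // PC and one column per cell; res.variance_explained[k] divided by
//     // res.total_variance gives the proportion explained by PC k.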

}

#endif