Source code for scranpy._aggregate_across_cells
from typing import Any, Sequence, Union, Optional
import numpy
import mattress
import biocutils
import biocframe
from . import _lib_scranpy as lib
from ._combine_factors import combine_factors
def aggregate_across_cells(
    x: Any,
    factors: Union[dict, Sequence, biocutils.NamedList, biocframe.BiocFrame],
    num_threads: int = 1
) -> biocutils.NamedList:
    """
    Aggregate expression values across cells based on one or more grouping factors.
    This is primarily used to create pseudo-bulk profiles for each cluster/sample combination.

    Args:
        x:
            A matrix-like object where rows correspond to genes or genomic features and columns correspond to cells.
            Values are expected to be counts.

        factors:
            One or more grouping factors, see :py:func:`~scranpy.combine_factors`.
            Each entry should be a sequence of length equal to the number of columns in ``x``.

        num_threads:
            Number of threads to use for aggregation.

    Returns:
        A :py:class:`~biocutils.named_list.NamedList` containing the following entries.

        - ``sum``: double-precision NumPy matrix where each row corresponds to a gene and each column corresponds to a unique combination of grouping levels.
          Each matrix entry contains the summed expression across all cells with that combination.
        - ``detected``: integer NumPy matrix where each row corresponds to a gene and each column corresponds to a unique combination of grouping levels.
          Each matrix entry contains the number of cells with detected expression in that combination.
        - ``combinations``: a :py:class:`~biocframe.BiocFrame.BiocFrame` containing all unique combinations of levels across ``factors``.
          Each column corresponds to an entry of ``factors`` while each row corresponds to a combination.
          Specifically, the ``i``-th combination is defined as the ``i``-th elements of all columns.
          Combinations are in the same order as the columns of :py:attr:`~sum` and :py:attr:`~detected`.
        - ``counts``: an integer NumPy array containing the number of cells associated with each combination in ``combinations``.
        - ``index``: an integer NumPy array of length equal to the number of cells.
          This specifies the combination in ``combinations`` associated with each cell in ``x``.

    References:
        The ``aggregate_across_cells`` function in the `scran_aggregate <https://libscran.github.io/scran_aggregate>`_ C++ library.

    Examples:
        >>> import numpy
        >>> mat = numpy.random.rand(100, 20)
        >>> import scranpy
        >>> clusters = ["A", "B", "C", "D"] * 5
        >>> blocks = [1] * 4 + [2] * 4 + [3] * 4 + [4] * 4 + [5] * 4
        >>> aggr = scranpy.aggregate_across_cells(mat, { "clusters": clusters, "blocks": blocks })
        >>> aggr["sum"][:5,]
        >>> print(aggr["combinations"])
    """
    # Collapse all grouping factors into a single per-cell index plus the
    # table of unique level combinations that the index refers to.
    combout = combine_factors(factors)
    comblev = combout["levels"]
    combind = combout["index"]

    mat = mattress.initialize(x)
    outsum, outdet = lib.aggregate_across_cells(mat.ptr, combind, num_threads)

    # Tally the cells per combination in one vectorized pass rather than a
    # Python-level loop over every cell. 'minlength' keeps one entry per row
    # of 'comblev' even when trailing combinations have no cells. The index is
    # cast to intp because bincount only accepts safely-castable integer input,
    # and the result is cast back to the uint32 dtype this function has always
    # returned.
    # NOTE(review): assumes every value in 'combind' is a valid row of
    # 'comblev' (as produced by combine_factors) -- confirm.
    counts = numpy.bincount(
        numpy.asarray(combind, dtype=numpy.intp),
        minlength=comblev.shape[0],
    ).astype(numpy.uint32)

    output = biocutils.NamedList()
    output["sum"] = outsum
    output["detected"] = outdet
    output["combinations"] = comblev
    output["counts"] = counts
    output["index"] = combind
    return output