Source code for cassiopeia.simulator.UniformLeafSubsampler
"""
A subclass of LeafSubsampler, the UniformLeafSubsampler.
Takes a uniform random sample of the leaves of a CassiopeiaTree and produces a
new CassiopeiaTree that keeps only the lineages pertaining to the sample.
"""
import abc
import copy
import networkx as nx
import numpy as np
from typing import Optional
from cassiopeia.data import CassiopeiaTree
from cassiopeia.simulator.LeafSubsampler import (
LeafSubsampler,
LeafSubsamplerError,
)
class UniformLeafSubsampler(LeafSubsampler):
    """Leaf subsampler that draws its sample uniformly at random.

    The size of the sample is fixed at construction time, either as a
    fraction of the tree's cell count ('ratio') or as an absolute count
    ('number_of_leaves').
    """

    def __init__(
        self,
        ratio: Optional[float] = None,
        number_of_leaves: Optional[int] = None,
    ):
        """
        Uniformly subsample leaf samples of a CassiopeiaTree.

        If 'ratio' is provided, samples 'ratio' of the leaves, rounded down,
        uniformly at random. If instead 'number_of_leaves' is provided,
        'number_of_leaves' of the leaves are sampled uniformly at random. Only
        one of the two criteria can be provided.

        Args:
            ratio: Specifies the number of leaves to be sampled as a ratio of
                the total number of leaves
            number_of_leaves: Explicitly specifies the number of leaves to be
                sampled

        Raises:
            LeafSubsamplerError: If neither or both of 'ratio' and
                'number_of_leaves' are given.
        """
        if ratio is None and number_of_leaves is None:
            raise LeafSubsamplerError(
                "At least one of 'ratio' and 'number_of_leaves' "
                "must be specified."
            )
        if ratio is not None and number_of_leaves is not None:
            # The trailing space matters: adjacent literals are concatenated
            # verbatim, so without it the words run together.
            raise LeafSubsamplerError(
                "Exactly one of 'ratio' and 'number_of_leaves' "
                "must be specified."
            )
        self.__ratio = ratio
        self.__number_of_leaves = number_of_leaves

    def subsample_leaves(
        self, tree: CassiopeiaTree, collapse_source: Optional[str] = None
    ) -> CassiopeiaTree:
        """Uniformly subsample leaf samples of a given tree.

        Generates a uniform random sample on the leaves of the given
        CassiopeiaTree and returns a tree pruned to contain lineages relevant
        to only leaves in the sample (the "induced subtree" on the sample).
        All fields on the original character matrix persist, but maintains
        character states, meta data, and the dissimilarity map for the sampled
        cells only.

        The input tree is not modified; all pruning happens on a deep copy.

        Args:
            tree: The CassiopeiaTree for which to subsample leaves
            collapse_source: The source node from which to collapse
                unifurcations. Defaults to the root of the subsampled tree.

        Returns:
            A new CassiopeiaTree that is the induced subtree on a sample of the
            leaves in the given tree.

        Raises:
            LeafSubsamplerError: If the requested sample size is <= 0 or
                exceeds 'tree.n_cell'.
        """
        ratio = self.__ratio
        number_of_leaves = self.__number_of_leaves
        # An explicit count takes precedence; otherwise truncate the
        # fractional sample size to an integer.
        n_subsample = (
            number_of_leaves
            if number_of_leaves is not None
            else int(tree.n_cell * ratio)
        )
        if n_subsample <= 0:
            raise LeafSubsamplerError(
                "Specified number of leaves sampled is <= 0."
            )
        if n_subsample > tree.n_cell:
            raise LeafSubsamplerError(
                "Specified number of leaves sampled is greater than the number"
                " of leaves in the given tree."
            )

        # Sampling is implemented by choosing which leaves to *remove*.
        # NOTE(review): the bound above uses tree.n_cell while this uses
        # len(tree.leaves) -- presumably equal; confirm against CassiopeiaTree.
        n_remove = len(tree.leaves) - n_subsample
        subsampled_tree = copy.deepcopy(tree)
        leaf_remove = np.random.choice(
            subsampled_tree.leaves, n_remove, replace=False
        )
        for leaf in leaf_remove:
            subsampled_tree.remove_leaf_and_prune_lineage(leaf)

        if collapse_source is None:
            collapse_source = subsampled_tree.root
        subsampled_tree.collapse_unifurcations(source=collapse_source)

        # Copy and annotate branch lengths and times: every node that
        # survives gets the time it had in the original tree.
        subsampled_tree.set_times(
            {node: tree.get_time(node) for node in subsampled_tree.nodes}
        )
        return subsampled_tree