"""
This file stores a subclass of TreeSolver, the SimpleFitSubcloneSimulator. The
SimpleFitSubcloneSimulator simulates a clonal population which develops one
fit subclone.
"""
import networkx as nx
from queue import Queue
from typing import Callable, Generator, Tuple, Union
from cassiopeia.data import CassiopeiaTree
from cassiopeia.simulator.TreeSimulator import TreeSimulator
[docs]
class SimpleFitSubcloneSimulator(TreeSimulator):
r"""
Simulates a clonal population which develops one fit subclone.
This TreeSimulator simulates a (binary) clonal population that evolves
neutrally with constant or generated branch length 'branch_length_neutral',
until in generation 'generations_until_fit_subclone' one of the lineages
develops fitness and starts to expand with the new constant or generated
branch length 'branch_length_fit'. The total length of the experiment is
given by 'experiment_duration'. This TreeSimulator is useful because (1) it
generates both deterministic and stochastic phylogenies and (2) the
phylogenies are as simple as possible while being non-neutral. The ability
to control the branch lengths of the neutrally evolving population and of
the fit subpopulation allows us to simulate different degrees of fitness
as well as stochasticity.
Note that the branches for the leaves of the tree need not have length
exactly equal to 'branch_length_neutral' nor 'branch_length_fit', because
the population gets assayed at the arbitrary time specified by
'experiment_duration', which is not necessarily the time at which a
division event happens.
The names of the leaf nodes will be given by a unique integer ID, followed
by "_neutral" for the neutrally evolving individuals, and by "_fit" for the
fit individuals.
Finally, note that it is possible to make the subclone less fit, i.e.
expand slower, for there is no constraint in the branch lengths, although
the most typical use case is the simulation of a more fit subpopulation.
Args:
branch_length_neutral: Branch length of the neutrally evolving
individuals. All individuals are neutrally evolving until generation
'generations_until_fit_subclone', when exactly one of the lineages
develops fitness. A callable can be provided instead of a constant,
which is useful for simulating random branch lengths.
branch_length_fit: The branch length of the fit subclone, which appears
exactly at generation 'generations_until_fit_subclone'. A callable
can be provided instead of a constant, which is useful for
simulating random branch lengths.
experiment_duration: The total length of the experiment.
generations_until_fit_subclone: The generation at which one lineage
develops fitness, giving rise to a fit subclone.
"""
def __init__(
self,
branch_length_neutral: Union[float, Callable[[], float]],
branch_length_fit: Union[float, Callable[[], float]],
experiment_duration: float,
generations_until_fit_subclone: int,
):
self.branch_length_neutral = self._create_callable(
branch_length_neutral
)
self.branch_length_fit = self._create_callable(branch_length_fit)
self.experiment_duration = experiment_duration
self.generations_until_fit_subclone = generations_until_fit_subclone
def _create_callable(
self, x: Union[float, Callable[[], float]]
) -> Callable[[], float]:
# In case the user provides an int, we still hold their back...
if type(x) in [int, float]:
def constant_branch_length_callable() -> float:
return x
return constant_branch_length_callable
else:
return x
[docs]
def simulate_tree(self) -> CassiopeiaTree:
r"""
See base class.
"""
branch_length_neutral = self.branch_length_neutral
branch_length_fit = self.branch_length_fit
experiment_duration = self.experiment_duration
generations_until_fit_subclone = self.generations_until_fit_subclone
def node_name_generator() -> Generator[str, None, None]:
i = 0
while True:
yield str(i)
i += 1
tree = nx.DiGraph() # This is what will get populated.
names = node_name_generator()
# Contains: (node, time, fitness, generation)
q = Queue() # type: Queue[Tuple[str, float, str, int]]
times = {}
root = next(names) + "_neutral"
tree.add_node(root)
times[root] = 0.0
root_child = next(names) + "_neutral"
tree.add_edge(root, root_child)
q.put((root_child, 0.0, "neutral", 0))
subclone_started = False
while not q.empty():
# Pop next node
(node, time, node_fitness, generation) = q.get()
time_till_division = (
branch_length_neutral()
if node_fitness == "neutral"
else branch_length_fit()
)
time_of_division = time + time_till_division
if time_of_division >= experiment_duration:
# Not enough time left for the individual to divide.
times[node] = experiment_duration
continue
# Create children, add edges to them, and push children to the
# queue.
times[node] = time_of_division
left_child_fitness = node_fitness
right_child_fitness = node_fitness
if (
not subclone_started
and generation + 1 == generations_until_fit_subclone
):
# Start the subclone
subclone_started = True
left_child_fitness = "fit"
left_child = next(names) + "_" + left_child_fitness
right_child = next(names) + "_" + right_child_fitness
tree.add_nodes_from([left_child, right_child])
tree.add_edges_from([(node, left_child), (node, right_child)])
q.put(
(
left_child,
time_of_division,
left_child_fitness,
generation + 1,
)
)
q.put(
(
right_child,
time_of_division,
right_child_fitness,
generation + 1,
)
)
res = CassiopeiaTree(tree=tree)
res.set_times(times)
return res