Source code for micom.workflows.grow

"""Performs growth and exchange analysis for several models."""

from cobra.util.solver import interface_to_str, OptimizationError
from micom import load_pickle
from micom.annotation import annotate_metabolites_from_exchanges
from micom.logger import logger
from micom.media import minimal_medium
from micom.workflows.core import workflow
from micom.workflows.results import GrowthResults
from micom.workflows.media import process_medium
from os import path
import pandas as pd
import warnings

warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)


[docs]
DIRECTION = pd.Series(["import", "export"], index=[0, 1])


[docs]
ARGS = {
    "none": {"fluxes": True, "pfba": False},
    "minimal imports": {"fluxes": False, "pfba": False},
    "pFBA": {"fluxes": True, "pfba": True},
}




[docs]
def _growth(args):
    p, tradeoff, medium, weights, strategy, atol, rtol, presolve = args
    com = load_pickle(p)

    if atol is None:
        atol = com.solver.configuration.tolerances.feasibility
    if rtol is None:
        rtol = com.solver.configuration.tolerances.feasibility
    if presolve:
        # looks stupid but that here is to respect the preset
        # and there is an auto setting that we want to respect
        com.solver.configuration.presolve = presolve

    if "glpk" in interface_to_str(com.solver.interface):
        logger.error(
            "Community models were not built with a QP-capable solver. "
            "This means that you did not install CPLEX or Gurobi. "
            "If you did install one of the two please file a bug report "
            "at https://github.com/micom-dev/micom/issues."
        )
        return None

    ex_ids = [r.id for r in com.exchanges]
    logger.info(
        "%d/%d import reactions found in model.",
        medium.index.isin(ex_ids).sum(),
        len(medium),
    )
    com.medium = medium[medium.index.isin(ex_ids)]

    # Get growth rates
    args = ARGS[strategy].copy()
    args["atol"] = atol
    args["rtol"] = rtol
    args["fraction"] = tradeoff
    try:
        sol = com.cooperative_tradeoff(**args)
        rates = sol.members
        rates["taxon"] = rates.index
        rates["tradeoff"] = tradeoff
        rates["sample_id"] = com.id
    except Exception:
        logger.error(
            "Could not solve cooperative tradeoff for %s. "
            "This can often be fixed by enabling `presolve`, choosing more "
            "permissive atol and rtol arguments, or by checking that medium "
            "fluxes are > atol." % com.id
        )
        return None

    if strategy == "minimal imports":
        # Get the minimal medium and the solution at the same time
        med = minimal_medium(
            com,
            exchanges=None,
            community_growth=sol.growth_rate,
            min_growth=rates.growth_rate.drop("medium"),
            solution=True,
            weights=weights,
            atol=atol,
            rtol=rtol,
        )
        if med is None:
            logger.error(
                "The minimal medium optimization failed for %s. "
                "This can often be fixed by enabling `presolve`, choosing more "
                "permissive atol and rtol arguments, or by checking that medium "
                "fluxes are > atol." % com.id
            )
            return None
        sol = med["solution"]

    exs = list({r.global_id for r in com.internal_exchanges + com.exchanges})
    fluxes = sol.fluxes.loc[:, exs].copy()
    fluxes["sample_id"] = com.id
    fluxes["tolerance"] = atol
    anns = annotate_metabolites_from_exchanges(com)
    return {"growth": rates, "exchanges": fluxes, "annotations": anns}




[docs]
def grow(
    manifest,
    model_folder,
    medium,
    tradeoff,
    threads=1,
    weights=None,
    strategy="minimal imports",
    atol=None,
    rtol=None,
    presolve=False,
):
    """Simulate growth for a set of community models.

    Note
    ----
    The strategy `mimimal imports` can become unstable for common carbon sources since
    it will add in infeasible imports that are very small but import some high-C
    molecules. If you use it check that only components from your medium have been used
    and molecules that should be essential are indeed consumed.

    Parameters
    ----------
    manifest : pandas.DataFrame
        The manifest as returned by the `build` workflow.
    model_folder : str
        The folder in which to find the files mentioned in the manifest.
    medium : pandas.DataFrame
        A growth medium. Must have columns "reaction" and "flux" denoting
        exchange reactions and their respective maximum flux.
    tradeoff : float in (0.0, 1.0]
        A tradeoff value. Can be chosen by running the `tradeoff` workflow or
        by experince. Tradeoff values of 0.5 for metagenomcis data and 0.3 for
        16S data seem to work well.
    threads : int >=1
        The number of parallel workers to use when building models. As a
        rule of thumb you will need around 1GB of RAM for each thread.
    strategy : str
        Computational strategy used to reduce the flux space. Default "minimal imports"
        uses the solution with the smallest total import flux from the environment,
        "pFBA" uses parsimonious FBA, and "none" returns an arbitrary
        feasible flux distribution.
    weights : str
        Only used during the calculaton of the minimal import rates.
        Will scale the fluxes by a weight factor. Can either be "mass" which will
        scale by molecular mass, a single element which will scale by
        the elemental content (for instance "C" to scale by carbon content).
        If None every metabolite will receive the same weight.
        Will be ignored if `minimize_components` is True.
    atol : float
        Absolute tolerance for the growth rates. If None will use the solver tolerance.
    rtol : float
        Relative tolerqance for the growth rates. If None will use the solver tolerance.
    presolve : bool
        Whether to use the presolver/scaling. Can improve numerical accuracy in some
        cases.

    Returns
    -------
    GrowthResults
        A named tuple containing the growth rates and exchange fluxes for all
        samples/models.
    """
    if strategy not in ARGS:
        raise ValueError(
            "`%s` is not a valid strategy. Must be one of %s!"
            % (strategy, ", ".join(ARGS))
        )
    samples = manifest.sample_id.unique()
    paths = {
        s: path.join(model_folder, manifest[manifest.sample_id == s].file.iloc[0])
        for s in samples
    }
    medium = process_medium(medium, samples)
    args = [
        [
            p,
            tradeoff,
            medium.flux[medium.sample_id == s],
            weights,
            strategy,
            atol,
            rtol,
            presolve,
        ]
        for s, p in paths.items()
    ]
    results = workflow(_growth, args, threads)
    if all([r is None for r in results]):
        raise OptimizationError(
            "All numerical optimizations failed. This indicates a problem "
            "with the solver or numerical instabilities. Check that you have "
            "CPLEX or Gurobi installed. You may also increase the abundance "
            "cutoff to create simpler models."
        )
    growth = pd.concat(r["growth"] for r in results if r is not None)
    growth = growth[growth.taxon != "medium"]
    exchanges = pd.concat(r["exchanges"] for r in results if r is not None)
    exchanges["taxon"] = exchanges.index.values
    exchanges = exchanges.melt(
        id_vars=["taxon", "sample_id", "tolerance"],
        var_name="reaction",
        value_name="flux",
    ).dropna(subset=["flux"])
    abundance = growth[["taxon", "sample_id", "abundance"]]
    exchanges = pd.merge(exchanges, abundance, on=["taxon", "sample_id"], how="outer")
    anns = pd.concat(
        r["annotations"] for r in results if r is not None
    ).drop_duplicates(subset=["reaction"])
    anns.index = anns.reaction
    exchanges = pd.merge(exchanges, anns[["metabolite"]], on="reaction", how="left")
    exchanges["direction"] = DIRECTION[(exchanges.flux > 0.0).astype(int)].values
    exchanges = exchanges[exchanges.flux.abs() > exchanges.tolerance]

    return GrowthResults(growth, exchanges, anns)