Source code for micom.workflows.media

"""Example workflows for micom."""

from os import path
import pandas as pd
from micom import load_pickle
from micom.workflows.core import workflow
from micom.workflows.results import GrowthResults, combine_results
import micom.media as mm
from micom.logger import logger
from micom.solution import OptimizationError

# Lookup table of exchange direction labels: 0 maps to "import", 1 to "export".
DIRECTION = pd.Series({0: "import", 1: "export"})
def process_medium(medium, samples):
    """Prepare a medium for simulation.

    The returned table is indexed by reaction ID and has one row per
    (reaction, sample_id) pair. The DataFrame passed in is left untouched.

    Arguments
    ---------
    medium : pandas.DataFrame
        The growth medium. Needs a `reaction` column. May contain a
        `sample_id` column; if it does not, the full medium is replicated
        for every entry of `samples`.
    samples : iterable
        The sample IDs that need to be covered by the medium.

    Returns
    -------
    pandas.DataFrame
        The medium with a `sample_id` column, indexed by `reaction` and
        with duplicated (reaction, sample_id) rows removed.

    Raises
    ------
    ValueError
        If the medium has a `sample_id` column that lacks at least one of
        the requested samples.
    """
    # Work on a copy so that re-indexing does not leak into the caller's data.
    medium = medium.copy()
    medium.index = medium.reaction

    if "sample_id" not in medium.columns:
        # Sample-agnostic medium: every sample gets the same medium.
        medium = pd.concat(
            [medium.assign(sample_id=s) for s in samples], axis=0
        )
    else:
        # Build the lookup once instead of once per requested sample.
        known = set(medium.sample_id.unique())
        missing = [s for s in samples if s not in known]
        if missing:
            raise ValueError(
                "The medium is missing samples from the manifest: "
                f"{', '.join(map(str, missing))}."
            )

    return medium.drop_duplicates(subset=["reaction", "sample_id"])
def _medium(args):
    """Compute the minimal medium for one sample's community model.

    `args` is a tuple of (sample id, path to the pickled model, community
    growth rate, taxon growth rate, minimize-components flag, weights,
    solution flag). Returns None if no minimal medium was found, otherwise
    a dict with a "medium" DataFrame (columns `reaction`, `flux`,
    `sample_id`) and, if the solution flag is set, a "growth" entry.
    """
    sample, model_path, com_growth, growth, mc, weights, solution = args
    com = load_pickle(model_path)

    # The solver's feasibility tolerance is used as both absolute and
    # relative tolerance for the medium minimization.
    tol = com.solver.configuration.tolerances.feasibility
    res = mm.minimal_medium(
        com,
        community_growth=com_growth,
        min_growth=growth,
        minimize_components=mc,
        open_exchanges=True,
        solution=solution,
        weights=weights,
        atol=tol,
        rtol=tol,
    )
    if res is None:
        logger.info("Could not get a minimal medium for sample %s." % sample)
        return None

    result = {}
    if not solution:
        medium = res.to_frame()
    else:
        # With a solution requested the result holds medium and solution.
        medium = res["medium"].to_frame()
        result["growth"] = GrowthResults.from_solution(res["solution"], com)

    medium.columns = ["flux"]
    medium["sample_id"] = sample
    medium.index.name = "reaction"
    result["medium"] = medium.reset_index()
    return result
def minimal_media(
    manifest: pd.DataFrame,
    model_folder: str,
    community_growth: float = 0.0,
    growth: float = 0.1,
    minimize_components: bool = False,
    weights: str = None,
    summarize: bool = True,
    solution: bool = False,
    threads: int = 1,
) -> pd.DataFrame:
    """Calculate the minimal medium for a set of community models.

    This requires specification of either the minimal community growth rate,
    a minimal taxon growth rate that has to be reachable by all taxa in the
    sample simultaneously, or a combination of both. All imports will be
    opened and the minimal medium allowing those growth rates will be
    returned. What exactly is being minimized (mass flux, carbon flux,
    number of components) can be specified through the `weights` and
    `minimize_components` options.

    Note
    ----
    A common usage example would be to request some realistic growth rate
    for the entire community and a very low growth rate for all taxa to
    ensure they are growing ("alive") in the medium.

    The returned solution comes from the medium minimization problem and
    does not have to correspond to the cooperative tradeoff solution with
    the same medium.

    Arguments
    ---------
    manifest : pandas.DataFrame
        The manifest as returned by the `build` workflow. Needs the columns
        `sample_id` and `file`.
    model_folder : str
        The folder in which to find the files mentioned in the manifest.
    community_growth : positive float
        The minimum community-wide growth rate that has to be achieved on
        the created medium.
    growth : positive float, dict, or pd.Series
        The taxon-specific growth rates that have to be achieved. If a
        single float gives the growth rate for each individual taxon. If a
        dict or Series gives the growth rate for each taxon specified that
        way. Here keys are the IDs for the taxon.
    minimize_components : boolean
        Whether to minimize the number of media components rather than the
        total flux. This will ignore the weight argument and might be very
        slow.
    weights : str
        Will scale the fluxes by a weight factor. Can either be "mass" which
        will scale by molecular mass, a single element which will scale by
        the elemental content (for instance "C" to scale by carbon content).
        If None every metabolite will receive the same weight.
        Will be ignored if `minimize_components` is True.
    summarize : boolean
        Whether to summarize the medium across all samples. If True the
        maximum flux across samples is reported for each reaction. If False
        will return a medium for each sample.
    solution : boolean
        Whether to also return the growth results obtained from the medium
        minimization.
    threads : int
        The number of processes to use.

    Returns
    -------
    pandas.DataFrame or tuple of pandas.DataFrame and GrowthResult
        Either the medium or, if `solution=True` a tuple of the medium
        and the growth results.

    Raises
    ------
    OptimizationError
        If no medium could be found for any of the samples.
    """
    samples = manifest.sample_id.unique()
    args = [
        (
            s,
            path.join(model_folder, manifest[manifest.sample_id == s].file.iloc[0]),
            community_growth,
            growth,
            minimize_components,
            weights,
            solution,
        )
        for s in samples
    ]
    results = workflow(_medium, args, threads)

    # Failed samples are reported as None by the per-sample worker.
    successful = [r for r in results if r is not None]
    if not successful:
        raise OptimizationError(
            "Could not find a growth medium that allows the specified "
            "growth rate for any sample :("
        )
    if any(r is None for r in results):
        logger.error(
            "For some samples I could not find a medium that fulfills "
            "the growth rate requirements. Returning media only for the "
            "successful samples."
        )

    medium = pd.concat(r["medium"] for r in successful)
    if summarize:
        medium = medium.groupby("reaction").flux.max().reset_index()
    # "EX_" is a literal prefix, so make sure it is never treated as a regex
    # regardless of the default of the installed pandas version.
    medium["metabolite"] = medium.reaction.str.replace("EX_", "", regex=False)

    if solution:
        growth_results = combine_results(r["growth"] for r in successful)
        return medium, growth_results
    return medium
def _fix_medium(args):
    """Complete the medium for one sample's community model.

    `args` holds (sample id, path to the pickled model, growth rate,
    minimum growth, maximum import, minimize-components flag, medium,
    weights). Returns None if the completion raised an error, otherwise a
    DataFrame with the columns `reaction`, `flux`, `metabolite`,
    `description` and `sample_id`.
    """
    sid, model_path, growth, min_growth, max_import, mip, medium, weights = args
    com = load_pickle(model_path)

    try:
        completed = mm.complete_medium(
            com,
            medium,
            growth=growth,
            min_growth=min_growth,
            max_import=max_import,
            minimize_components=mip,
            weights=weights,
        )
    except Exception:
        logger.error("Can't reach the specified growth rates for model %s." % sid)
        return None

    table = pd.DataFrame({"reaction": completed.index, "flux": completed.values})

    # Annotate each reaction with the ID and name of its first metabolite.
    first_metabolites = [
        list(com.reactions.get_by_id(rid).metabolites.keys())[0]
        for rid in table.reaction
    ]
    table["metabolite"] = [met.id for met in first_metabolites]
    table["description"] = [met.name for met in first_metabolites]
    table["sample_id"] = sid
    return table
def complete_community_medium(
    manifest: pd.DataFrame,
    model_folder: str,
    medium: pd.DataFrame,
    community_growth: float = 0.1,
    min_growth: float = 0.001,
    max_import: float = 1,
    minimize_components: bool = False,
    summarize: bool = True,
    weights: str = None,
    threads: int = 1,
) -> pd.DataFrame:
    """Augment a growth medium so a community or specific taxa can grow on it.

    Note
    ----
    This will complete a growth medium for a single community/sample. For
    building growth media that work for arbitrary samples/compositions of
    taxa see `complete_db_medium`.

    In contrast to `complete_db_medium` this will account for taxon-taxon
    interactions. However, growth rates will no longer be an emergent
    property of the simulation, because one needs to specify the community
    growth rate or growth rates for individual taxa.

    Arguments
    ---------
    manifest : pandas.DataFrame
        The manifest as returned by the `build` workflow. Needs the columns
        `sample_id` and `file`.
    model_folder : str
        The folder in which to find the files mentioned in the manifest.
    medium : pandas.DataFrame
        A growth medium with the columns `flux` and `reaction` and an
        optional `sample_id` column. Without a `sample_id` column the same
        medium is used for all samples. Fluxes below 1e-6 are raised
        to 1e-6.
    community_growth : positive float
        The minimum community-wide growth rate that has to be achieved on
        the created medium.
    min_growth : positive float
        The minimum biomass production required for growth.
    max_import : positive float
        The maximum import rate for added imports.
    minimize_components : boolean
        Whether to minimize the number of media components rather than the
        total flux.
    summarize : boolean
        Whether to summarize the medium across all samples. If True the
        maximum flux across samples is reported for each reaction. If False
        will return a medium for each sample.
    weights : str
        Will scale the fluxes by a weight factor. Can either be "mass" which
        will scale by molecular mass, a single element which will scale by
        the elemental content (for instance "C" to scale by carbon content).
        If None every metabolite will receive the same weight.
        Will be ignored if `minimize_components` is True.
    threads : int
        The number of processes to use.

    Returns
    -------
    pandas.DataFrame
        A new growth medium with the smallest amount of augmentations such
        that all members of the community can grow in it.

    Raises
    ------
    ValueError
        If `medium` is not a DataFrame or does not cover all samples.
    OptimizationError
        If the medium could not be completed for any of the samples.
    """
    if not isinstance(medium, pd.DataFrame):
        raise ValueError("`medium` must be a DataFrame.")

    samples = manifest.sample_id.unique()
    paths = {
        s: path.join(model_folder, manifest[manifest.sample_id == s].file.iloc[0])
        for s in samples
    }
    medium = process_medium(medium, samples)

    # Test the mask itself: checking the truthiness of the selected values
    # would miss fluxes that are exactly zero.
    too_small = medium.flux < 1e-6
    if too_small.any():
        medium.loc[too_small, "flux"] = 1e-6
        logger.info("Some import rates were too small and were adjusted to 1e-6.")

    args = [
        [
            s,
            p,
            community_growth,
            min_growth,
            max_import,
            minimize_components,
            medium.flux[medium.sample_id == s],
            weights,
        ]
        for s, p in paths.items()
    ]
    res = workflow(_fix_medium, args, threads=threads, description="Augmenting media")

    # Samples for which the completion failed are returned as None.
    completed = [r for r in res if r is not None]
    if not completed:
        raise OptimizationError(
            "All optimizations failed. You may need to increase `max_import` "
            "or lower the target growth rate."
        )

    final = pd.concat(completed)
    if summarize:
        final = (
            final.groupby(["reaction", "metabolite", "description"])
            .flux.max()
            .reset_index()
        )
    return final