Source code for micom.workflows.media
"""Example workflows for micom."""
from os import path
import pandas as pd
from micom import load_pickle
from micom.workflows.core import workflow
from micom.workflows.results import GrowthResults, combine_results
import micom.media as mm
from micom.logger import logger
from micom.solution import OptimizationError
[docs]
def process_medium(medium, samples):
    """Prepare a medium for simulation.

    Indexes the medium by reaction ID and makes sure it contains entries for
    every requested sample.

    Arguments
    ---------
    medium : pandas.DataFrame
        The growth medium. Needs a `reaction` column and may have a
        `sample_id` column. The passed object is not modified.
    samples : iterable
        The sample IDs that need a medium.

    Returns
    -------
    pandas.DataFrame
        The medium indexed by reaction, with a `sample_id` column and without
        duplicated reaction/sample pairs. If the input had no `sample_id`
        column the medium is replicated once for every sample.

    Raises
    ------
    ValueError
        If the medium has a `sample_id` column that lacks some of the samples.
    """
    # `set_index` returns a new frame, so the caller's medium keeps its index.
    medium = medium.set_index("reaction", drop=False)
    if "sample_id" not in medium.columns:
        medium = pd.concat([medium.assign(sample_id=s) for s in samples], axis=0)
    else:
        # Collect the known samples once instead of once per membership test.
        known = set(medium.sample_id.unique())
        missing = [s for s in samples if s not in known]
        if missing:
            # Sample IDs are not necessarily strings (e.g. integer IDs).
            raise ValueError(
                "The medium is missing samples from the manifest: "
                f"{', '.join(str(s) for s in missing)}."
            )
    return medium.drop_duplicates(subset=["reaction", "sample_id"])
[docs]
def _medium(args):
    """Compute the minimal medium for the model of a single sample.

    `args` is a tuple of (sample ID, path to the pickled model, community
    growth rate, taxon growth rate(s), whether to minimize the number of
    components, weights, whether to return the solution as well).

    Returns None if no minimal medium could be obtained. Otherwise returns a
    dict with the medium as a DataFrame under "medium" and, if the solution was
    requested, the growth results under "growth".
    """
    sample_id, model_path, com_growth, growth, min_components, weights, solution = args
    community = load_pickle(model_path)
    # The solver's feasibility tolerance is used as absolute and relative tolerance.
    tolerance = community.solver.configuration.tolerances.feasibility
    minimized = mm.minimal_medium(
        community,
        community_growth=com_growth,
        min_growth=growth,
        minimize_components=min_components,
        open_exchanges=True,
        solution=solution,
        weights=weights,
        atol=tolerance,
        rtol=tolerance,
    )
    if minimized is None:
        logger.info("Could not get a minimal medium for sample %s." % sample_id)
        return None

    output = {}
    if solution:
        # With `solution=True` the medium comes bundled with the solution.
        fluxes = minimized["medium"].to_frame()
        output["growth"] = GrowthResults.from_solution(
            minimized["solution"], community
        )
    else:
        fluxes = minimized.to_frame()
    fluxes.columns = ["flux"]
    fluxes["sample_id"] = sample_id
    fluxes.index.name = "reaction"
    output["medium"] = fluxes.reset_index()
    return output
[docs]
def minimal_media(
    manifest: pd.DataFrame,
    model_folder: str,
    community_growth: float = 0.0,
    growth: float = 0.1,
    minimize_components: bool = False,
    weights: str = None,
    summarize: bool = True,
    solution: bool = False,
    threads: int = 1,
) -> pd.DataFrame:
    """Calculate the minimal medium for a set of community models.

    This requires specification of either the minimal community growth rate,
    a minimal taxon growth rate that has to be reachable by all taxa in the sample
    simultaneously, or a combination of both. All imports will be opened and the
    minimal medium allowing those growth rates will be returned. What exactly is
    being minimized (mass flux, carbon flux, number of components) can be specified
    through the `weights` and `minimize_components` options.

    Note
    ----
    A common usage example would be to request some realistic growth rate for the
    entire community and a very low growth rate for all taxa to ensure they are
    growing ("alive") in the medium. The returned solution comes from the medium
    minimization problem and does not have to correspond to the cooperative
    tradeoff solution with the same medium.

    Arguments
    ---------
    manifest : pandas.DataFrame
        The manifest as returned by the `build` workflow.
    model_folder : str
        The folder in which to find the files mentioned in the manifest.
    community_growth : positive float
        The minimum community-wide growth rate that has to be achieved on the
        created medium.
    growth : positive float, dict, or pd.Series
        The taxon-specific growth rates that have to be achieved. If a single float
        gives the growth rate for each individual taxon. If a dict or Series gives
        the growth rate for each taxon specified that way. Here keys are the IDs
        for the taxon.
    minimize_components : boolean
        Whether to minimize the number of media components rather than the
        total flux. This will ignore the weight argument and might be very slow.
    weights : str
        Will scale the fluxes by a weight factor. Can either be "mass" which will
        scale by molecular mass, a single element which will scale by
        the elemental content (for instance "C" to scale by carbon content).
        If None every metabolite will receive the same weight.
        Will be ignored if `minimize_components` is True.
    summarize : boolean
        Whether to summarize the medium across all samples. If False will
        return a medium for each sample.
    solution : boolean
        Whether to also return the growth results of the minimization.
    threads : int
        The number of processes to use.

    Returns
    -------
    pandas.DataFrame or tuple of pandas.DataFrame and GrowthResult
        Either the medium or, if `solution=True` a tuple of the medium and the
        growth results.

    Raises
    ------
    OptimizationError
        If no medium could be found for any of the samples.
    """
    jobs = []
    for sid in manifest.sample_id.unique():
        # The first manifest row for a sample names its model file.
        model_file = manifest[manifest.sample_id == sid].file.iloc[0]
        jobs.append(
            (
                sid,
                path.join(model_folder, model_file),
                community_growth,
                growth,
                minimize_components,
                weights,
                solution,
            )
        )

    results = workflow(_medium, jobs, threads)
    failed = [r is None for r in results]
    if all(failed):
        raise OptimizationError(
            "Could not find a growth medium that allows the specified "
            "growth rate for any sample :("
        )
    if any(failed):
        logger.error(
            "For some samples I could not find a medium that fulfills "
            "the growth rate requirements. Returning media only for the "
            "succesful samples."
        )

    successful = [r for r in results if r is not None]
    medium = pd.concat([r["medium"] for r in successful])
    if summarize:
        # Keep the largest flux seen for a reaction in any sample.
        medium = medium.groupby("reaction")["flux"].max().reset_index()
    medium["metabolite"] = medium["reaction"].str.replace("EX_", "")

    if not solution:
        return medium
    growth_results = combine_results(r["growth"] for r in successful)
    return medium, growth_results
[docs]
def _fix_medium(args):
    """Complete the medium for the model of a single sample.

    `args` holds (sample ID, path to the pickled model, growth rate, minimum
    growth rate, maximum import flux, whether to minimize the number of
    components, the medium to complete, weights).

    Returns None if completing the medium raised an error. Otherwise returns a
    DataFrame with the columns `reaction`, `flux`, `metabolite`, `description`
    and `sample_id`.
    """
    (
        sample_id,
        model_path,
        growth,
        min_growth,
        max_import,
        min_components,
        medium,
        weights,
    ) = args
    community = load_pickle(model_path)
    try:
        completed = mm.complete_medium(
            community,
            medium,
            growth=growth,
            min_growth=min_growth,
            max_import=max_import,
            minimize_components=min_components,
            weights=weights,
        )
    except Exception:
        logger.error(
            "Can't reach the specified growth rates for model %s." % sample_id
        )
        return None

    def first_metabolite(reaction_id):
        """Return the first metabolite of the reaction with the given ID."""
        reaction = community.reactions.get_by_id(reaction_id)
        return list(reaction.metabolites.keys())[0]

    table = pd.DataFrame({"reaction": completed.index, "flux": completed.values})
    table["metabolite"] = [first_metabolite(rid).id for rid in table.reaction]
    table["description"] = [first_metabolite(rid).name for rid in table.reaction]
    table["sample_id"] = sample_id
    return table
[docs]
def complete_community_medium(
    manifest: pd.DataFrame,
    model_folder: str,
    medium: pd.DataFrame,
    community_growth: float = 0.1,
    min_growth: float = 0.001,
    max_import: float = 1,
    minimize_components: bool = False,
    summarize: bool = True,
    weights: str = None,
    threads: int = 1,
) -> pd.DataFrame:
    """Augment a growth medium so a community or specific taxa can grow on it.

    Note
    ----
    This will complete a growth medium for a single community/sample. For building
    growth media that work for arbitrary samples/compositions of taxa see
    `complete_db_medium`. In contrast to `complete_db_medium` this will account for
    taxon-taxon interactions. However, growth rates will no longer be an emergent
    property of the simulation, because one needs to specify the community growth
    rate or growth rates for individual taxa.

    Arguments
    ---------
    manifest : pandas.DataFrame
        The manifest as returned by the `build` workflow.
    model_folder : str
        The folder in which to find the files mentioned in the manifest.
    medium : pandas.DataFrame
        A growth medium with the columns `reaction` (exchange reaction IDs) and
        `flux` (positive import fluxes). May contain a `sample_id` column to
        specify sample-specific media.
    community_growth : positive float
        The minimum community-wide growth rate that has to be achieved on the
        created medium.
    min_growth : positive float
        The minimum biomass production required for growth.
    max_import : positive float
        The maximum import rate for added imports.
    minimize_components : boolean
        Whether to minimize the number of media components rather than the
        total flux.
    summarize : boolean
        Whether to summarize the medium across all samples. If False will
        return a medium for each sample.
    weights : str
        Will scale the fluxes by a weight factor. Can either be "mass" which will
        scale by molecular mass, a single element which will scale by
        the elemental content (for instance "C" to scale by carbon content).
        If None every metabolite will receive the same weight.
        Will be ignored if `minimize_components` is True.
    threads : int
        The number of processes to use.

    Returns
    -------
    pandas.DataFrame
        A new growth medium with the smallest amount of augmentations such
        that all members of the community can grow in it.

    Raises
    ------
    ValueError
        If `medium` is not a DataFrame.
    OptimizationError
        If the medium could not be completed for any of the samples.
    """
    if not isinstance(medium, pd.DataFrame):
        raise ValueError("`medium` must be a DataFrame.")

    samples = manifest.sample_id.unique()
    paths = {
        s: path.join(model_folder, manifest[manifest.sample_id == s].file.iloc[0])
        for s in samples
    }

    medium = process_medium(medium, samples)
    # Test the mask itself: calling `.any()` on the selected flux values would be
    # False for fluxes of exactly zero and leave them unadjusted.
    too_small = medium.flux < 1e-6
    if too_small.any():
        medium.loc[too_small, "flux"] = 1e-6
        logger.info("Some import rates were to small and were adjusted to 1e-6.")

    args = [
        [
            s,
            p,
            community_growth,
            min_growth,
            max_import,
            minimize_components,
            medium.flux[medium.sample_id == s],
            weights,
        ]
        for s, p in paths.items()
    ]
    res = workflow(_fix_medium, args, threads=threads, description="Augmenting media")

    # Failed samples come back as None and are left out of the final medium.
    completed = [r for r in res if r is not None]
    if not completed:
        raise OptimizationError(
            "All optimizations failed. You may need to increase `max_import` "
            "or lower the target growth rate."
        )

    final = pd.concat(completed)
    if summarize:
        # Keep the largest flux required for a reaction by any sample.
        final = (
            final.groupby(["reaction", "metabolite", "description"])
            .flux.max()
            .reset_index()
        )
    return final