Source code for BFAIR.mfa.visualization.distributions

import numpy as np
import math
import scipy
import scipy.stats as stats
from sklearn.neighbors import KernelDensity
import matplotlib.pyplot as plt
import seaborn as sns


[docs]def sampled_fluxes_minrange(sampled_fluxes, min_val, max_val):
    """
    Sometimes the sampled fluxes include very small predicted fluxes
    for a lot of different reactions. They might be relevant but can
    easily overwhealm plots. This function reduces the subset of
    fluxes regarded for plots.

    Parameters
    ----------
    sampled_fluxes : pandas.Dataframe
        The calculated fluxes, output of sampling. For each reaction,
        n fluxes will be calculated (n = number of samples taken).
    min_val : float, int
        Min value for excluded subset.
    max_val : float, int
        Max value for excluded subset.

    Returns
    -------
    sampled_fluxes : pandas.Dataframe
        Reduced subset of the input.
    """

    def in_between(column):
        return not all(column.between(min_val, max_val, inclusive=False))

    criteria = list(
        sampled_fluxes.apply(in_between, axis=0, result_type="reduce")
    )
    return sampled_fluxes[sampled_fluxes.columns[criteria]]


[docs]def show_reactions(reactions):
    """
    A function to inspect the reactions in a given subset,
    e.g. a subsytem or pathway.

    Parameters
    ----------
    reactions : list
        List of reactions, e.g. output of `get_subsytem_reactions()`.
    """
    for cnt, rxn in enumerate(reactions):
        print(f"{cnt}: {rxn}")


def _sampled_reaction_fit(sampled_values, alpha=1e-3):
    """
    Helper function to prepare the input for the sampled reaction
    fluxes plots.

    Parameters
    ----------
    sampled_values : pandas.Series
        Sampled values for one reaction.

    Returns
    -------
    sampled_reaction : list
        List of the values sampled for a given reaction.
        Sorted ascendingly.
    fit : list
        Normal distribution fit (y-Values) over the sampled
        data.
    norm_dist : boolean
        Normal distribution, yes or no
    """
    sampled_reaction = sorted(sampled_values)
    k2, p = scipy.stats.normaltest(sampled_reaction)
    if p < alpha:
        norm_dist = False
        model = KernelDensity(bandwidth=2, kernel="gaussian")
        sample = np.array(sampled_reaction)
        sample = sample.reshape((len(sample), 1))
        model.fit(sample)
        probabilities = model.score_samples(sample)
        fit = np.exp(probabilities)
    else:
        norm_dist = True
        fit = stats.norm.pdf(
            sampled_reaction,
            np.mean(sampled_reaction),
            np.std(sampled_reaction),
        )
    return sampled_reaction, fit, norm_dist


[docs]def plot_sampled_reaction_fluxes(
    sampled_fluxes, reactions, reaction_id, bins=10, alpha=1e-3
):
    """
    Plots the distribution of the sampled values for one selected
    reaction as a histogram and a normal distribution.

    Parameters
    ----------
    sampled_fluxes : pandas.Dataframe
        The calculated fluxes, output of sampling. For each reaction,
        n fluxes will be calculated (n = number of samples taken).
    reactions : list
        List of reactions, e.g. output of `get_subsytem_reactions()`.
    reaction_id : int
        ID of the reaction in the `reactions` list.
    bins : int
        Number of bins used for the histogram. Defaults to 10.

    Returns
    -------
    fig : matplotlib.Figure
        Figure of sampled value distribution for one reaction.
    """
    sampled_values = sampled_fluxes[reactions[reaction_id]]
    if all(v == 0 for v in sampled_values):
        print(f"All sample values are 0 for {reactions[reaction_id]}")
    else:
        sampled_reaction, fit, norm_dist = _sampled_reaction_fit(
            sampled_values, alpha
        )
        fig = plt.plot(
            sampled_reaction,
            fit,
            "-o",
            color=("darksalmon" if norm_dist else "darkcyan"),
        )
        plt.hist(
            sampled_reaction,
            bins=bins,
            rwidth=0.95,
            color=("darkcyan" if norm_dist else "darksalmon"),
            density=True,
        )
        plt.ylabel("Distribution [%]")
        plt.xlabel("Flux [mmol * gDCW$^{-1}$ * h$^{-1}$]")
        plt.title(reactions[reaction_id], size=20)
        return fig


def _subplot_sampled_reaction_fluxes(
    sampled_fluxes, reactions, reaction_id, bins=10, alpha=1e-3
):
    """
    Produces a subplot of the distribution of the sampled values
    for one selected reaction as a histogram and a normal distribution.

    Parameters
    ----------
    sampled_fluxes : pandas.Dataframe
        The calculated fluxes, output of sampling. For each reaction,
        n fluxes will be calculated (n = number of samples taken).
    reactions : list
        List of reactions, e.g. output of `get_subsytem_reactions()`.
    reaction_id : int
        ID of the reaction in the `reactions` list.
    bins : int
        Number of bins used for the histogram. Defaults to 10.

    Returns
    -------
    fig : matplotlib.Figure
        Figure of sampled value distribution for one reaction.
    """
    sampled_values = sampled_fluxes[reactions[reaction_id]]
    if all(v == 0 for v in sampled_values):
        fig = plt.plot(figsize=(15, 20))
        plt.axis("off")
        plt.text(
            0,
            -0.02,
            f"All sampled\nvalues are 0\nfor {reactions[reaction_id]}",
            ha="center",
            fontsize=20,
        )
    else:
        sampled_reaction, fit, norm_dist = _sampled_reaction_fit(
            sampled_values, alpha
        )
        fig = plt.plot(
            sampled_reaction,
            fit,
            "-o",
            color=("darksalmon" if norm_dist else "darkcyan"),
        )
        plt.hist(
            sampled_reaction,
            bins=bins,
            rwidth=0.95,
            color=("darkcyan" if norm_dist else "darksalmon"),
            density=True,
        )
        plt.title(reactions[reaction_id], size=20)
        return fig


[docs]def plot_all_subsystem_fluxes(sampled_fluxes, reactions, bins=10):
    """
    Plots the distribution of the sampled values for all
    reactions in a selected subsytem as a histogram and a
    normal distribution.

    Parameters
    ----------
    sampled_fluxes : pandas.Dataframe
        The calculated fluxes, output of sampling. For each reaction,
        n fluxes will be calculated (n = number of samples taken).
    reactions : list
        List of reactions, e.g. output of `get_subsytem_reactions()`.
    bins : int
        Number of bins used for the histogram. Defaults to 10.

    Returns
    -------
    fig : matplotlib.Figure
        Figure of sampled value distribution for one reaction.
    """
    rows = math.ceil(len(reactions) / 3)
    fig, ax = plt.subplots(figsize=(15, 5 * (rows / 2)))
    for i in range(0, len(reactions)):
        plt.subplot(rows, 3, i + 1)
        _subplot_sampled_reaction_fluxes(
            sampled_fluxes, reactions, reaction_id=i, bins=bins
        )
    plt.tight_layout()
    fig.text(
        0.5,
        -0.02,
        "Flux [mmol * gDCW$^{-1}$ * h$^{-1}$]",
        ha="center",
        fontsize=20,
    )
    fig.text(
        -0.02,
        0.5,
        "Distribution [%]",
        va="center",
        rotation="vertical",
        fontsize=20,
    )
    return fig


[docs]def show_subsystems(model):
    """
    Prints the names of all the subsystems in the model with
    their IDs.

    Parameters
    ----------
    model : cobra.Model
        Metabolic model.
    """
    subsystems = []
    for rxn in model.reactions:
        subsystem = rxn.subsystem
        if subsystem not in subsystems:
            subsystems.append(subsystem)
    for cnt, sub in enumerate(subsystems):
        print(f"{cnt}: {sub}")


[docs]def get_subsytem_reactions(model, subsystem_id):
    """
    Extracts the names of the reactions in a selected subsytem. Also
    outputs a list of all the subsytems in the model.

    Parameters
    ----------
    model : cobra.Model
        Metabolic model.
    subsystem_id : int
        ID of the subsystem the reactions should be extracted from.
        Can be selected after inspection using `show_subsystems()`.

    Returns
    -------
    reactions : list
        List of reactions in a selected subsytem.
    subsystems : list
        List of subsystems in a model. Input for `plot_subsystem_fluxes()`.
    """
    subsystems = []
    subsystems_reactions = {}
    for rxn in model.reactions:
        subsystem = rxn.subsystem
        if subsystem not in subsystems:
            subsystems.append(subsystem)
        subsystems_reactions[rxn.id] = subsystem
    reactions = [
        value[0]
        for value in subsystems_reactions.items()
        if value[1] == subsystems[subsystem_id]
    ]
    return reactions, subsystems


def _reduce_sampled_fluxes(sampled_fluxes, reactions):
    """
    Reduces the input dataframe of sampled fluxes to a subset of
    selected reactions.

    Parameters
    ----------
    sampled_fluxes : pandas.Dataframe
        The calculated fluxes, output of sampling. For each reaction,
        n fluxes will be calculated (n = number of samples taken).
    reactions : list
        List of reactions, e.g. output of `get_subsytem_reactions()`.

    Returns
    -------
    re_arranged_df : pandas.Dataframe
        Sampled fluxes reduced to a specified set of reactions.
    """
    reactions_mask = [
        True if col in reactions else False for col in sampled_fluxes.columns
    ]
    subsystem_df = sampled_fluxes[sampled_fluxes.columns[reactions_mask]]
    re_arranged_df = subsystem_df.stack().reset_index(
        level=[1], name="Sampled Fluxes"
    )
    re_arranged_df = re_arranged_df.rename(
        columns={"level_1": "Reaction"}
    ).reset_index(drop=True)
    return re_arranged_df


[docs]def plot_subsystem_fluxes(
    model, sampled_fluxes, subsystem_id, no_zero_cols=False
):
    """
    Visualizes the distribution of sampled fluxes in a selected
    subsytem.

    Parameters
    ----------
    model : cobra.Model
        Metabolic model.
    sampled_fluxes : pandas.Dataframe
        The calculated fluxes, output of sampling. For each reaction,
        n fluxes will be calculated (n = number of samples taken).
    subsystem_id : int
        ID of the subsystem the reactions should be extracted from.
        Can be selected after inspection using `show_subsystems()`.
    no_zero_cols : boolean
        Indicates if columns containing only 0s should be excluded.

    Returns
    -------
    fig : matplotlib.AxesSubplot
        Figure of sampled value distribution for one reaction.
    """
    if no_zero_cols:
        sampled_fluxes = sampled_fluxes.loc[
            :, (sampled_fluxes != 0).any(axis=0)
        ]
    reactions, subsystems = get_subsytem_reactions(model, subsystem_id)
    re_arranged_df = _reduce_sampled_fluxes(sampled_fluxes, reactions)
    fig = sns.boxplot(
        x="Reaction", y="Sampled Fluxes", data=re_arranged_df, orient="v"
    )
    plt.xticks(rotation=70)
    plt.title(subsystems[subsystem_id], size=20)
    return fig