Source code for BFAIR.mfa.sampling.compatibility

import re
import pandas as pd
from cobra import Reaction
import copy


def model_rxn_overlap(fittedFluxes, model):
    """
    Finds the overlapping reactions between a metabolic model that the
    calculated fluxes should be fit in to and the MFA output.

    Parameters
    ----------
    fittedFluxes : pandas.DataFrame
        Dataframe (reimported output of an INCA simulation) that contains
        the confidence intervals predicted for the model. Must provide a
        ``rxn_id`` column.
    model : cobra.Model
        Metabolic model. Only ``model.reactions`` and the ``id`` of each
        reaction are read.

    Returns
    -------
    overlapping reactions : pandas.Series
        The entries of ``fittedFluxes["rxn_id"]`` whose id is also the id
        of a reaction in the model. The original index labels are kept.
    """
    # Collect the ids once in a set so every membership test is O(1).
    model_rxn_ids = {rxn.id for rxn in model.reactions}
    INCA_rxns = fittedFluxes["rxn_id"]
    # True marks a reaction that IS part of the model (i.e. an overlap).
    mask = [rxn in model_rxn_ids for rxn in INCA_rxns]
    return INCA_rxns[mask]
def rxn_coverage(fittedFluxes, model):
    """
    Prints the percentage of how many of the reactions in the input fluxes
    are part of a given metabolic model.

    The printed number is the length of the result of
    ``model_rxn_overlap`` divided by the number of rows in
    ``fittedFluxes``, rounded to two decimals and then multiplied by 100.
    Nothing is returned.

    Parameters
    ----------
    fittedFluxes : pandas.DataFrame
        Dataframe (reimported output of an INCA simulation) that contains
        the confidence intervals predicted for the model.
    model : cobra.Model
        Metabolic model.
    """
    n_shared = len(model_rxn_overlap(fittedFluxes, model))
    n_total = len(fittedFluxes)
    fraction = n_shared / n_total
    # Rounding happens on the fraction, before scaling to percent.
    percentage = round(fraction, 2) * 100
    print(percentage, "%")
def split_lumped_rxns(lumped_rxns, fittedFluxes):
    """
    For data containing lumped reactions that are separated by an
    underscore. This function separates the names and adds a new reaction
    to the dataframe that is a copy of the data of the lumped reaction.

    The row of the lumped reaction is renamed to the first partial name;
    one copy of that row per remaining partial name is appended to the end
    of the dataframe.

    Parameters
    ----------
    lumped_rxns : pandas.Series or list
        Iterable element containing the names of the lumped reactions.
    fittedFluxes : pandas.DataFrame
        Dataframe (reimported output of an INCA simulation) that contains
        the confidence intervals predicted for the model. Must provide a
        ``rxn_id`` column. The input is not modified.

    Returns
    -------
    fittedFluxes : pandas.DataFrame
        Updated copy of the input, now with separated lumped reactions.
        The index is renumbered whenever rows were appended.

    Raises
    ------
    ValueError
        If a name in ``lumped_rxns`` is not present in the ``rxn_id``
        column.
    """
    fittedFluxes = copy.deepcopy(fittedFluxes)
    # Work positionally throughout so a non-default index cannot make the
    # lookup and the assignment address different rows.
    id_col = fittedFluxes.columns.get_loc("rxn_id")
    for rxn in lumped_rxns:
        first_name, *other_names = rxn.split("_")
        position = list(fittedFluxes["rxn_id"]).index(rxn)
        fittedFluxes.iloc[position, id_col] = first_name
        if not other_names:
            continue
        # Copy the lumped row itself (not a row looked up by its new
        # name, which could match an unrelated, earlier reaction).
        template = fittedFluxes.iloc[[position]]
        copies = pd.concat(
            [template] * len(other_names), ignore_index=True
        )
        copies["rxn_id"] = other_names
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported replacement.
        fittedFluxes = pd.concat([fittedFluxes, copies], ignore_index=True)
    return fittedFluxes
def split_lumped_reverse_rxns(lumped_reverse_rxns, fittedFluxes):
    """
    For data containing lumped reverse reactions that are separated by an
    underscore. This function separates the names and adds a new reaction
    to the dataframe that is a copy of the data of the lumped reaction.

    Only the part of each name in front of the first ``_reverse`` is
    split; every resulting partial name gets ``_reverse`` appended again.
    The row of the lumped reaction is renamed to the first of these names
    and one copy of that row per remaining name is appended to the end of
    the dataframe. Each processed name is printed.

    Parameters
    ----------
    lumped_reverse_rxns : pandas.Series or list
        Iterable element containing the names of the lumped reverse
        reactions.
    fittedFluxes : pandas.DataFrame
        Dataframe (reimported output of an INCA simulation) that contains
        the confidence intervals predicted for the model. Must provide a
        ``rxn_id`` column. The input is not modified.

    Returns
    -------
    fittedFluxes : pandas.DataFrame
        Updated copy of the input, now with separated lumped reactions.
        The index is renumbered whenever rows were appended.

    Raises
    ------
    TypeError
        If a name has no text followed by ``_reverse`` (the pattern does
        not match and the missing match is subscripted).
    ValueError
        If a name in ``lumped_reverse_rxns`` is not present in the
        ``rxn_id`` column.
    """
    fittedFluxes = copy.deepcopy(fittedFluxes)
    # Work positionally throughout so a non-default index cannot make the
    # lookup and the assignment address different rows.
    id_col = fittedFluxes.columns.get_loc("rxn_id")
    for rxn in lumped_reverse_rxns:
        print(rxn)
        # Shortest prefix in front of the first "_reverse".
        name = re.match(".+?(?=_reverse)", rxn)[0]
        first_name, *other_names = (
            part + "_reverse" for part in name.split("_")
        )
        position = list(fittedFluxes["rxn_id"]).index(rxn)
        fittedFluxes.iloc[position, id_col] = first_name
        if not other_names:
            continue
        # Copy the lumped row itself (not a row looked up by its new
        # name, which could match an unrelated, earlier reaction).
        template = fittedFluxes.iloc[[position]]
        copies = pd.concat(
            [template] * len(other_names), ignore_index=True
        )
        copies["rxn_id"] = other_names
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported replacement.
        fittedFluxes = pd.concat([fittedFluxes, copies], ignore_index=True)
    return fittedFluxes
def find_reverse_rxns(fittedFluxes):
    """
    Provides an overview over reverse reactions and their corresponding
    forward reactions.

    A reaction counts as a reverse reaction when its id contains
    ``_reverse``; the forward name is the text in front of the first
    ``_reverse``.

    Parameters
    ----------
    fittedFluxes : pandas.DataFrame
        Dataframe (reimported output of an INCA simulation) that contains
        the confidence intervals predicted for the model. Only the
        ``rxn_id`` column is read.

    Returns
    -------
    reverse_rxns : pandas.DataFrame
        One row per reverse reaction with the columns ``forward`` and
        ``reverse``. Each row is labelled with the position of the
        reverse reaction within the ``rxn_id`` column.
    """
    pairs = {
        position: {
            "forward": re.match(".+?(?=_reverse)", rxn_id)[0],
            "reverse": rxn_id,
        }
        for position, rxn_id in enumerate(fittedFluxes["rxn_id"])
        if "_reverse" in rxn_id
    }
    return pd.DataFrame.from_dict(pairs, "index")
def _overlaps(a, b):
    """
    Return the amount of overlap between the ranges a and b.

    If >0, how much they overlap
    If 0, they are book-ended
    If <0, distance

    Parameters
    ----------
    a, b : list
        Lists of two numerals denoting the borders of ranges; element 0
        is used as the lower and element 1 as the upper border.

    Returns
    -------
    overlap : float
        Overlap as described above.
    """
    # The shared stretch ends at the smaller upper border ...
    shared_end = min(a[1], b[1])
    # ... and starts at the larger lower border.
    shared_start = max(a[0], b[0])
    return shared_end - shared_start
def combine_split_rxns(fittedFluxes):
    """
    Checks the flux bounds of forward and reverse reactions. If they
    overlap (or touch), they are combined into one reaction with the
    bounds covering both forward and reverse. If they don't, their names
    are noted in a list so they can be processed in a subsequent step.

    The reverse range is mirrored to ``[-flux_ub, -flux_lb]`` before it is
    compared with the forward range. When combining, the forward row's
    ``flux_lb`` is set to the negated ``flux_ub`` of the reverse row and
    the reverse row is dropped.

    Parameters
    ----------
    fittedFluxes : pandas.DataFrame
        Dataframe (reimported output of an INCA simulation) that contains
        the confidence intervals predicted for the model. The columns
        ``rxn_id``, ``flux_lb`` and ``flux_ub`` are read. The input is
        not modified.

    Returns
    -------
    fittedFluxes : pandas.DataFrame
        Updated copy of the input, but overlapping forward and reverse
        reactions are joined. The index is reset without dropping it, so
        the previous index labels are returned as an extra column.
    rxns_to_split : list
        Names of the reactions for which a corresponding reverse reaction
        should be added to the model.

    Raises
    ------
    IndexError
        If a reverse reaction has no row with the matching forward name.
    """
    fittedFluxes = copy.deepcopy(fittedFluxes)
    rxns_to_split = []
    reverse_rxns = find_reverse_rxns(fittedFluxes)
    for _, row in reverse_rxns.iterrows():
        # Access by label: positional access on a label-indexed Series
        # (row[0]) is deprecated in pandas.
        forward_id = row["forward"]
        reverse_id = row["reverse"]

        fittedFluxes_forward = fittedFluxes[
            fittedFluxes["rxn_id"] == forward_id
        ]
        forward_lb = fittedFluxes_forward["flux_lb"].values[0]
        forward_ub = fittedFluxes_forward["flux_ub"].values[0]
        forward = [forward_lb, forward_ub]

        fittedFluxes_reverse = fittedFluxes[
            fittedFluxes["rxn_id"] == reverse_id
        ]
        reverse_lb = fittedFluxes_reverse["flux_lb"].values[0]
        reverse_ub = fittedFluxes_reverse["flux_ub"].values[0]
        # Express the reverse flux in the forward direction.
        reverse = [-reverse_ub, -reverse_lb]

        # >= 0 covers both truly overlapping and book-ended ranges; only
        # ranges with a gap between them have to stay separate.
        if _overlaps(forward, reverse) >= 0:
            fittedFluxes.at[
                fittedFluxes_forward.index[0], "flux_lb"
            ] = -fittedFluxes.at[fittedFluxes_reverse.index[0], "flux_ub"]
            fittedFluxes = fittedFluxes.drop(fittedFluxes_reverse.index[0])
        else:
            print("These reactions need to be split into two:", forward_id)
            rxns_to_split.append(forward_id)
    fittedFluxes = fittedFluxes.reset_index()
    return fittedFluxes, rxns_to_split
def cobra_add_split_rxns(rxns_to_split, model):
    """
    Adds inverse copies of the reactions that need a defined reverse
    reaction to the input model.

    For every given reaction id a new reaction ``<id>_reverse`` is created
    with the same name and subsystem, bounds of 0.0 and 1000.0, the
    metabolite coefficients of the source reaction multiplied by -1, and
    a gene reaction rule listing the source reaction's genes joined by
    ``or``. The model is modified in place and a status line is printed
    per reaction. A ``KeyError`` raised while handling a reaction is
    caught and reported instead of propagated.

    Parameters
    ----------
    rxns_to_split : list
        Names of the reactions for which a corresponding reverse reaction
        should be added to the model.
    model : cobra.Model
        Metabolic model.
    """
    for rxn in rxns_to_split:
        try:
            rxn_name = f"{rxn}_reverse"
            reaction = Reaction(rxn_name)
            # Source reaction that serves as the template for the copy.
            template = model.reactions.get_by_id(rxn)
            reaction.name = template.name
            reaction.subsystem = template.subsystem
            reaction.lower_bound = 0.0  # This is the default
            reaction.upper_bound = 1000.0  # This is the default
            # Same metabolites, opposite sign: the copy runs backwards.
            inverted_stoichiometry = {
                met.id: template.get_coefficient(met.id) * -1
                for met in template.metabolites
            }
            model.add_reactions([reaction])
            added = model.reactions.get_by_id(rxn_name)
            added.add_metabolites(inverted_stoichiometry)
            # NOTE(review): all genes are joined with "or", whatever the
            # logic of the source reaction's own rule was - confirm that
            # this is intended.
            gene_ids = [gene.id for gene in template.genes]
            reaction_rule = "( " + " or ".join(gene_ids) + " )"
            added.gene_reaction_rule = reaction_rule
            print(f"- Added {rxn} to model")
        except KeyError:
            print(f"# Could not add {rxn} to model")