# -*- coding: utf-8 -*-
"""Main module."""
# Libraries
import pandas as pd
import numpy as np
import time
import ast
import sys
try:
import matlab.engine
except ModuleNotFoundError:
print(
"Please check the README for a guide on how to "
"install the MATLAB engine"
)
__version__ = "0.0.1"
class INCA_script:
def __init__(self):
"""
Class of functions to set up a MATLAB script for MFA using
INCA and for interaction with MATLAB itself. Will later on
also re-import data from .mat file.
"""
[docs] def limit_to_one_model(self, data_input, model_name_column, model_name):
"""
Limits the data to values that are assigned to one metabolic model
Parameters
----------
data_input : pandas.DataFrame
Input data file that needs to be processed
model_name_column : string
Column name where model names are defined
model_name : string
Name of the model the data will be limited to
Returns
-------
data_input : pandas.DataFrame
Limited data
"""
data_output = pd.DataFrame()
for i, row in data_input.iterrows():
if row[model_name_column] == model_name:
if len(data_output) == 0:
data_output = pd.DataFrame.transpose(row.to_frame())
else:
data_output = data_output.append(
pd.DataFrame.transpose(row.to_frame())
)
data_input = data_output
return data_input
[docs] def limit_to_one_experiment(
self, data_input, experiment_name_column, experiment_name
):
"""
Limits the data to values that were acquired in one experiment
Parameters
----------
data_input : pandas.DataFrame
Input data file that needs to be processed
experiment_name_column : string
Column name where experiment names
are defined
model_name : string
Name of the model the data will be limited to
Returns
-------
data_input : pandas.DataFrame
Limited data
"""
data_output = pd.DataFrame()
for i, row in data_input.iterrows():
if row[experiment_name_column] == experiment_name:
if len(data_output) == 0:
data_output = pd.DataFrame.transpose(row.to_frame())
else:
data_output = data_output.append(
pd.DataFrame.transpose(row.to_frame())
)
data_input = data_output
return data_input
def prepare_input(
self, string, type_of_replacement=["Curly", "Double_square"]
):
"""
Process data that is stored in strings of lists etc in the files
Parameters
----------
string : string, in this set up a single cell in a pandas.DataFrame
Processes the data in cells in the dataframes.
The info is either bordered by curly or double square
brackets.
type_of_replacement : string
Define the type of surrounding brackets
Returns
-------
string : list
returns data in lists without bordering brackets
"""
if type_of_replacement == "Curly":
string = string.strip("}{").split(",")
elif type_of_replacement == "Double_square":
string = string[1:-1]
string = ast.literal_eval(string)
return string
[docs] def initiate_MATLAB_script(self):
"""
Starts writing the MATLAB script
Returns
-------
mat_script : string
Initialized MATLAB script
"""
mat_script = "clear functions\n\n"
return mat_script
def reaction_mapping(
self,
atomMapping_molecules_ids,
model_molecules_ids,
atomMapping_molecules_stoichiometry,
atomMapping_molecules_elements,
atomMapping_molecules_mapping,
model_molecules_stoichiometry,
reaction_type="reactant",
):
"""
Provides the carbon mapping for metabolites in a model.
Called within "add_reactions_to_script()"
Parameters
----------
atomMapping_molecules_ids : list
processed atomMappingReactions_data_I
reactants_ids_tracked or products_ids_tracked
model_molecules_ids : list
processed modelReaction_data_I
reactants_ids or products_ids
atomMapping_molecules_stoichiometry : list
processed
atomMappingReactions_data_I
reactants_stoichiometry_tracked or
productss_stoichiometry_tracked
atomMapping_molecules_elements : list
processed
atomMappingReactions_data_I
reactants_elements_tracked or products_elements_tracked
atomMapping_molecules_mapping : list
processed
atomMappingReactions_data_I
reactants_mapping or products_mapping
model_molecules_stoichiometry : list
processed modelReaction_data_I
reactants_stoichiometry or products_stoichiometry
reaction_type : string
reactant or product
Returns
-------
rxn_equation : string
Reaction equation for the defined reaction
"""
rxn_equation = ""
if reaction_type == "product":
rxn_equation += " -> "
# This had to be added because some metabolites appear more than once
# in one reaction with different mappings
seen_molecule = {}
dupes_molecule = []
if not len(atomMapping_molecules_ids) == len(
set(atomMapping_molecules_ids)
):
for x in atomMapping_molecules_ids:
if x not in seen_molecule:
seen_molecule[x] = 1
else:
if seen_molecule[x] == 1:
dupes_molecule.append(x)
seen_molecule[x] += 1
# Iterate through the metabolites involved in the reactions
for molecule_cnt, molecule in enumerate(model_molecules_ids):
# Indices for metabolites with multiple different mappings
# are being sought
# initiate boolean for "while" loop
duplicates_not_done = True
duplicates_counter = 0
while duplicates_not_done:
# this little bit adds +s if it's not the first molecule
if molecule_cnt > 0:
rxn_equation += " + "
if molecule in atomMapping_molecules_ids:
if molecule in dupes_molecule:
positions = [
i
for i, x in enumerate(atomMapping_molecules_ids)
if x == molecule
]
molecule_index = positions[duplicates_counter]
if duplicates_counter == len(positions) - 1:
duplicates_not_done = False
duplicates_counter += 1
else:
molecule_index = atomMapping_molecules_ids.index(
molecule
)
duplicates_not_done = False
# here we check if it is a tuple, i.e. there is more than
# one metabolite, or a list, i.e. only one metabolite
rxn_equation += (
str(
atomMapping_molecules_stoichiometry[molecule_index]
)
+ "*"
+ molecule
+ " ("
)
if type(atomMapping_molecules_elements) is tuple:
# here we go through the mapping, enumerate the atoms
# and add the corresponding mapping
for mapping_cnt, mapping in enumerate(
atomMapping_molecules_elements[molecule_index]
):
# to remove the final whitespace
if (mapping_cnt + 1) == len(
atomMapping_molecules_mapping[molecule_index]
):
rxn_equation += (
mapping
+ str(mapping_cnt + 1)
+ ":"
+ atomMapping_molecules_mapping[
molecule_index
][mapping_cnt]
)
else:
rxn_equation += (
mapping
+ str(mapping_cnt + 1)
+ ":"
+ atomMapping_molecules_mapping[
molecule_index
][mapping_cnt]
+ " "
)
elif type(atomMapping_molecules_elements) is list:
for mapping_cnt, mapping in enumerate(
atomMapping_molecules_elements
):
# to remove the final whitespace
if (mapping_cnt + 1) == len(
atomMapping_molecules_mapping[0]
):
rxn_equation += (
mapping
+ str(mapping_cnt + 1)
+ ":"
+ atomMapping_molecules_mapping[0][
mapping_cnt
]
)
else:
rxn_equation += (
mapping
+ str(mapping_cnt + 1)
+ ":"
+ atomMapping_molecules_mapping[0][
mapping_cnt
]
+ " "
)
# close the brackets for mapping
rxn_equation += ")"
# Exceptions that are in the model but not
# tracked are added here
else:
rxn_equation += (
str(model_molecules_stoichiometry[molecule_cnt])
+ "*"
+ molecule
)
duplicates_not_done = False
return rxn_equation
[docs] def add_reactions_to_script(
self, modelReaction_data_I, atomMappingReactions_data_I
):
"""
Translates the model and adds mapping using reaction_mapping()
Parameters
----------
modelReaction_data_I : pandas.DataFrame
pre-processed modelReaction_data_I input data
atomMappingReactions_data_I : pandas.DataFrame
pre-processed
atomMappingReactions_data_I input data
Returns
-------
mat_script : string
Extention to the MATLAB script under construction
model_rxn_ids_exp : list
List of reaction IDs used for the model
"""
if len(atomMappingReactions_data_I["rxn_id"]) != len(
atomMappingReactions_data_I["rxn_id"].unique()
):
sys.exit(
"There is a duplicate reaction in atomMappingReactions_data_I"
)
mat_script = "r = reaction({... % define reactions\n"
model_rxn_ids = []
model_rxn_ids_exp = []
# First we go through the reactions in the model to make sure that
# we only include the ones that are actually there
for cnt, modelReaction_data in modelReaction_data_I.iterrows():
if modelReaction_data["used_"]:
model_rxn_id = modelReaction_data["rxn_id"]
model_rxn_ids.append(model_rxn_id)
model_rxn_ids = sorted(model_rxn_ids, key=str.lower)
for id_cnt, model_rxn_id in enumerate(model_rxn_ids):
for cnt, modelReaction_data in modelReaction_data_I.iterrows():
# Model
index_in_model = list(modelReaction_data_I["rxn_id"]).index(
modelReaction_data["rxn_id"]
)
model_reactants_stoichiometry = self.prepare_input(
modelReaction_data_I.iloc[index_in_model].loc[
"reactants_stoichiometry"
],
"Curly",
)
model_products_stoichiometry = self.prepare_input(
modelReaction_data_I.iloc[index_in_model].loc[
"products_stoichiometry"
],
"Curly",
)
if modelReaction_data["rxn_id"] == model_rxn_id:
rxn_equation = "'"
# we save the location of the row corresponding to
# this reaction in the other file
# "prepare_input" is a helper function to make the
# entries useable, this will have to be checked with
# other input
# Atom Mapping
index_in_atomMapping = list(
atomMappingReactions_data_I["rxn_id"]
).index(modelReaction_data["rxn_id"])
atomMapping_reactants_stoichiometry = self.prepare_input(
atomMappingReactions_data_I.iloc[
index_in_atomMapping
].loc["reactants_stoichiometry_tracked"],
"Curly",
)
atomMapping_products_stoichiometry = self.prepare_input(
atomMappingReactions_data_I.iloc[
index_in_atomMapping
].loc["products_stoichiometry_tracked"],
"Curly",
)
# this step makes sure that we exclude reactions that
# do not have any stoichiometry annotated
if (
atomMapping_reactants_stoichiometry[0] == ""
and atomMapping_products_stoichiometry[0] == ""
):
print(
"There is no stoichimetriy given for:",
model_rxn_id,
)
# Model
model_reactants_ids = self.prepare_input(
modelReaction_data_I.iloc[index_in_model].loc[
"reactants_ids"
],
"Curly",
)
model_products_ids = self.prepare_input(
modelReaction_data_I.iloc[index_in_model].loc[
"products_ids"
],
"Curly",
)
model_reactants_stoichiometry = [
float(i) * -1
for i in model_reactants_stoichiometry
]
model_products_stoichiometry = [
float(i) for i in model_products_stoichiometry
]
for reactant_cnt, reactant in enumerate(
model_reactants_ids
):
# this little bit adds +s if it's not
# the first reactant
if reactant_cnt > 0:
rxn_equation += " + "
rxn_equation += (
str(
model_reactants_stoichiometry[reactant_cnt]
)
+ "*"
+ reactant
)
rxn_equation += " -> "
for product_cnt, product in enumerate(
model_products_ids
):
# this little bit adds +s if it's not
# the first product
if product_cnt > 0:
rxn_equation += " + "
rxn_equation += (
str(model_products_stoichiometry[product_cnt])
+ "*"
+ product
)
else:
# process the remaining info about the metabolites.
# Done here because it can throw an error if
# anything is empty
# Atom Mapping
atomMapping_reactants_ids = self.prepare_input(
atomMappingReactions_data_I.iloc[
index_in_atomMapping
].loc["reactants_ids_tracked"],
"Curly",
)
atomMapping_products_ids = self.prepare_input(
atomMappingReactions_data_I.iloc[
index_in_atomMapping
].loc["products_ids_tracked"],
"Curly",
)
atomMapping_reactants_elements = self.prepare_input(
atomMappingReactions_data_I.iloc[
index_in_atomMapping
].loc["reactants_elements_tracked"],
"Double_square",
)
atomMapping_products_elements = self.prepare_input(
atomMappingReactions_data_I.iloc[
index_in_atomMapping
].loc["products_elements_tracked"],
"Double_square",
)
atomMapping_reactants_mapping = self.prepare_input(
atomMappingReactions_data_I.iloc[
index_in_atomMapping
].loc["reactants_mapping"],
"Curly",
)
atomMapping_products_mapping = self.prepare_input(
atomMappingReactions_data_I.iloc[
index_in_atomMapping
].loc["products_mapping"],
"Curly",
)
atomMapping_reactants_stoichiometry = [
float(i) * -1
for i in atomMapping_reactants_stoichiometry
]
atomMapping_products_stoichiometry = [
float(i)
for i in atomMapping_products_stoichiometry
]
# Model
model_reactants_ids = self.prepare_input(
modelReaction_data_I.iloc[index_in_model].loc[
"reactants_ids"
],
"Curly",
)
model_products_ids = self.prepare_input(
modelReaction_data_I.iloc[index_in_model].loc[
"products_ids"
],
"Curly",
)
model_reactants_stoichiometry = [
float(i) * -1
for i in model_reactants_stoichiometry
]
model_products_stoichiometry = [
float(i) for i in model_products_stoichiometry
]
rxn_equation = "'"
# We go through the IDs to treat each
# metabolite separately
reactant_equation = self.reaction_mapping(
atomMapping_reactants_ids,
model_reactants_ids,
atomMapping_reactants_stoichiometry,
atomMapping_reactants_elements,
atomMapping_reactants_mapping,
model_reactants_stoichiometry,
reaction_type="reactant",
)
product_equation = self.reaction_mapping(
atomMapping_products_ids,
model_products_ids,
atomMapping_products_stoichiometry,
atomMapping_products_elements,
atomMapping_products_mapping,
model_products_stoichiometry,
reaction_type="product",
)
rxn_equation = (
rxn_equation + reactant_equation + product_equation
)
# the ids of the equations are being added to the list that
# will be exported
model_rxn_ids_exp.append(model_rxn_id)
mat_script += rxn_equation + " ';...\n"
mat_script += "});\n\n"
return mat_script, model_rxn_ids_exp
[docs] def initialize_model(self):
"""
Previously described reactions are assigned to a model object
Returns
-------
mat_script : string
addition to MATLAB script under construction
"""
mat_script = "m = model(r); % set up model\n\n"
return mat_script
# NOTE: hard-coded for now until a better workaround can be done
[docs] def unbalanced_reactions(
self,
atomMappingMetabolite_data_I,
unbalanced_metabolites=["co2_e", "h2o_e", "h_e", "na1_e"],
):
"""
Adds in the metabolite state (balanced or unbalanced)
Parameters
----------
atomMappingMetabolite_data_I : pandas.DataFrame
pre-processed
atomMappingMetabolite_data_I input data
unbalanced_metabolites : list of string elements
list of unbalanced
metabolites - hardcoded
Returns
-------
mat_script : string
addition to MATLAB script under construction
"""
tmp_script = "% define unbalanced reactions\n"
# specify reactions that should be forcible unbalanced
metabolites_all = [
x["met_id"] for cnt, x in atomMappingMetabolite_data_I.iterrows()
]
# unbalanced reactions:
for met in unbalanced_metabolites:
if met in metabolites_all:
tmp_script = (
tmp_script
+ "m.states{'"
+ met
+ ".EX"
+ "'}.bal = false;\n"
)
mat_script = tmp_script
return mat_script
# Stedv stuff should be checked once more before merging to main
[docs] def add_reaction_parameters(
self,
modelReaction_data_I,
measuredFluxes_data_I,
model_rxn_ids,
fluxes_present=True,
):
"""
Flux parameters are added. They correspond to the previously
described reactions
Parameters
----------
modelReaction_data_I : pandas.DataFrame
pre-processed
modelReaction_data_I input data
measuredFluxes_data_I : pandas.DataFrame
pre-processed
measuredFluxes_data_I input data
model_rxn_ids : list
pre-processed
model_rxn_ids input data
fluxes_present : pandas.DataFrame
confirm if fluxes are present. If not then the
flux measuredFluxes_data_I file will be ignored
Returns
-------
mat_script : string
addition to MATLAB script under construction
"""
# Add in initial fluxes (lb/ub, values, on/off) and define the
# reaction ids
# NOTE: lb, ub, val = 0 for steady-state
# lower bounds
tmp_script_lb = "\n% define lower bounds\nm.rates.flx.lb = [...\n"
# upper bounds
tmp_script_ub = "\n%define upper bounds\nm.rates.flx.ub = [...\n"
# intial flux values
tmp_script_val = "\n% define flux vals\nm.rates.flx.val = [...\n"
# include/exclude a reaction from the simulation
tmp_script_on = "\n% include/exclude reactions\nm.rates.on = [...\n"
# rxn_ids
tmp_script_id = "\n% define reaction ids\nm.rates.id = {...\n"
for model_rxn_id in model_rxn_ids:
for rxn_cnt, rxn in modelReaction_data_I.iterrows():
if rxn["rxn_id"] == model_rxn_id and rxn["used_"]:
if fluxes_present:
if rxn["rxn_id"] in list(
measuredFluxes_data_I["rxn_id"]
):
for (
flux_cnt,
flux,
) in measuredFluxes_data_I.iterrows():
if rxn["rxn_id"] == flux["rxn_id"]:
tmp_script_lb = (
tmp_script_lb
+ str(flux["flux_lb"])
+ ",...\n"
)
tmp_script_ub = (
tmp_script_ub
+ str(flux["flux_ub"])
+ ",...\n"
)
tmp_script_val = (
tmp_script_val
+ str(flux["flux_average"])
+ ",...\n"
)
else:
tmp_script_lb = (
tmp_script_lb
+ str(rxn["lower_bound"])
+ ",...\n"
)
tmp_script_ub = (
tmp_script_ub
+ str(rxn["upper_bound"])
+ ",...\n"
)
tmp_script_val = tmp_script_val + str(0) + ",...\n"
else:
tmp_script_lb = (
tmp_script_lb + str(rxn["lower_bound"]) + ",...\n"
)
tmp_script_ub = (
tmp_script_ub + str(rxn["upper_bound"]) + ",...\n"
)
if rxn["upper_bound"] == 0.0 and rxn["lower_bound"] == 0.0:
tmp_script_on = tmp_script_on + "false" + ",...\n"
else:
tmp_script_on = tmp_script_on + "true" + ",...\n"
tmp_script_id = (
tmp_script_id + "'" + rxn["rxn_id"] + "',...\n"
)
tmp_script_lb = tmp_script_lb + "];\n"
tmp_script_ub = tmp_script_ub + "];\n"
tmp_script_val = tmp_script_val + "];\n"
tmp_script_on = tmp_script_on + "];\n"
tmp_script_id = tmp_script_id + "};\n"
mat_script = (
tmp_script_lb
+ tmp_script_ub
+ tmp_script_val
+ tmp_script_on
+ tmp_script_id
)
return mat_script
[docs] def verify_and_estimate(self):
"""
Adds a QC step and defines the restarts for later processing
Returns
-------
mat_script : string
addition to MATLAB script under construction
"""
mat_script = "\nm.rates.flx.val = mod2stoich(m); % make sure the fluxes are feasible\n" # noqa E501
mat_script = (
mat_script
+ "m.options.fit_starts = 10; % 10 restarts during the estimation procedure\n" # noqa E501
)
return mat_script
[docs] def add_experimental_parameters(
self,
experimentalMS_data_I,
tracer_I,
measuredFluxes_data_I,
atomMappingMetabolite_data_I,
):
"""
Defines the measured fragments and adds tracer information
Parameters
----------
experimentalMS_data_I : pandas.DataFrame
pre-processed
experimentalMS_data_I input data
tracer_I : pandas.DataFrame
pre-processed
tracer_I input data
measuredFluxes_data_I : pandas.DataFrame
pre-processed
measuredFluxes_data_I input data
atomMappingMetabolite_data_I : pandas.DataFrame
pre-processed
atomMappingMetabolite_data_I input data
Returns
-------
mat_script : string
addition to MATLAB script under construction
fragments_used : list
List of the fragments in the data that fit into
the defined parameters
"""
mat_script = ""
fragments_used = []
experiments_all = [
x["experiment_id"] for cnt, x in experimentalMS_data_I.iterrows()
]
experiments = list(set(experiments_all))
experiments.sort()
fragments_all = [
x["fragment_id"] for cnt, x in experimentalMS_data_I.iterrows()
]
fragments = list(set(fragments_all))
fragments.sort()
for experiment_cnt, experiment in enumerate(experiments):
tmp_script = "\n% define which fragments of molecules were measured in which experiment\nd = msdata({...\n" # noqa E501
for frg_cnt, fragment in enumerate(fragments):
for ms_cnt, ms_data in experimentalMS_data_I.iterrows():
if (
ms_data["fragment_id"] == fragment
and ms_data["experiment_id"] == experiment
):
met_positions = [
int(x)
for x in self.prepare_input(
ms_data["met_atompositions"], "Curly"
)
]
atomMapping_atompositions_row = atomMappingMetabolite_data_I[
atomMappingMetabolite_data_I["met_id"]
== ms_data["met_id"]
]
if (
len(
list(
atomMapping_atompositions_row[
"met_atompositions"
]
)
)
> 0
):
atomMapping_atompositions = [
int(x)
for x in self.prepare_input(
list(
atomMapping_atompositions_row[
"met_atompositions"
]
)[0],
"Curly",
)
]
if max(met_positions) <= max(
atomMapping_atompositions
):
fragments_used.append(ms_data["fragment_id"])
tmp_script = (
tmp_script
+ "'"
+ ms_data["fragment_id"]
+ ": "
+ ms_data["met_id"]
+ " @ "
)
met_elements = self.prepare_input(
ms_data["met_elements"], "Curly"
)
for pos_cnt, pos in enumerate(met_positions):
tmp_script = (
tmp_script
+ met_elements[pos_cnt]
+ str(pos + 1)
+ " "
)
tmp_script = tmp_script[:-1]
tmp_script = tmp_script + "';\n"
break
else:
break
tmp_script = tmp_script + "});\n"
tmp_script = (
tmp_script
+ "\n% initialize mass distribution vector\nd.idvs = idv;\n"
)
mat_script = mat_script + tmp_script
# write substrate labeling (i.e. tracer) information
tmp_script = ""
tmp_script = (
tmp_script
+ "\n% define tracers used in the experiments\nt = tracer({...\n" # noqa E501
)
for tracer_cnt, tracer in tracer_I.iterrows():
if tracer["experiment_id"] == experiment:
tmp_script = (
tmp_script
+ "'"
+ tracer["met_name"]
+ ": "
+ tracer["met_id"]
+ ".EX"
+ " @ "
)
tracer_met_atompositions = self.prepare_input(
tracer["met_atompositions"], "Curly"
)
tracer_met_elements = self.prepare_input(
tracer["met_elements"], "Curly"
)
for cnt, met_atompositions in enumerate(
tracer_met_atompositions
):
tmp_script = (
tmp_script
+ tracer_met_elements[cnt]
+ str(met_atompositions)
+ " "
)
# remove extra whitespace
tmp_script = tmp_script[:-1]
tmp_script = tmp_script + "';...\n"
tmp_script = tmp_script + "});\n"
tmp_script = (
tmp_script
+ "\n% define fractions of tracers used\nt.frac = [ "
)
for tracer_cnt, tracer in tracer_I.iterrows():
if tracer["experiment_id"] == experiment:
tmp_script = tmp_script + str(tracer["ratio"]) + ","
# remove extra comma
tmp_script = tmp_script[:-1]
tmp_script = tmp_script + " ];\n"
mat_script = mat_script + tmp_script
# write flux measurements
tmp_script = ""
tmp_script = (
tmp_script + "\n% define experiments for fit data\nf = data(' "
)
for flux_cnt, flux in measuredFluxes_data_I.iterrows():
if flux["experiment_id"] == experiment:
tmp_script = tmp_script + flux["rxn_id"] + " "
tmp_script = tmp_script[:-1]
tmp_script = tmp_script + " ');\n"
tmp_script = tmp_script + "\n% add fit values\nf.val = [...\n"
for flux_cnt, flux in measuredFluxes_data_I.iterrows():
if flux["experiment_id"] == experiment:
tmp_script = (
tmp_script + str(flux["flux_average"]) + ",...\n"
)
tmp_script = tmp_script + "];\n"
tmp_script = tmp_script + "% add fit stds\nf.std = [...\n"
for flux_cnt, flux in measuredFluxes_data_I.iterrows():
if flux["experiment_id"] == experiment:
tmp_script = (
tmp_script + str(flux["flux_stdev"]) + ",...\n"
)
tmp_script = tmp_script + "];\n"
tmp_script = (
tmp_script
+ "\n% initialize experiment with t and add f and d\nx = experiment(t);\n" # noqa E501
)
tmp_script = tmp_script + "x.data_flx = f;\n"
tmp_script = tmp_script + "x.data_ms = d;\n"
tmp_script = (
tmp_script
+ "\n% assing all the previous values to a specific experiment"
)
tmp_script = tmp_script + "\nm.expts(%d) = x;\n" % (
experiment_cnt + 1
)
tmp_script = tmp_script + "\nm.expts(%d).id = {'%s'};\n" % (
experiment_cnt + 1,
experiment,
)
mat_script = mat_script + tmp_script
return mat_script, fragments_used
# check the stedv stuff before merging to main
[docs] def mapping(self, experimentalMS_data_I, fragments_used):
"""
Adds MS data to measured fragments
Parameters
----------
experimentalMS_data_I : pandas.DataFrame
pre-processed
experimentalMS_data_I input data
fragments_used : list
List of the fragments in the data that fit into
the defined parameters from "add_experimental_parameters()"
Returns
-------
mat_script : string
addition to MATLAB script under construction
"""
experiments_all = [
x["experiment_id"] for cnt, x in experimentalMS_data_I.iterrows()
]
experiments = list(set(experiments_all))
experiments.sort()
fragments = fragments_used
times_all = [
x["time_point"] for cnt, x in experimentalMS_data_I.iterrows()
]
times = list(set(times_all))
times.sort()
mat_script = ""
# Add in ms data or Write ms data to separate file
for experiment_cnt, experiment in enumerate(experiments):
tmp_script = "\n% add experimental data for annotated fragments\n"
for i, fragment in enumerate(fragments):
for j, time_j in enumerate(times):
for cnt_x, ms_data in experimentalMS_data_I.iterrows():
if (
ms_data["fragment_id"] == fragments[i]
and ms_data["time_point"] == times[j]
and ms_data["experiment_id"] == experiment
):
if not pd.isna(
ms_data["intensity_normalized_average"]
):
ms_data_norm_ave_int = [
float(x)
for x in ms_data[
"intensity_normalized_average"
]
.strip("}{")
.split(",")
]
ms_data_norm_stdev_int = [
float(x)
for x in ms_data[
"intensity_normalized_stdev"
]
.strip("}{")
.split(",")
]
for cnt, intensity in enumerate(
ms_data_norm_ave_int
):
# each column is a seperate time point
# each row is a seperate idv
if cnt == 0:
# Assign names and times
name = (
fragment
+ "_"
+ str(cnt)
+ "_"
+ str(j)
+ "_"
+ str(experiment)
)
tmp_script = tmp_script + (
"m.expts(%d).data_ms(%d).idvs.id(%d,%d) = {'%s'};\n" # noqa E501
% (
experiment_cnt + 1,
i + 1,
cnt + 1,
j + 1,
name,
)
)
tmp_script = tmp_script + (
"m.expts(%d).data_ms(%d).idvs.time(%d,%d) = %s;\n" # noqa E501
% (
experiment_cnt + 1,
i + 1,
cnt + 1,
j + 1,
time_j,
)
)
# Assign values
ave = intensity
stdev = ms_data_norm_stdev_int[cnt]
# remove 0.0000 values and replace with NaN
if ave <= 1e-6:
ave = "NaN"
tmp_script = tmp_script + (
"m.expts(%d).data_ms(%d).idvs.val(%d,%d) = %s;\n" # noqa E501
% (
experiment_cnt + 1,
i + 1,
cnt + 1,
j + 1,
ave,
)
)
else:
tmp_script = tmp_script + (
"m.expts(%d).data_ms(%d).idvs.val(%d,%d) = %f;\n" # noqa E501
% (
experiment_cnt + 1,
i + 1,
cnt + 1,
j + 1,
ave,
)
)
if stdev <= 1e-3:
if stdev == 0.0:
stdev = 0.05
else:
stdev = 0.001
tmp_script = tmp_script + (
"m.expts(%d).data_ms(%d).idvs.std(%d,%d) = %s;\n" # noqa E501
% (
experiment_cnt + 1,
i + 1,
cnt + 1,
j + 1,
stdev,
)
)
else:
tmp_script = tmp_script + (
"m.expts(%d).data_ms(%d).idvs.std(%d,%d) = %f;\n" # noqa E501
% (
experiment_cnt + 1,
i + 1,
cnt + 1,
j + 1,
stdev,
)
)
else:
ave == "NaN"
stdev = "NaN"
tmp_script = tmp_script + (
"m.expts(%d).data_ms(%d).idvs.std(%d,%d) = %s;\n" # noqa E501
% (
experiment_cnt + 1,
i + 1,
cnt + 1,
j + 1,
stdev,
)
)
mat_script = mat_script + tmp_script
return mat_script
[docs] def script_generator(
self,
modelReaction_data_I,
atomMappingReactions_data_I,
atomMappingMetabolite_data_I,
measuredFluxes_data_I,
experimentalMS_data_I,
tracer_I,
):
"""
Combines the functions that construct the model
Parameters
----------
modelReaction_data_I : pandas.DataFrame
pre-processed
modelReaction_data_I input data
atomMappingReactions_data_I : pandas.DataFrame
pre-processed
atomMappingReactions_data_I input data
atomMappingMetabolite_data_I : pandas.DataFrame
pre-processed
atomMappingMetabolite_data_I input data
measuredFluxes_data_I : pandas.DataFrame
pre-processed
measuredFluxes_data_I input data
experimentalMS_data_I : pandas.DataFrame
pre-processed
experimentalMS_data_I input data
tracer_I : pandas.DataFrame
pre-processed
tracer_I input data
Returns
-------
script : string
combined parts of the MATLAB script constrcted
by the previously defined functions
"""
script = ""
script = self.initiate_MATLAB_script()
script_temp, model_rxn_ids = self.add_reactions_to_script(
modelReaction_data_I, atomMappingReactions_data_I
)
script += script_temp
script += self.initialize_model()
script += self.symmetrical_metabolites(atomMappingMetabolite_data_I)
script += self.unbalanced_reactions(atomMappingMetabolite_data_I)
script += self.add_reaction_parameters(
modelReaction_data_I, measuredFluxes_data_I, model_rxn_ids
)
script += self.verify_and_estimate()
script_temp, fragments_used = self.add_experimental_parameters(
experimentalMS_data_I,
tracer_I,
measuredFluxes_data_I,
atomMappingMetabolite_data_I,
)
script += script_temp
script += self.mapping(experimentalMS_data_I, fragments_used)
return script
[docs] def save_INCA_script(self, script, scriptname):
"""
Writes the output file
Parameters
----------
script : string
output from "script_generator()"
scriptname : string
user defined name of the .m output file
Outputs
-------
INCA script : .m file
"""
file1 = open(scriptname + ".m", "w")
file1.write(script)
file1.close()
[docs] def runner_script_generator(self, output_filename, n_estimates=10):
"""
Adds the functions needed to run the script and export the .mat file
Parameters
----------
output_filename : string
user defined name of the .mat output file, the
INCA output file
n_estimates : int
number of times the fluxes will be estimated
Returns
-------
runner : string
MATLAB script that will run the previously created
INCA script
"""
runner = (
"f=estimate(m,"
+ str(n_estimates)
+ ");\n\nf=continuate(f,m);\n\nfilename = '"
+ output_filename
+ ".mat';\nsave(filename,'f','m')"
)
return runner
[docs] def save_runner_script(self, runner, scriptname):
"""
Writes the runner output file
Parameters
----------
runner : string
previously created runner script
scriptname : string
name of the runner file, can be the same as
the scriptname of the INCA script
Outputs
-------
runner script : .m file
"""
file2 = open(scriptname + "_runner.m", "w")
file2.write(runner)
file2.close()
[docs] def run_INCA_in_MATLAB(
self, INCA_base_directory, script_folder, matlab_script, runner_script
):
"""
Executes the script in MATLAB using INCA
Prints time and produces .mat file
Parameters
----------
INCA_base_directory : path
the path to the base folder of your INCA installation
script_folder : path
the path to the location of the file you're working on
matlab_script : string
name of the .m file (without suffix)
runner_script : string
name of the runner file (usually matlab_script name +
'_runner')
Outputs
-------
output of INCA : .mat file
"""
start_time = time.time()
eng = matlab.engine.start_matlab()
eng.cd(r"" + INCA_base_directory, nargout=0)
eng.startup(nargout=0)
eng.setpath(nargout=0)
eng.cd(r"" + script_folder, nargout=0)
_f = getattr(eng, matlab_script)
_f(nargout=0)
_f2 = getattr(eng, runner_script)
_f2(nargout=0)
eng.quit()
print("--- %s seconds -" % (time.time() - start_time))
script_generator_descr = INCA_script()
"""A class to write and execute an INCA script in MATLAB.
Examples
--------
>>> from BFAIR.INCA import INCA_script
After pre-processing the input, the script can either be generated all at once
>>> INCA_script = INCA_script()
>>> script = INCA_script.script_generator(
modelReaction_data_I,
atomMappingReactions_data_I,
atomMappingMetabolite_data_I,
measuredFluxes_data_I,
experimentalMS_data_I,
tracer_I
)
Or sequentially (not shown)
After generation, the script can be saved
>>> INCA_script.save_INCA_script(script, "testscript")
Same goesfor the runner script
>>> runner = INCA_script.runner_script_generator('TestFile', 10)
>>> INCA_script.save_runner_script(runner=runner, scriptname="testscript")
With working INCA and MATLAB installations and an active MATLAB engine, the script can be executed in python # noqa E501
>>> INCA_base_directory = "/Users/Username/Documents/INCAv1.9"
>>> script_folder = %pwd
>>> matlab_script = "testscript"
>>> runner_script = matlab_script + "_runner"
>>> INCA_script.run_INCA_in_MATLAB(
>>> INCA_base_directory,
>>> script_folder,
>>> matlab_script,
>>> runner_script
>>> )
For more information on how to use this module or visualize the data, please check the example notebook in the BFAIR repository.""" # noqa E501