Source code for BFAIR.mfa.INCA.INCA_script_generator

# -*- coding: utf-8 -*-
"""Main module."""

# Libraries

import pandas as pd
import numpy as np
import time
import ast
import sys

try:
    import matlab.engine
except ModuleNotFoundError:
    print(
        "Please check the README for a guide on how to "
        "install the MATLAB engine"
    )

__version__ = "0.0.1"


class INCA_script:
    def __init__(self):
        """
        Class of functions to set up a MATLAB script for MFA using
        INCA and for interaction with MATLAB itself. Will later on
        also re-import data from .mat file.
        """

[docs] def limit_to_one_model(self, data_input, model_name_column, model_name): """ Limits the data to values that are assigned to one metabolic model Parameters ---------- data_input : pandas.DataFrame Input data file that needs to be processed model_name_column : string Column name where model names are defined model_name : string Name of the model the data will be limited to Returns ------- data_input : pandas.DataFrame Limited data """ data_output = pd.DataFrame() for i, row in data_input.iterrows(): if row[model_name_column] == model_name: if len(data_output) == 0: data_output = pd.DataFrame.transpose(row.to_frame()) else: data_output = data_output.append( pd.DataFrame.transpose(row.to_frame()) ) data_input = data_output return data_input
[docs] def limit_to_one_experiment( self, data_input, experiment_name_column, experiment_name ): """ Limits the data to values that were acquired in one experiment Parameters ---------- data_input : pandas.DataFrame Input data file that needs to be processed experiment_name_column : string Column name where experiment names are defined model_name : string Name of the model the data will be limited to Returns ------- data_input : pandas.DataFrame Limited data """ data_output = pd.DataFrame() for i, row in data_input.iterrows(): if row[experiment_name_column] == experiment_name: if len(data_output) == 0: data_output = pd.DataFrame.transpose(row.to_frame()) else: data_output = data_output.append( pd.DataFrame.transpose(row.to_frame()) ) data_input = data_output return data_input
def prepare_input( self, string, type_of_replacement=["Curly", "Double_square"] ): """ Process data that is stored in strings of lists etc in the files Parameters ---------- string : string, in this set up a single cell in a pandas.DataFrame Processes the data in cells in the dataframes. The info is either bordered by curly or double square brackets. type_of_replacement : string Define the type of surrounding brackets Returns ------- string : list returns data in lists without bordering brackets """ if type_of_replacement == "Curly": string = string.strip("}{").split(",") elif type_of_replacement == "Double_square": string = string[1:-1] string = ast.literal_eval(string) return string
[docs] def initiate_MATLAB_script(self): """ Starts writing the MATLAB script Returns ------- mat_script : string Initialized MATLAB script """ mat_script = "clear functions\n\n" return mat_script
def reaction_mapping( self, atomMapping_molecules_ids, model_molecules_ids, atomMapping_molecules_stoichiometry, atomMapping_molecules_elements, atomMapping_molecules_mapping, model_molecules_stoichiometry, reaction_type="reactant", ): """ Provides the carbon mapping for metabolites in a model. Called within "add_reactions_to_script()" Parameters ---------- atomMapping_molecules_ids : list processed atomMappingReactions_data_I reactants_ids_tracked or products_ids_tracked model_molecules_ids : list processed modelReaction_data_I reactants_ids or products_ids atomMapping_molecules_stoichiometry : list processed atomMappingReactions_data_I reactants_stoichiometry_tracked or productss_stoichiometry_tracked atomMapping_molecules_elements : list processed atomMappingReactions_data_I reactants_elements_tracked or products_elements_tracked atomMapping_molecules_mapping : list processed atomMappingReactions_data_I reactants_mapping or products_mapping model_molecules_stoichiometry : list processed modelReaction_data_I reactants_stoichiometry or products_stoichiometry reaction_type : string reactant or product Returns ------- rxn_equation : string Reaction equation for the defined reaction """ rxn_equation = "" if reaction_type == "product": rxn_equation += " -> " # This had to be added because some metabolites appear more than once # in one reaction with different mappings seen_molecule = {} dupes_molecule = [] if not len(atomMapping_molecules_ids) == len( set(atomMapping_molecules_ids) ): for x in atomMapping_molecules_ids: if x not in seen_molecule: seen_molecule[x] = 1 else: if seen_molecule[x] == 1: dupes_molecule.append(x) seen_molecule[x] += 1 # Iterate through the metabolites involved in the reactions for molecule_cnt, molecule in enumerate(model_molecules_ids): # Indices for metabolites with multiple different mappings # are being sought # initiate boolean for "while" loop duplicates_not_done = True duplicates_counter = 0 while duplicates_not_done: # this little bit adds +s if it's not the first molecule if molecule_cnt > 0: rxn_equation += " + " if molecule in atomMapping_molecules_ids: if molecule in dupes_molecule: positions = [ i for i, x in enumerate(atomMapping_molecules_ids) if x == molecule ] molecule_index = positions[duplicates_counter] if duplicates_counter == len(positions) - 1: duplicates_not_done = False duplicates_counter += 1 else: molecule_index = atomMapping_molecules_ids.index( molecule ) duplicates_not_done = False # here we check if it is a tuple, i.e. there is more than # one metabolite, or a list, i.e. only one metabolite rxn_equation += ( str( atomMapping_molecules_stoichiometry[molecule_index] ) + "*" + molecule + " (" ) if type(atomMapping_molecules_elements) is tuple: # here we go through the mapping, enumerate the atoms # and add the corresponding mapping for mapping_cnt, mapping in enumerate( atomMapping_molecules_elements[molecule_index] ): # to remove the final whitespace if (mapping_cnt + 1) == len( atomMapping_molecules_mapping[molecule_index] ): rxn_equation += ( mapping + str(mapping_cnt + 1) + ":" + atomMapping_molecules_mapping[ molecule_index ][mapping_cnt] ) else: rxn_equation += ( mapping + str(mapping_cnt + 1) + ":" + atomMapping_molecules_mapping[ molecule_index ][mapping_cnt] + " " ) elif type(atomMapping_molecules_elements) is list: for mapping_cnt, mapping in enumerate( atomMapping_molecules_elements ): # to remove the final whitespace if (mapping_cnt + 1) == len( atomMapping_molecules_mapping[0] ): rxn_equation += ( mapping + str(mapping_cnt + 1) + ":" + atomMapping_molecules_mapping[0][ mapping_cnt ] ) else: rxn_equation += ( mapping + str(mapping_cnt + 1) + ":" + atomMapping_molecules_mapping[0][ mapping_cnt ] + " " ) # close the brackets for mapping rxn_equation += ")" # Exceptions that are in the model but not # tracked are added here else: rxn_equation += ( str(model_molecules_stoichiometry[molecule_cnt]) + "*" + molecule ) duplicates_not_done = False return rxn_equation
[docs] def add_reactions_to_script( self, modelReaction_data_I, atomMappingReactions_data_I ): """ Translates the model and adds mapping using reaction_mapping() Parameters ---------- modelReaction_data_I : pandas.DataFrame pre-processed modelReaction_data_I input data atomMappingReactions_data_I : pandas.DataFrame pre-processed atomMappingReactions_data_I input data Returns ------- mat_script : string Extention to the MATLAB script under construction model_rxn_ids_exp : list List of reaction IDs used for the model """ if len(atomMappingReactions_data_I["rxn_id"]) != len( atomMappingReactions_data_I["rxn_id"].unique() ): sys.exit( "There is a duplicate reaction in atomMappingReactions_data_I" ) mat_script = "r = reaction({... % define reactions\n" model_rxn_ids = [] model_rxn_ids_exp = [] # First we go through the reactions in the model to make sure that # we only include the ones that are actually there for cnt, modelReaction_data in modelReaction_data_I.iterrows(): if modelReaction_data["used_"]: model_rxn_id = modelReaction_data["rxn_id"] model_rxn_ids.append(model_rxn_id) model_rxn_ids = sorted(model_rxn_ids, key=str.lower) for id_cnt, model_rxn_id in enumerate(model_rxn_ids): for cnt, modelReaction_data in modelReaction_data_I.iterrows(): # Model index_in_model = list(modelReaction_data_I["rxn_id"]).index( modelReaction_data["rxn_id"] ) model_reactants_stoichiometry = self.prepare_input( modelReaction_data_I.iloc[index_in_model].loc[ "reactants_stoichiometry" ], "Curly", ) model_products_stoichiometry = self.prepare_input( modelReaction_data_I.iloc[index_in_model].loc[ "products_stoichiometry" ], "Curly", ) if modelReaction_data["rxn_id"] == model_rxn_id: rxn_equation = "'" # we save the location of the row corresponding to # this reaction in the other file # "prepare_input" is a helper function to make the # entries useable, this will have to be checked with # other input # Atom Mapping index_in_atomMapping = list( atomMappingReactions_data_I["rxn_id"] ).index(modelReaction_data["rxn_id"]) atomMapping_reactants_stoichiometry = self.prepare_input( atomMappingReactions_data_I.iloc[ index_in_atomMapping ].loc["reactants_stoichiometry_tracked"], "Curly", ) atomMapping_products_stoichiometry = self.prepare_input( atomMappingReactions_data_I.iloc[ index_in_atomMapping ].loc["products_stoichiometry_tracked"], "Curly", ) # this step makes sure that we exclude reactions that # do not have any stoichiometry annotated if ( atomMapping_reactants_stoichiometry[0] == "" and atomMapping_products_stoichiometry[0] == "" ): print( "There is no stoichimetriy given for:", model_rxn_id, ) # Model model_reactants_ids = self.prepare_input( modelReaction_data_I.iloc[index_in_model].loc[ "reactants_ids" ], "Curly", ) model_products_ids = self.prepare_input( modelReaction_data_I.iloc[index_in_model].loc[ "products_ids" ], "Curly", ) model_reactants_stoichiometry = [ float(i) * -1 for i in model_reactants_stoichiometry ] model_products_stoichiometry = [ float(i) for i in model_products_stoichiometry ] for reactant_cnt, reactant in enumerate( model_reactants_ids ): # this little bit adds +s if it's not # the first reactant if reactant_cnt > 0: rxn_equation += " + " rxn_equation += ( str( model_reactants_stoichiometry[reactant_cnt] ) + "*" + reactant ) rxn_equation += " -> " for product_cnt, product in enumerate( model_products_ids ): # this little bit adds +s if it's not # the first product if product_cnt > 0: rxn_equation += " + " rxn_equation += ( str(model_products_stoichiometry[product_cnt]) + "*" + product ) else: # process the remaining info about the metabolites. # Done here because it can throw an error if # anything is empty # Atom Mapping atomMapping_reactants_ids = self.prepare_input( atomMappingReactions_data_I.iloc[ index_in_atomMapping ].loc["reactants_ids_tracked"], "Curly", ) atomMapping_products_ids = self.prepare_input( atomMappingReactions_data_I.iloc[ index_in_atomMapping ].loc["products_ids_tracked"], "Curly", ) atomMapping_reactants_elements = self.prepare_input( atomMappingReactions_data_I.iloc[ index_in_atomMapping ].loc["reactants_elements_tracked"], "Double_square", ) atomMapping_products_elements = self.prepare_input( atomMappingReactions_data_I.iloc[ index_in_atomMapping ].loc["products_elements_tracked"], "Double_square", ) atomMapping_reactants_mapping = self.prepare_input( atomMappingReactions_data_I.iloc[ index_in_atomMapping ].loc["reactants_mapping"], "Curly", ) atomMapping_products_mapping = self.prepare_input( atomMappingReactions_data_I.iloc[ index_in_atomMapping ].loc["products_mapping"], "Curly", ) atomMapping_reactants_stoichiometry = [ float(i) * -1 for i in atomMapping_reactants_stoichiometry ] atomMapping_products_stoichiometry = [ float(i) for i in atomMapping_products_stoichiometry ] # Model model_reactants_ids = self.prepare_input( modelReaction_data_I.iloc[index_in_model].loc[ "reactants_ids" ], "Curly", ) model_products_ids = self.prepare_input( modelReaction_data_I.iloc[index_in_model].loc[ "products_ids" ], "Curly", ) model_reactants_stoichiometry = [ float(i) * -1 for i in model_reactants_stoichiometry ] model_products_stoichiometry = [ float(i) for i in model_products_stoichiometry ] rxn_equation = "'" # We go through the IDs to treat each # metabolite separately reactant_equation = self.reaction_mapping( atomMapping_reactants_ids, model_reactants_ids, atomMapping_reactants_stoichiometry, atomMapping_reactants_elements, atomMapping_reactants_mapping, model_reactants_stoichiometry, reaction_type="reactant", ) product_equation = self.reaction_mapping( atomMapping_products_ids, model_products_ids, atomMapping_products_stoichiometry, atomMapping_products_elements, atomMapping_products_mapping, model_products_stoichiometry, reaction_type="product", ) rxn_equation = ( rxn_equation + reactant_equation + product_equation ) # the ids of the equations are being added to the list that # will be exported model_rxn_ids_exp.append(model_rxn_id) mat_script += rxn_equation + " ';...\n" mat_script += "});\n\n" return mat_script, model_rxn_ids_exp
[docs] def initialize_model(self): """ Previously described reactions are assigned to a model object Returns ------- mat_script : string addition to MATLAB script under construction """ mat_script = "m = model(r); % set up model\n\n" return mat_script
[docs] def symmetrical_metabolites(self, atomMappingMetabolite_data_I): """ Takes care of symmetrical metabolites if not done so in the reaction equations Parameters ---------- atomMappingMetabolite_data_I : pandas.DataFrame pre-processed atomMappingMetabolite_data_I input data Returns ------- mat_script : string addition to MATLAB script under construction """ tmp_script = "% take care of symmetrical metabolites\n" for cnt_met, met in atomMappingMetabolite_data_I.iterrows(): if not pd.isna(met["met_symmetry_atompositions"]): tmp_script = ( tmp_script + "m.mets{'" + met["met_id"] + "'}.sym = list('rotate180', atommap('" ) met_positions = [ int(x) for x in self.prepare_input( met["met_atompositions"], "Curly" ) ] met_elements = self.prepare_input(met["met_elements"], "Curly") met["met_symmetry_elements"] = met[ "met_symmetry_elements" ].replace("{}", str(np.nan)) met_symmetry_elements = self.prepare_input( met["met_symmetry_elements"], "Curly" ) met["met_symmetry_atompositions"] = met[ "met_symmetry_atompositions" ].replace("{}", str(np.nan)) met_symmetry_atompositions = [ int(x) for x in self.prepare_input( met["met_symmetry_atompositions"], "Curly" ) ] for cnt, atompositions in enumerate(met_positions): tmp_script = ( tmp_script + met_elements[cnt] + str(atompositions + 1) + ":" + met_symmetry_elements[cnt] + str(met_symmetry_atompositions[cnt] + 1) + " " ) tmp_script = tmp_script[:-1] tmp_script = tmp_script + "'));\n" else: continue tmp_script = tmp_script + "\n" mat_script = tmp_script return mat_script
# NOTE: hard-coded for now until a better workaround can be done
[docs] def unbalanced_reactions( self, atomMappingMetabolite_data_I, unbalanced_metabolites=["co2_e", "h2o_e", "h_e", "na1_e"], ): """ Adds in the metabolite state (balanced or unbalanced) Parameters ---------- atomMappingMetabolite_data_I : pandas.DataFrame pre-processed atomMappingMetabolite_data_I input data unbalanced_metabolites : list of string elements list of unbalanced metabolites - hardcoded Returns ------- mat_script : string addition to MATLAB script under construction """ tmp_script = "% define unbalanced reactions\n" # specify reactions that should be forcible unbalanced metabolites_all = [ x["met_id"] for cnt, x in atomMappingMetabolite_data_I.iterrows() ] # unbalanced reactions: for met in unbalanced_metabolites: if met in metabolites_all: tmp_script = ( tmp_script + "m.states{'" + met + ".EX" + "'}.bal = false;\n" ) mat_script = tmp_script return mat_script
# Stedv stuff should be checked once more before merging to main
[docs] def add_reaction_parameters( self, modelReaction_data_I, measuredFluxes_data_I, model_rxn_ids, fluxes_present=True, ): """ Flux parameters are added. They correspond to the previously described reactions Parameters ---------- modelReaction_data_I : pandas.DataFrame pre-processed modelReaction_data_I input data measuredFluxes_data_I : pandas.DataFrame pre-processed measuredFluxes_data_I input data model_rxn_ids : list pre-processed model_rxn_ids input data fluxes_present : pandas.DataFrame confirm if fluxes are present. If not then the flux measuredFluxes_data_I file will be ignored Returns ------- mat_script : string addition to MATLAB script under construction """ # Add in initial fluxes (lb/ub, values, on/off) and define the # reaction ids # NOTE: lb, ub, val = 0 for steady-state # lower bounds tmp_script_lb = "\n% define lower bounds\nm.rates.flx.lb = [...\n" # upper bounds tmp_script_ub = "\n%define upper bounds\nm.rates.flx.ub = [...\n" # intial flux values tmp_script_val = "\n% define flux vals\nm.rates.flx.val = [...\n" # include/exclude a reaction from the simulation tmp_script_on = "\n% include/exclude reactions\nm.rates.on = [...\n" # rxn_ids tmp_script_id = "\n% define reaction ids\nm.rates.id = {...\n" for model_rxn_id in model_rxn_ids: for rxn_cnt, rxn in modelReaction_data_I.iterrows(): if rxn["rxn_id"] == model_rxn_id and rxn["used_"]: if fluxes_present: if rxn["rxn_id"] in list( measuredFluxes_data_I["rxn_id"] ): for ( flux_cnt, flux, ) in measuredFluxes_data_I.iterrows(): if rxn["rxn_id"] == flux["rxn_id"]: tmp_script_lb = ( tmp_script_lb + str(flux["flux_lb"]) + ",...\n" ) tmp_script_ub = ( tmp_script_ub + str(flux["flux_ub"]) + ",...\n" ) tmp_script_val = ( tmp_script_val + str(flux["flux_average"]) + ",...\n" ) else: tmp_script_lb = ( tmp_script_lb + str(rxn["lower_bound"]) + ",...\n" ) tmp_script_ub = ( tmp_script_ub + str(rxn["upper_bound"]) + ",...\n" ) tmp_script_val = tmp_script_val + str(0) + ",...\n" else: tmp_script_lb = ( tmp_script_lb + str(rxn["lower_bound"]) + ",...\n" ) tmp_script_ub = ( tmp_script_ub + str(rxn["upper_bound"]) + ",...\n" ) if rxn["upper_bound"] == 0.0 and rxn["lower_bound"] == 0.0: tmp_script_on = tmp_script_on + "false" + ",...\n" else: tmp_script_on = tmp_script_on + "true" + ",...\n" tmp_script_id = ( tmp_script_id + "'" + rxn["rxn_id"] + "',...\n" ) tmp_script_lb = tmp_script_lb + "];\n" tmp_script_ub = tmp_script_ub + "];\n" tmp_script_val = tmp_script_val + "];\n" tmp_script_on = tmp_script_on + "];\n" tmp_script_id = tmp_script_id + "};\n" mat_script = ( tmp_script_lb + tmp_script_ub + tmp_script_val + tmp_script_on + tmp_script_id ) return mat_script
[docs] def verify_and_estimate(self): """ Adds a QC step and defines the restarts for later processing Returns ------- mat_script : string addition to MATLAB script under construction """ mat_script = "\nm.rates.flx.val = mod2stoich(m); % make sure the fluxes are feasible\n" # noqa E501 mat_script = ( mat_script + "m.options.fit_starts = 10; % 10 restarts during the estimation procedure\n" # noqa E501 ) return mat_script
[docs] def add_experimental_parameters( self, experimentalMS_data_I, tracer_I, measuredFluxes_data_I, atomMappingMetabolite_data_I, ): """ Defines the measured fragments and adds tracer information Parameters ---------- experimentalMS_data_I : pandas.DataFrame pre-processed experimentalMS_data_I input data tracer_I : pandas.DataFrame pre-processed tracer_I input data measuredFluxes_data_I : pandas.DataFrame pre-processed measuredFluxes_data_I input data atomMappingMetabolite_data_I : pandas.DataFrame pre-processed atomMappingMetabolite_data_I input data Returns ------- mat_script : string addition to MATLAB script under construction fragments_used : list List of the fragments in the data that fit into the defined parameters """ mat_script = "" fragments_used = [] experiments_all = [ x["experiment_id"] for cnt, x in experimentalMS_data_I.iterrows() ] experiments = list(set(experiments_all)) experiments.sort() fragments_all = [ x["fragment_id"] for cnt, x in experimentalMS_data_I.iterrows() ] fragments = list(set(fragments_all)) fragments.sort() for experiment_cnt, experiment in enumerate(experiments): tmp_script = "\n% define which fragments of molecules were measured in which experiment\nd = msdata({...\n" # noqa E501 for frg_cnt, fragment in enumerate(fragments): for ms_cnt, ms_data in experimentalMS_data_I.iterrows(): if ( ms_data["fragment_id"] == fragment and ms_data["experiment_id"] == experiment ): met_positions = [ int(x) for x in self.prepare_input( ms_data["met_atompositions"], "Curly" ) ] atomMapping_atompositions_row = atomMappingMetabolite_data_I[ atomMappingMetabolite_data_I["met_id"] == ms_data["met_id"] ] if ( len( list( atomMapping_atompositions_row[ "met_atompositions" ] ) ) > 0 ): atomMapping_atompositions = [ int(x) for x in self.prepare_input( list( atomMapping_atompositions_row[ "met_atompositions" ] )[0], "Curly", ) ] if max(met_positions) <= max( atomMapping_atompositions ): fragments_used.append(ms_data["fragment_id"]) tmp_script = ( tmp_script + "'" + ms_data["fragment_id"] + ": " + ms_data["met_id"] + " @ " ) met_elements = self.prepare_input( ms_data["met_elements"], "Curly" ) for pos_cnt, pos in enumerate(met_positions): tmp_script = ( tmp_script + met_elements[pos_cnt] + str(pos + 1) + " " ) tmp_script = tmp_script[:-1] tmp_script = tmp_script + "';\n" break else: break tmp_script = tmp_script + "});\n" tmp_script = ( tmp_script + "\n% initialize mass distribution vector\nd.idvs = idv;\n" ) mat_script = mat_script + tmp_script # write substrate labeling (i.e. tracer) information tmp_script = "" tmp_script = ( tmp_script + "\n% define tracers used in the experiments\nt = tracer({...\n" # noqa E501 ) for tracer_cnt, tracer in tracer_I.iterrows(): if tracer["experiment_id"] == experiment: tmp_script = ( tmp_script + "'" + tracer["met_name"] + ": " + tracer["met_id"] + ".EX" + " @ " ) tracer_met_atompositions = self.prepare_input( tracer["met_atompositions"], "Curly" ) tracer_met_elements = self.prepare_input( tracer["met_elements"], "Curly" ) for cnt, met_atompositions in enumerate( tracer_met_atompositions ): tmp_script = ( tmp_script + tracer_met_elements[cnt] + str(met_atompositions) + " " ) # remove extra whitespace tmp_script = tmp_script[:-1] tmp_script = tmp_script + "';...\n" tmp_script = tmp_script + "});\n" tmp_script = ( tmp_script + "\n% define fractions of tracers used\nt.frac = [ " ) for tracer_cnt, tracer in tracer_I.iterrows(): if tracer["experiment_id"] == experiment: tmp_script = tmp_script + str(tracer["ratio"]) + "," # remove extra comma tmp_script = tmp_script[:-1] tmp_script = tmp_script + " ];\n" mat_script = mat_script + tmp_script # write flux measurements tmp_script = "" tmp_script = ( tmp_script + "\n% define experiments for fit data\nf = data(' " ) for flux_cnt, flux in measuredFluxes_data_I.iterrows(): if flux["experiment_id"] == experiment: tmp_script = tmp_script + flux["rxn_id"] + " " tmp_script = tmp_script[:-1] tmp_script = tmp_script + " ');\n" tmp_script = tmp_script + "\n% add fit values\nf.val = [...\n" for flux_cnt, flux in measuredFluxes_data_I.iterrows(): if flux["experiment_id"] == experiment: tmp_script = ( tmp_script + str(flux["flux_average"]) + ",...\n" ) tmp_script = tmp_script + "];\n" tmp_script = tmp_script + "% add fit stds\nf.std = [...\n" for flux_cnt, flux in measuredFluxes_data_I.iterrows(): if flux["experiment_id"] == experiment: tmp_script = ( tmp_script + str(flux["flux_stdev"]) + ",...\n" ) tmp_script = tmp_script + "];\n" tmp_script = ( tmp_script + "\n% initialize experiment with t and add f and d\nx = experiment(t);\n" # noqa E501 ) tmp_script = tmp_script + "x.data_flx = f;\n" tmp_script = tmp_script + "x.data_ms = d;\n" tmp_script = ( tmp_script + "\n% assing all the previous values to a specific experiment" ) tmp_script = tmp_script + "\nm.expts(%d) = x;\n" % ( experiment_cnt + 1 ) tmp_script = tmp_script + "\nm.expts(%d).id = {'%s'};\n" % ( experiment_cnt + 1, experiment, ) mat_script = mat_script + tmp_script return mat_script, fragments_used
# check the stedv stuff before merging to main
[docs] def mapping(self, experimentalMS_data_I, fragments_used): """ Adds MS data to measured fragments Parameters ---------- experimentalMS_data_I : pandas.DataFrame pre-processed experimentalMS_data_I input data fragments_used : list List of the fragments in the data that fit into the defined parameters from "add_experimental_parameters()" Returns ------- mat_script : string addition to MATLAB script under construction """ experiments_all = [ x["experiment_id"] for cnt, x in experimentalMS_data_I.iterrows() ] experiments = list(set(experiments_all)) experiments.sort() fragments = fragments_used times_all = [ x["time_point"] for cnt, x in experimentalMS_data_I.iterrows() ] times = list(set(times_all)) times.sort() mat_script = "" # Add in ms data or Write ms data to separate file for experiment_cnt, experiment in enumerate(experiments): tmp_script = "\n% add experimental data for annotated fragments\n" for i, fragment in enumerate(fragments): for j, time_j in enumerate(times): for cnt_x, ms_data in experimentalMS_data_I.iterrows(): if ( ms_data["fragment_id"] == fragments[i] and ms_data["time_point"] == times[j] and ms_data["experiment_id"] == experiment ): if not pd.isna( ms_data["intensity_normalized_average"] ): ms_data_norm_ave_int = [ float(x) for x in ms_data[ "intensity_normalized_average" ] .strip("}{") .split(",") ] ms_data_norm_stdev_int = [ float(x) for x in ms_data[ "intensity_normalized_stdev" ] .strip("}{") .split(",") ] for cnt, intensity in enumerate( ms_data_norm_ave_int ): # each column is a seperate time point # each row is a seperate idv if cnt == 0: # Assign names and times name = ( fragment + "_" + str(cnt) + "_" + str(j) + "_" + str(experiment) ) tmp_script = tmp_script + ( "m.expts(%d).data_ms(%d).idvs.id(%d,%d) = {'%s'};\n" # noqa E501 % ( experiment_cnt + 1, i + 1, cnt + 1, j + 1, name, ) ) tmp_script = tmp_script + ( "m.expts(%d).data_ms(%d).idvs.time(%d,%d) = %s;\n" # noqa E501 % ( experiment_cnt + 1, i + 1, cnt + 1, j + 1, time_j, ) ) # Assign values ave = intensity stdev = ms_data_norm_stdev_int[cnt] # remove 0.0000 values and replace with NaN if ave <= 1e-6: ave = "NaN" tmp_script = tmp_script + ( "m.expts(%d).data_ms(%d).idvs.val(%d,%d) = %s;\n" # noqa E501 % ( experiment_cnt + 1, i + 1, cnt + 1, j + 1, ave, ) ) else: tmp_script = tmp_script + ( "m.expts(%d).data_ms(%d).idvs.val(%d,%d) = %f;\n" # noqa E501 % ( experiment_cnt + 1, i + 1, cnt + 1, j + 1, ave, ) ) if stdev <= 1e-3: if stdev == 0.0: stdev = 0.05 else: stdev = 0.001 tmp_script = tmp_script + ( "m.expts(%d).data_ms(%d).idvs.std(%d,%d) = %s;\n" # noqa E501 % ( experiment_cnt + 1, i + 1, cnt + 1, j + 1, stdev, ) ) else: tmp_script = tmp_script + ( "m.expts(%d).data_ms(%d).idvs.std(%d,%d) = %f;\n" # noqa E501 % ( experiment_cnt + 1, i + 1, cnt + 1, j + 1, stdev, ) ) else: ave == "NaN" stdev = "NaN" tmp_script = tmp_script + ( "m.expts(%d).data_ms(%d).idvs.std(%d,%d) = %s;\n" # noqa E501 % ( experiment_cnt + 1, i + 1, cnt + 1, j + 1, stdev, ) ) mat_script = mat_script + tmp_script return mat_script
[docs] def script_generator( self, modelReaction_data_I, atomMappingReactions_data_I, atomMappingMetabolite_data_I, measuredFluxes_data_I, experimentalMS_data_I, tracer_I, ): """ Combines the functions that construct the model Parameters ---------- modelReaction_data_I : pandas.DataFrame pre-processed modelReaction_data_I input data atomMappingReactions_data_I : pandas.DataFrame pre-processed atomMappingReactions_data_I input data atomMappingMetabolite_data_I : pandas.DataFrame pre-processed atomMappingMetabolite_data_I input data measuredFluxes_data_I : pandas.DataFrame pre-processed measuredFluxes_data_I input data experimentalMS_data_I : pandas.DataFrame pre-processed experimentalMS_data_I input data tracer_I : pandas.DataFrame pre-processed tracer_I input data Returns ------- script : string combined parts of the MATLAB script constrcted by the previously defined functions """ script = "" script = self.initiate_MATLAB_script() script_temp, model_rxn_ids = self.add_reactions_to_script( modelReaction_data_I, atomMappingReactions_data_I ) script += script_temp script += self.initialize_model() script += self.symmetrical_metabolites(atomMappingMetabolite_data_I) script += self.unbalanced_reactions(atomMappingMetabolite_data_I) script += self.add_reaction_parameters( modelReaction_data_I, measuredFluxes_data_I, model_rxn_ids ) script += self.verify_and_estimate() script_temp, fragments_used = self.add_experimental_parameters( experimentalMS_data_I, tracer_I, measuredFluxes_data_I, atomMappingMetabolite_data_I, ) script += script_temp script += self.mapping(experimentalMS_data_I, fragments_used) return script
[docs] def save_INCA_script(self, script, scriptname): """ Writes the output file Parameters ---------- script : string output from "script_generator()" scriptname : string user defined name of the .m output file Outputs ------- INCA script : .m file """ file1 = open(scriptname + ".m", "w") file1.write(script) file1.close()
[docs] def runner_script_generator(self, output_filename, n_estimates=10): """ Adds the functions needed to run the script and export the .mat file Parameters ---------- output_filename : string user defined name of the .mat output file, the INCA output file n_estimates : int number of times the fluxes will be estimated Returns ------- runner : string MATLAB script that will run the previously created INCA script """ runner = ( "f=estimate(m," + str(n_estimates) + ");\n\nf=continuate(f,m);\n\nfilename = '" + output_filename + ".mat';\nsave(filename,'f','m')" ) return runner
[docs] def save_runner_script(self, runner, scriptname): """ Writes the runner output file Parameters ---------- runner : string previously created runner script scriptname : string name of the runner file, can be the same as the scriptname of the INCA script Outputs ------- runner script : .m file """ file2 = open(scriptname + "_runner.m", "w") file2.write(runner) file2.close()
[docs] def run_INCA_in_MATLAB( self, INCA_base_directory, script_folder, matlab_script, runner_script ): """ Executes the script in MATLAB using INCA Prints time and produces .mat file Parameters ---------- INCA_base_directory : path the path to the base folder of your INCA installation script_folder : path the path to the location of the file you're working on matlab_script : string name of the .m file (without suffix) runner_script : string name of the runner file (usually matlab_script name + '_runner') Outputs ------- output of INCA : .mat file """ start_time = time.time() eng = matlab.engine.start_matlab() eng.cd(r"" + INCA_base_directory, nargout=0) eng.startup(nargout=0) eng.setpath(nargout=0) eng.cd(r"" + script_folder, nargout=0) _f = getattr(eng, matlab_script) _f(nargout=0) _f2 = getattr(eng, runner_script) _f2(nargout=0) eng.quit() print("--- %s seconds -" % (time.time() - start_time))
script_generator_descr = INCA_script() """A class to write and execute an INCA script in MATLAB. Examples -------- >>> from BFAIR.INCA import INCA_script After pre-processing the input, the script can either be generated all at once >>> INCA_script = INCA_script() >>> script = INCA_script.script_generator( modelReaction_data_I, atomMappingReactions_data_I, atomMappingMetabolite_data_I, measuredFluxes_data_I, experimentalMS_data_I, tracer_I ) Or sequentially (not shown) After generation, the script can be saved >>> INCA_script.save_INCA_script(script, "testscript") Same goesfor the runner script >>> runner = INCA_script.runner_script_generator('TestFile', 10) >>> INCA_script.save_runner_script(runner=runner, scriptname="testscript") With working INCA and MATLAB installations and an active MATLAB engine, the script can be executed in python # noqa E501 >>> INCA_base_directory = "/Users/Username/Documents/INCAv1.9" >>> script_folder = %pwd >>> matlab_script = "testscript" >>> runner_script = matlab_script + "_runner" >>> INCA_script.run_INCA_in_MATLAB( >>> INCA_base_directory, >>> script_folder, >>> matlab_script, >>> runner_script >>> ) For more information on how to use this module or visualize the data, please check the example notebook in the BFAIR repository.""" # noqa E501