Source code for BFAIR.mfa.INCA.INCA_reimport

# -*- coding : utf-8 -*-
"""Data re-import module."""

import time
from math import isnan, isinf
import re
from molmass.molmass import Formula
import scipy.io
import os
import pandas as pd
from datetime import datetime
from stat import ST_SIZE, ST_MTIME

__version__ = "1.0.0"


class INCA_reimport:
    def __init__(self):
        self.fittedData = pd.DataFrame()
        self.fittedFluxes = pd.DataFrame()
        self.fittedFragments = pd.DataFrame()
        self.fittedMeasuredFluxes = pd.DataFrame()
        self.fittedMeasuredFragments = pd.DataFrame()
        self.fittedMeasuredFluxResiduals = pd.DataFrame()
        self.fittedMeasuredFragmentResiduals = pd.DataFrame()
        self.simulationParameters = pd.DataFrame()

[docs]    def extract_file_info(self, filename):
        """
        Extracts information about the file

        Parameters
        ----------
        filename : str
            name of the .mat file we want to get information about

        Returns
        -------
        info : dict
            a dict containing the file information, its size,
            the structure of the timestamp
            and the timestamp of when the simulation was run
        """
        try:
            st = os.stat(filename)
        except IOError:
            print("failed to get information about", filename)
            return
        else:
            file_size = st[ST_SIZE]
            simulation_dateAndTime_struct = time.localtime(st[ST_MTIME])
            simulation_dateAndTime = datetime.fromtimestamp(
                time.mktime(simulation_dateAndTime_struct)
            )
            info = {
                "File_size": file_size,
                "Simulation_timestamp_structure": simulation_dateAndTime_struct,
                "Simulation_timestamp": simulation_dateAndTime,
            }
        return info

[docs]    def det_simulation_type(self, simulation_info):
        """
        Determine if the simulation is a parallel labeling
        experiment, non-stationary, or both

        Parameters
        ----------
        simulation_info : pandas.DataFrame
            The MS fragment file corresponding
            to the simulation

        Returns
        -------
        parallel : bool
        non_stationary : bool
            booleans that describe the type
            of experiment that was simulated
        """
        parallel = False
        non_stationary = False
        if (
            len(simulation_info["experiment_id"]) > 1
            or len(simulation_info["sample_name_abbreviation"]) > 1
        ):
            parallel = True
        if len(simulation_info["time_point"]) > 1:
            non_stationary = True
        return parallel, non_stationary

[docs]    def data_extraction(self, filename):
        """
        Extract simulation data

        Parameters
        ----------
        filename : str
            name of the .mat file we want to get information about

        Returns
        -------
        m : scipy.MatlabObject
            the model used for the simulation
        f : scipy.MatlabObject
            the fit of the model
        """
        m = scipy.io.loadmat(filename)["m"]  # model
        f = scipy.io.loadmat(filename)["f"]  # fitdata
        # s = scipy.io.loadmat(filename)['s']  # simdata, not used here
        return m, f

[docs]    def extract_model_info(self, m):
        """
        Extract model information

        Parameters
        ----------
        m : scipy.MatlabObject
            the model extracted from the file

        Returns
        -------
        model_info : pandas.DataFrame
            a DataFrame containing the MS id,
            the id of the experiment and a boolean
            describing which experiments were used in the model
        """
        m_ms_expt = []
        m_ms_id = []
        m_ms_on = []
        for exp in m["expts"]:
            exp_id = exp[0][0]["id"][0][0]
            for d in exp[0][0]["data_ms"][0]["id"][0]:
                m_ms_expt.append(exp_id)
                m_ms_id.append(d[0])
                m_ms_on.append(bool(d[0][0]))
        model_info = {"Exp": m_ms_expt, "MS_id": m_ms_id, "Exp_used": m_ms_on}
        model_info = pd.DataFrame.from_dict(model_info, "index")
        return model_info

[docs]    def extract_sim_params(self, simulation_id, info, m, filename):
        """
        Extract simulation parameters

        Parameters
        ----------
        simulation_id : str
            The name of the experiment used for
            the simulation as in the MS frament file
        m : scipy.MatlabObject
            the model extracted from the file
        filename : str
            name of the .mat file we want to get information about

        Returns
        -------
        simulationParameters : pandas.DataFrame
            the simulation parameters
        """
        simulation_dateAndTime = info["Simulation_timestamp"]
        m_options = {
            "cont_alpha": float(m["options"][0][0][0]["cont_alpha"][0][0][0]),
            "cont_reltol": float(
                m["options"][0][0][0]["cont_reltol"][0][0][0]
            ),
            "cont_steps": float(m["options"][0][0][0]["cont_steps"][0][0][0]),
            "fit_nudge": float(m["options"][0][0][0]["fit_nudge"][0][0][0]),
            "fit_reinit": bool(m["options"][0][0][0]["fit_reinit"][0][0][0]),
            "fit_reltol": float(m["options"][0][0][0]["fit_reltol"][0][0][0]),
            "fit_starts": float(m["options"][0][0][0]["fit_starts"][0][0][0]),
            "fit_tau": float(m["options"][0][0][0]["fit_tau"][0][0][0]),
            "hpc_on": bool(m["options"][0][0][0]["hpc_on"][0][0][0]),
            "int_maxstep": float(
                m["options"][0][0][0]["int_maxstep"][0][0][0]
            ),
            "int_reltol": float(m["options"][0][0][0]["int_reltol"][0][0][0]),
            "int_senstol": float(
                m["options"][0][0][0]["int_senstol"][0][0][0]
            ),
            "int_timeout": float(
                m["options"][0][0][0]["int_timeout"][0][0][0]
            ),
            "int_tspan": float(m["options"][0][0][0]["int_tspan"][0][0][0]),
            "ms_correct": bool(m["options"][0][0][0]["ms_correct"][0][0][0]),
            "oed_crit": m["options"][0][0][0]["oed_crit"][0][0],
            "oed_reinit": bool(m["options"][0][0][0]["oed_reinit"][0][0][0]),
            "oed_tolf": float(m["options"][0][0][0]["oed_tolf"][0][0][0]),
            "oed_tolx": float(m["options"][0][0][0]["oed_tolx"][0][0][0]),
            "sim_more": bool(m["options"][0][0][0]["sim_more"][0][0][0]),
            "sim_na": bool(m["options"][0][0][0]["sim_na"][0][0][0]),
            "sim_sens": bool(m["options"][0][0][0]["sim_sens"][0][0][0]),
            "sim_ss": bool(m["options"][0][0][0]["sim_ss"][0][0][0]),
            "sim_tunit": [m["options"][0][0][0]["sim_tunit"][0][0]],
        }
        try:
            m_options.update(
                {"hpc_mcr": m["options"][0][0][0]["hpc_mcr"][0][0]}
            )
        except ValueError:
            m_options.update(
                {"hpc_mcr": float(m["options"][0][0][0]["hpc_bg"][0][0][0])}
            )
        try:
            m_options.update(
                {"hpc_serve": m["options"][0][0][0]["hpc_serve"][0][0]}
            )
        except ValueError:
            m_options.update(
                {"hpc_serve": m["options"][0][0][0]["hpc_sched"][0][0]}
            )
        m_options.update(
            {
                "simulation_id": simulation_id,
                "simulation_dateAndTime": simulation_dateAndTime,
                "original_filename": filename,
                "used_": True,
                "comment_": None,
            }
        )
        simulationParameters = pd.DataFrame.from_dict(m_options)
        return simulationParameters

[docs]    def extract_base_stats(self, f, simulation_id, info):
        """
        Extract fit information

        Parameters
        ----------
        f : scipy.MatlabObject
            the fit of the model
        simulation_id : str
            The name of the experiment used for the
            simulation as in the MS frament file
        info : dict
            output of the "extract_file_info(filename)" function

        Returns
        -------
        fittedData : pandas.DataFrame
            base statistics describing the fit
        """
        f_Echi2 = None
        simulation_dateAndTime = info["Simulation_timestamp"]
        if not isnan(f["Echi2"][0][0][0][0]):
            if len(f["Echi2"][0][0][0]) > 1:
                f_Echi2 = [f["Echi2"][0][0][0][0], f["Echi2"][0][0][0][1]]
            else:
                f_Echi2 = [f["Echi2"][0][0][0][0]]
        f_alf = f["alf"][0][0][0][0]
        f_chi2 = f["chi2"][0][0][0][0]
        f_dof = int(f["dof"][0][0][0][0])
        f_ = {
            "fitted_echi2": f_Echi2,
            "fitted_alf": f_alf,
            "fitted_chi2": f_chi2,
            "fitted_dof": f_dof,
        }
        f_.update(
            {
                "simulation_id": simulation_id,
                "simulation_dateAndTime": simulation_dateAndTime,
                "used_": True,
                "comment_": None,
            }
        )
        fittedData = pd.DataFrame.from_dict(f_)
        return fittedData

[docs]    def get_fit_info(self, f):
        """
         Extract information and sum of the squared residuals of
         the fitted measurements

         Parameters
         ----------
         f : scipy.MatlabObject
             the fit of the model

        Returns
        -------
         f_mnt_info : dict
             a dict containing the the reaction id of the fitted
             measurements, the sum of the squared residuals,
             the id of the experiment used for the simulation
             and the type of the current value (a measured flux or an
             MS measurement)
        """
        f_mnt_id = []
        f_mnt_sres = []
        f_mnt_expt = []
        f_mnt_type = []  # Flux or MS
        for d in f["mnt"][0][0][0]["id"]:
            f_mnt_id.append(d[0])
        for d in f["mnt"][0][0][0]["sres"]:
            f_mnt_sres.append(float(d[0][0]))
        for d in f["mnt"][0][0][0]["expt"]:
            f_mnt_expt.append(d[0])
        for d in f["mnt"][0][0][0]["type"]:
            f_mnt_type.append(d[0])
        f_mnt_info = {
            "rxn_id": f_mnt_id,
            "rss": f_mnt_sres,
            "expt_name": f_mnt_expt,
            "expt_type": f_mnt_type,
        }
        return f_mnt_info

    # Only works for single experiments for now, might have to change it
    # for parallel labeling
[docs]    def sort_fit_info(self, f_mnt_info, simulation_info, fittedData):
        """
        Seperate the information from the original input, the "get_fit_info(f)"
        function, the "extract_file_info(filename)" function and the
        "extract_base_stats(f, simulation_id, info)" function
        into appropriate rows

        Parameters
        ----------
        f_mnt_info : dict
            the output of the "get_fit_info(f)" function
        simulation_info : pandas.DataFrame
            The MS fragment file corresponding to the simulation
        fittedData : pandas.DataFrame
            the output of the "extract_base_stats(f, simulation_id, info)"
            function

        Returns
        -------
        fittedMeasuredFluxes : pandas.DataFrame
            info about the fluxes used as an input
            for the simulation
        fittedMeasuredFragments : pandas.DataFrame
            info about the MS data used as an
            input for the simulation
        """
        fittedMeasuredFluxes = {}
        fittedMeasuredFragments = {}
        rxn_id = f_mnt_info["rxn_id"]
        rss = f_mnt_info["rss"]
        expt_name = f_mnt_info["expt_name"]
        expt_type = f_mnt_info["expt_type"]
        simulation_id = fittedData["simulation_id"].unique()[0]
        simulation_dateAndTime = fittedData["simulation_dateAndTime"].unique()[
            0
        ]
        for cnt, x_type in enumerate(expt_type):
            if x_type == "Flux":
                if expt_name[cnt] in list(simulation_info["experiment_id"]):
                    fittedMeasuredFluxes[cnt] = {
                        "simulation_id": simulation_id,
                        "simulation_dateAndTime": simulation_dateAndTime,
                        "experiment_id": expt_name[cnt],
                        "sample_name_abbreviation": simulation_info[
                            "sample_name_abbreviation"
                        ][0],
                        "rxn_id": rxn_id[cnt],
                        "fitted_sres": rss[cnt],
                        "used_": True,
                        "comment_": None,
                    }
                elif expt_name[cnt] in list(
                    simulation_info["sample_name_abbreviation"]
                ):
                    fittedMeasuredFluxes[cnt] = {
                        "simulation_id": simulation_id,
                        "simulation_dateAndTime": simulation_dateAndTime,
                        "experiment_id": simulation_info["experiment_id"][0],
                        "sample_name_abbreviation": expt_name[cnt],
                        "rxn_id": rxn_id[cnt],
                        "fitted_sres": rss[cnt],
                        "used_": True,
                        "comment_": None,
                    }
            elif x_type == "MS":
                if expt_name[cnt] in list(simulation_info["experiment_id"]):
                    fittedMeasuredFragments[cnt] = {
                        "simulation_id": simulation_id,
                        "simulation_dateAndTime": simulation_dateAndTime,
                        "experiment_id": expt_name[cnt],
                        "sample_name_abbreviation": simulation_info[
                            "sample_name_abbreviation"
                        ][0],
                        "fragment_id": rxn_id[cnt],
                        "fitted_sres": rss[cnt],
                        "used_": True,
                        "comment_": None,
                    }
                elif expt_name[cnt] in list(
                    simulation_info["sample_name_abbreviation"]
                ):
                    fittedMeasuredFragments[cnt] = {
                        "simulation_id": simulation_id,
                        "simulation_dateAndTime": simulation_dateAndTime,
                        "experiment_id": simulation_info["experiment_id"][0],
                        "sample_name_abbreviation": expt_name[cnt],
                        "fragment_id": rxn_id[cnt],
                        "fitted_sres": rss[cnt],
                        "used_": True,
                        "comment_": None,
                    }
            else:
                print("type not recognized")
        fittedMeasuredFluxes = pd.DataFrame.from_dict(
            fittedMeasuredFluxes, "index"
        )
        fittedMeasuredFragments = pd.DataFrame.from_dict(
            fittedMeasuredFragments, "index"
        )
        return fittedMeasuredFluxes, fittedMeasuredFragments

[docs]    def get_residuals_info(self, f, simulation_info):
        """
        Extract the residuals of the fitted measurements

        Parameters
        ----------
        f : scipy.MatlabObject
            the fit of the model
        simulation_info : pandas.DataFrame
            The MS fragment file corresponding to
            the simulation

        Returns
        -------
        f_mnt_res_info : dict
            a dict containing the residuals of the fit,
            the fit itself, the type of the current value (a measured flux
            or an MS measurement)
            reaction id of the fitted measurements, the standard
            deviation of the fit,
            the time point of the sample, the name of the expeiment,
            the data used for the fit, the peak of the residuals
        """
        f_mnt_res_val = []
        f_mnt_res_fit = []
        f_mnt_res_type = []  # Flux or MS
        f_mnt_res_id = []
        f_mnt_res_std = []
        f_mnt_res_time = []
        f_mnt_res_expt = []
        f_mnt_res_data = []
        f_mnt_res_peak = []
        for d in f["mnt"][0][0][0]["res"]:
            f_mnt_res_val.append(float(d[0][0]["val"][0][0]))
            f_mnt_res_fit.append(float(d[0][0]["fit"][0][0]))
            f_mnt_res_type.append(d[0][0]["type"][0])
            f_mnt_res_id.append(d[0][0]["id"][0])
            f_mnt_res_std.append(float(d[0][0]["std"][0][0]))
            # change default of time inf to 0
            if isinf(d[0][0]["time"][0][0]):
                f_mnt_res_time.append("0")
            else:
                f_mnt_res_time.append(str(d[0][0]["time"][0][0]))
            if d[0][0]["expt"][0] == "Expt #1":
                f_mnt_res_expt.append(simulation_info["experiment_id"][0])
            else:
                f_mnt_res_expt.append(d[0][0]["expt"][0])
            f_mnt_res_data.append(d[0][0]["data"][0][0])
            if d[0][0]["peak"].size > 0:
                f_mnt_res_peak.append(d[0][0]["peak"][0])
            else:
                f_mnt_res_peak.append(None)
        f_mnt_res_info = {
            "res_val": f_mnt_res_val,
            "res_fit": f_mnt_res_fit,
            "expt_type": f_mnt_res_type,
            "rxn_id": f_mnt_res_id,
            "res_stdev": f_mnt_res_std,
            "time_point": f_mnt_res_time,
            "experiment_id": f_mnt_res_expt,
            "res_data": f_mnt_res_data,
            "res_peak": f_mnt_res_peak,
        }
        return f_mnt_res_info

[docs]    def sort_residual_info(self, f_mnt_res_info, simulation_info, fittedData):
        """
        Seperate the information from the original input, the
        "get_residuals_info(f)" function, the "extract_file_info(filename)"
        function and the "extract_base_stats(f, simulation_id, info)" function
        into appropriate rows

        Parameters
        ----------
        f_mnt_res_info : dict
            the output of the "get_residuals_info(f)" function
        simulation_info : pandas.DataFrame
            the MS fragment file corresponding to
            the simulation
        fittedData : pandas.DataFrame
            the output of the
            "extract_base_stats(f, simulation_id, info)" function

        Returns
        -------
        fittedMeasuredFluxResiduals : pandas.DataFrame
            info about the residuals of the
            fluxes used as an input for the simulation
        fittedMeasuredFragmentResiduals : pandas.DataFrame
            info about the residuals of
            the fragments in the MS data used as an input for
            the simulation
        """

        fittedMeasuredFluxResiduals = {}
        fittedMeasuredFragmentResiduals = {}

        expt_type = f_mnt_res_info["expt_type"]
        experiment_id = f_mnt_res_info["experiment_id"]
        time_point = f_mnt_res_info["time_point"]
        rxn_id = f_mnt_res_info["rxn_id"]
        res_data = f_mnt_res_info["res_data"]
        res_fit = f_mnt_res_info["res_fit"]
        res_peak = f_mnt_res_info["res_peak"]
        res_stdev = f_mnt_res_info["res_stdev"]
        res_val = f_mnt_res_info["res_val"]
        simulation_id = fittedData["simulation_id"].unique()[0]
        simulation_dateAndTime = fittedData["simulation_dateAndTime"].unique()[
            0
        ]

        for cnt, x_type in enumerate(expt_type):
            if x_type == "Flux":
                if experiment_id[cnt] in list(
                    simulation_info["experiment_id"]
                ):
                    fittedMeasuredFluxResiduals[cnt] = {
                        "simulation_id": simulation_id,
                        "simulation_dateAndTime": simulation_dateAndTime,
                        "experiment_id": experiment_id[cnt],
                        "sample_name_abbreviation": simulation_info[
                            "sample_name_abbreviation"
                        ][0],
                        "time_point": time_point[cnt],
                        "rxn_id": rxn_id[cnt],
                        "res_data": float(res_data[cnt]),
                        "res_fit": float(res_fit[cnt]),
                        "res_peak": res_peak[cnt],
                        "res_stdev": float(res_stdev[cnt]),
                        "res_val": float(res_val[cnt]),
                        "res_msens": None,
                        "res_esens": None,
                        "used_": True,
                        "comment_": None,
                    }
                elif experiment_id[cnt] in list(
                    simulation_info["sample_name_abbreviation"]
                ):
                    fittedMeasuredFluxResiduals[cnt] = {
                        "simulation_id": simulation_id,
                        "simulation_dateAndTime": simulation_dateAndTime,
                        "experiment_id": simulation_info["experiment_id"][0],
                        "sample_name_abbreviation": experiment_id[cnt],
                        "time_point": time_point[cnt],
                        "rxn_id": rxn_id[cnt],
                        "res_data": float(res_data[cnt]),
                        "res_fit": float(res_fit[cnt]),
                        "res_peak": res_peak[cnt],
                        "res_stdev": float(res_stdev[cnt]),
                        "res_val": float(res_val[cnt]),
                        "res_msens": None,
                        "res_esens": None,
                        "used_": True,
                        "comment_": None,
                    }
            elif x_type == "MS":
                # parse the id into fragment_id and mass
                fragment_string = rxn_id[cnt]
                fragment_string = re.sub("_DASH_", "-", fragment_string)
                fragment_string = re.sub(
                    "_LPARANTHES_", "[(]", fragment_string
                )
                fragment_string = re.sub(
                    "_RPARANTHES_", "[)]", fragment_string
                )
                fragment_list = fragment_string.split("_")
                if not len(fragment_list) > 5 or not (
                    "MRM" in fragment_list or "EPI" in fragment_list
                ):
                    fragment_id = "_".join(
                        [fragment_list[0], fragment_list[1], fragment_list[2]]
                    )
                    fragment_mass = Formula(fragment_list[2]).mass + float(
                        fragment_list[3]
                    )
                    time_point = fragment_list[4]
                else:
                    fragment_id = "_".join(
                        [
                            fragment_list[0],
                            fragment_list[1],
                            fragment_list[2],
                            fragment_list[3],
                        ]
                    )
                    fragment_mass = Formula(fragment_list[2]).mass + float(
                        fragment_list[4]
                    )
                    time_point = fragment_list[5]
                fragment_id = re.sub("-", "_DASH_", fragment_id)
                fragment_id = re.sub("[(]", "_LPARANTHES_", fragment_id)
                fragment_id = re.sub("[)]", "_RPARANTHES_", fragment_id)
                if experiment_id[cnt] in list(
                    simulation_info["experiment_id"]
                ):
                    fittedMeasuredFragmentResiduals[cnt] = {
                        "simulation_id": simulation_id,
                        "simulation_dateAndTime": simulation_dateAndTime,
                        "experiment_id": experiment_id[cnt],
                        "sample_name_abbreviation": simulation_info[
                            "sample_name_abbreviation"
                        ][0],
                        "time_point": time_point,
                        "fragment_id": fragment_id,
                        "fragment_mass": fragment_mass,
                        "res_data": float(res_data[cnt]),
                        "res_fit": float(res_fit[cnt]),
                        "res_peak": res_peak[cnt],
                        "res_stdev": float(res_stdev[cnt]),
                        "res_val": float(res_val[cnt]),
                        "res_msens": None,
                        "res_esens": None,
                        "used_": True,
                        "comment_": None,
                    }
                elif experiment_id[cnt] in list(
                    simulation_info["sample_name_abbreviation"]
                ):
                    fittedMeasuredFragmentResiduals[cnt] = {
                        "simulation_id": simulation_id,
                        "simulation_dateAndTime": simulation_dateAndTime,
                        "experiment_id": simulation_info["experiment_id"][0],
                        "sample_name_abbreviation": experiment_id[cnt],
                        "time_point": time_point[cnt],
                        "fragment_id": fragment_id,
                        "fragment_mass": fragment_mass,
                        "res_data": float(res_data[cnt]),
                        "res_fit": float(res_fit[cnt]),
                        "res_peak": res_peak[cnt],
                        "res_stdev": float(res_stdev[cnt]),
                        "res_val": float(res_val[cnt]),
                        "res_msens": None,
                        "res_esens": None,
                        "used_": True,
                        "comment_": None,
                    }
            else:
                print("type not recognized")
        fittedMeasuredFluxResiduals = pd.DataFrame.from_dict(
            fittedMeasuredFluxResiduals, "index"
        )
        fittedMeasuredFragmentResiduals = pd.DataFrame.from_dict(
            fittedMeasuredFragmentResiduals, "index"
        )
        return fittedMeasuredFluxResiduals, fittedMeasuredFragmentResiduals

[docs]    def get_fitted_parameters(self, f, simulation_info):
        """
        Extract the fitted parameters

        Parameters
        ----------
        f : scipy.MatlabObject
            the fit of the model
        simulation_info : pandas.DataFrame
            the MS fragment file corresponding to
            the simulation

        Returns
        -------
        f_par_info : dict
            a dict containing the reaction id,
            the corresponding flux value, the fluxes standard deviation,
            the type of the flux (net flux or norm), the lower and upper
            bounds of the flux,
            the fluxes unit, the alf, the chi2 values of the fit,
            a correlation parameter,
            the fluxes covariance and a boolean describing if it's a free
            flux or not.
        """
        f_par_id = []
        f_par_val = []
        f_par_std = []
        f_par_type = []  # 'Net flux' or 'Norm'
        f_par_lb = []
        f_par_ub = []
        f_par_unit = []
        f_par_alf = []
        f_par_chi2s = []
        f_par_cor = []
        f_par_cov = []
        f_par_free = []
        for d in f["par"][0][0][0]["id"]:
            if "Expt #1" in d[0]:
                id_str = d[0].astype("str")
                f_par_id.append(
                    id_str.replace(
                        "Expt #1", simulation_info["experiment_id"][0]
                    )
                )
            else:
                f_par_id.append(d[0])
        # ensure that there are no negative values or infinite values
        for d in f["par"][0][0][0]["val"]:
            if not d:
                f_par_val.append(0.0)
            elif isnan(d[0][0]):
                f_par_val.append(0.0)
            elif isinf(d[0][0]):
                f_par_val.append(1.0e3)
            else:
                f_par_val.append(float(d[0][0]))
        for d in f["par"][0][0][0]["std"]:
            if not d:
                f_par_std.append(0.0)
            elif isnan(d[0][0]):
                f_par_val.append(0.0)
            else:
                f_par_std.append(float(d[0][0]))
        for d in f["par"][0][0][0]["type"]:
            f_par_type.append(d[0])
        # adjust the lb and ub to [0,1000];
        for cnt, d in enumerate(f["par"][0][0][0]["lb"]):
            if d.size == 0:
                f_par_lb.append(0.0)
            elif isnan(d[0][0]):
                f_par_lb.append(0.0)
            else:
                f_par_lb.append(float(d[0][0]))
        for d in f["par"][0][0][0]["ub"]:
            if d.size == 0:
                f_par_ub.append(1.0e3)
            elif isinf(d[0][0]) or isnan(d[0][0]):
                f_par_ub.append(1.0e3)
            else:
                f_par_ub.append(float(d[0][0]))
        for d in f["par"][0][0][0]["unit"]:
            if d.size == 0:
                f_par_unit.append("mmol*gDCW-1*hr-1")
            else:
                f_par_unit.append(d[0])
        for d in f["par"][0][0][0]["alf"]:
            f_par_alf.append(float(d[0][0]))
        for d in f["par"][0][0][0]["chi2s"]:
            f_par_chi2s.append(d)
        for d in f["par"][0][0][0]["cor"]:
            f_par_cor.append(d)
        for d in f["par"][0][0][0]["cov"]:
            f_par_cov.append(d)
        for d in f["par"][0][0][0]["free"]:
            f_par_free.append(bool(d[0][0]))
        f_par_info = {
            "rxn_id": f_par_id,
            "flux": f_par_val,
            "flux_stdev": f_par_std,
            "par_type": f_par_type,
            "flux_lb": f_par_lb,
            "flux_ub": f_par_ub,
            "flux_units": f_par_unit,
            "fit_alf": f_par_alf,
            "fit_chi2s": f_par_chi2s,
            "fit_cor": f_par_cor,
            "fit_cov": f_par_cov,
            "free": f_par_free,
        }
        return f_par_info

    # fit_chi2s, fit_cor and fit_cov are arrays and produce quite a lot of
    # output, so sort_parameter_info stores None for them. To keep the
    # values, assign f_par_chi2s[cnt], f_par_cor[cnt] and f_par_cov[cnt]
    # instead (the commented-out variables right underneath the docstring).
[docs]    def sort_parameter_info(self, f_par_info, simulation_info, fittedData):
        """
        Seperate the information from the original input, the
        "get_fitted_parameters(f, simulation_info)"
        function, the "extract_file_info(filename)" function and the
        "extract_base_stats(f, simulation_id, info)" function
        into appropriate rows

        Parameters
        ----------
        f_par_info : dict
            output of the
            get_fitted_parameters(f, simulation_info) function
        simulation_info : pandas.DataFrame
            the MS fragment file corresponding to
            the simulation
        fittedData : pandas.DataFrame
            the output of the
            "extract_base_stats(f, simulation_id, info)" function

        Returns
        -------
        fittedFluxes : pandas.DataFrame
            info about the parameters of the fluxes used
            as an input for the simulation
        fittedFragments : pandas.DataFrame
            info about the parameters of the MS data
            used as an input for the simulation
        """
        fittedFluxes = {}
        fittedFragments = {}

        rxn_id = f_par_info["rxn_id"]
        flux = f_par_info["flux"]
        flux_stdev = f_par_info["flux_stdev"]
        par_type = f_par_info["par_type"]
        flux_lb = f_par_info["flux_lb"]
        flux_ub = f_par_info["flux_ub"]
        flux_units = f_par_info["flux_units"]
        fit_alf = f_par_info["fit_alf"]
        free = f_par_info["free"]
        # f_par_chi2s = f_par_info["fit_chi2s"]
        # f_par_cor = f_par_info["fit_cor"]
        # f_par_cov = f_par_info["fit_cov"]

        simulation_id = fittedData["simulation_id"].unique()[0]
        simulation_dateAndTime = fittedData["simulation_dateAndTime"].unique()[
            0
        ]

        for cnt, p_type in enumerate(par_type):
            if p_type == "Net flux":
                fittedFluxes[cnt] = {
                    "simulation_id": simulation_id,
                    "simulation_dateAndTime": simulation_dateAndTime,
                    "rxn_id": rxn_id[cnt],
                    "flux": flux[cnt],
                    "flux_stdev": flux_stdev[cnt],
                    "flux_lb": flux_lb[cnt],
                    "flux_ub": flux_ub[cnt],
                    "flux_units": flux_units[cnt],
                    "fit_alf": fit_alf[cnt],
                    "fit_chi2s": None,
                    "fit_cor": None,
                    "fit_cov": None,
                    "free": free[cnt],
                    "used_": True,
                    "comment_": None,
                }
            elif p_type == "Norm":
                # parse the id
                id_list = rxn_id[cnt].split(" ")
                expt = id_list[0]
                fragment_id = id_list[1]
                fragment_string = id_list[2]
                units = id_list[3]
                # parse the id into fragment_id and mass
                fragment_string = re.sub("_DASH_", "-", fragment_string)
                fragment_string = re.sub(
                    "_LPARANTHES_", "[(]", fragment_string
                )
                fragment_string = re.sub(
                    "_RPARANTHES_", "[)]", fragment_string
                )
                fragment_list = fragment_string.split("_")
                if not len(fragment_list) > 5 or not (
                    "MRM" in fragment_list or "EPI" in fragment_list
                ):
                    fragment_mass = Formula(fragment_list[2]).mass + float(
                        fragment_list[3]
                    )
                    time_point = fragment_list[4]
                else:
                    fragment_mass = Formula(fragment_list[2]).mass + float(
                        fragment_list[4]
                    )
                    time_point = fragment_list[5]
                if expt in list(simulation_info["experiment_id"]):
                    fittedFragments[cnt] = {
                        "simulation_id": simulation_id,
                        "simulation_dateAndTime": simulation_dateAndTime,
                        "experiment_id": expt,
                        "sample_name_abbreviation": simulation_info[
                            "sample_name_abbreviation"
                        ][0],
                        "time_point": time_point,
                        "fragment_id": fragment_id,
                        "fragment_mass": fragment_mass,
                        "fit_val": flux[cnt],
                        "fit_stdev": flux_stdev[cnt],
                        "fit_units": units,
                        "fit_alf": fit_alf[cnt],
                        "fit_cor": None,
                        "fit_cov": None,
                        "free": free[cnt],
                        "used_": True,
                        "comment_": None,
                    }
                elif expt in list(simulation_info["sample_name_abbreviation"]):
                    fittedFragments[cnt] = {
                        "simulation_id": simulation_id,
                        "simulation_dateAndTime": simulation_dateAndTime,
                        "experiment_id": simulation_info["experiment_id"][0],
                        "sample_name_abbreviation": expt,
                        "time_point": time_point,
                        "fragment_id": fragment_id,
                        "fragment_mass": fragment_mass,
                        "fit_val": flux[cnt],
                        "fit_stdev": flux_stdev[cnt],
                        "fit_units": units,
                        "fit_alf": fit_alf[cnt],
                        "fit_cor": None,
                        "fit_cov": None,
                        "free": free[cnt],
                        "used_": True,
                        "comment_": None,
                    }
            else:
                print("type not recognized")
        fittedFluxes = pd.DataFrame.from_dict(fittedFluxes, "index")
        fittedFragments = pd.DataFrame.from_dict(fittedFragments, "index")
        return fittedFluxes, fittedFragments

[docs]    def reimport(self, filename, simulation_info, simulation_id):
        """
        Summary function that includes all of the previously set up functions
        and produces all of the outputs

        Parameters
        ----------
        filename : str
            name of the .mat file we want to get information about
        simulation_info : pandas.DataFrame
            The MS fragment file corresponding to the simulation
        simulation_id : str
            The name of the experiment used for the simulation as in the MS
            fragment file

        Returns
        -------
        fittedData : pandas.DataFrame
            base statistics describing the fit
        fittedFluxes : pandas.DataFrame
            info about the fitted fluxes
        fittedFragments : pandas.DataFrame
            info about the fitted fragments
        fittedMeasuredFluxes : pandas.DataFrame
            info about the fluxes used as an input for the simulation
        fittedMeasuredFragments : pandas.DataFrame
            info about the MS data used as an input for the simulation
        fittedMeasuredFluxResiduals : pandas.DataFrame
            info about the residuals of the fluxes used as an input for
            the simulation
        fittedMeasuredFragmentResiduals : pandas.DataFrame
            info about the residuals of the fragments in the
            MS data used as an input for the simulation
        simulationParameters : pandas.DataFrame
            the simulation parameters
        """
        # Succession of functions
        info = self.extract_file_info(filename)
        parallel, non_stationary = self.det_simulation_type(simulation_info)
        m, f = self.data_extraction(filename)
        # model_info = self.extract_model_info(m)
        # not used for the final output
        simulationParameters = self.extract_sim_params(
            simulation_id, info, m, filename
        )
        fittedData = self.extract_base_stats(f, simulation_id, info)
        f_mnt_info = self.get_fit_info(f)
        fittedMeasuredFluxes, fittedMeasuredFragments = self.sort_fit_info(
            f_mnt_info, simulation_info, fittedData
        )
        f_mnt_res_info = self.get_residuals_info(f, simulation_info)
        (
            fittedMeasuredFluxResiduals,
            fittedMeasuredFragmentResiduals,
        ) = self.sort_residual_info(
            f_mnt_res_info, simulation_info, fittedData
        )
        f_par_info = self.get_fitted_parameters(f, simulation_info)
        fittedFluxes, fittedFragments = self.sort_parameter_info(
            f_par_info, simulation_info, fittedData
        )
        return (
            fittedData,
            fittedFluxes,
            fittedFragments,
            fittedMeasuredFluxes,
            fittedMeasuredFragments,
            fittedMeasuredFluxResiduals,
            fittedMeasuredFragmentResiduals,
            simulationParameters,
        )


# Module-level instance of INCA_reimport, created as soon as this module is
# imported (its __init__ only sets up empty DataFrames). It is not referenced
# in the part of the file reviewed here; presumably it exists so that the
# bare string that follows is picked up as its attribute docstring by the
# documentation tooling - TODO confirm.
# NOTE(review): the example below imports from ``BFAIR.INCA`` while the
# rendered page of this module is titled ``BFAIR.mfa.INCA.INCA_reimport`` -
# confirm which import path is current.
reimport_descr = INCA_reimport()
"""A class to re-import the INCA output data.
It is parsed from MATLAB to python DataFrames.

Examples
--------
>>> from BFAIR.INCA import INCA_reimport

After initialization, the data can either be re-imported all at once

>>> reimport_data = INCA_reimport()
>>> reimport_data.reimport(filename, simulation_info, simulation_id)

Or sequentially (not shown)

For more information on how to use this module or visualize the data, please check the example notebook in the BFAIR repository."""  # noqa E501