{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "247af578",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Determination of memory status is not supported on this \n",
" platform, measuring for memoryleaks will never fail\n"
]
}
],
"source": [
"import pandas as pd\n",
"import cobra\n",
"from BFAIR.mfa.INCA import INCA_reimport\n",
"from BFAIR.mfa.sampling import (\n",
" model_rxn_overlap,\n",
" rxn_coverage,\n",
" split_lumped_rxns,\n",
" split_lumped_reverse_rxns,\n",
" find_reverse_rxns,\n",
" combine_split_rxns,\n",
" cobra_add_split_rxns,\n",
" find_biomass_reaction,\n",
" replace_biomass_rxn_name,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "0e52647c",
"metadata": {},
"source": [
"#### INCA re-import"
]
},
{
"cell_type": "markdown",
"id": "ac4d8422",
"metadata": {},
"source": [
"First, let's reimport the data using our `BFAIR INCA_reimport` tools"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ee0f4c60",
"metadata": {},
"outputs": [],
"source": [
"filename = 'data/MFA_modelInputsData/TestFile.mat'\n",
"simulation_info = pd.read_csv('data/MFA_modelInputsData/Re-import/experimentalMS_data_I.csv')\n",
"simulation_id = 'WTEColi_113C80_U13C20_01'"
]
},
{
"cell_type": "markdown",
"id": "1ae4727d",
"metadata": {},
"source": [
"Here we re-import the INCA output"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b514e21e",
"metadata": {},
"outputs": [],
"source": [
"reimport_data = INCA_reimport()\n",
"(fittedData,\n",
" fittedFluxes,\n",
" fittedFragments,\n",
" fittedMeasuredFluxes,\n",
" fittedMeasuredFragments,\n",
" fittedMeasuredFluxResiduals,\n",
" fittedMeasuredFragmentResiduals,\n",
" simulationParameters) = reimport_data.reimport(\n",
" filename,\n",
" simulation_info,\n",
" simulation_id\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a56c8f11",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" simulation_id | \n",
" simulation_dateAndTime | \n",
" rxn_id | \n",
" flux | \n",
" flux_stdev | \n",
" flux_lb | \n",
" flux_ub | \n",
" flux_units | \n",
" fit_alf | \n",
" fit_chi2s | \n",
" fit_cor | \n",
" fit_cov | \n",
" free | \n",
" used_ | \n",
" comment_ | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" 26dap_DASH_MSYN | \n",
" 2.295040e-01 | \n",
" 0.002608 | \n",
" 0.224392 | \n",
" 0.234616 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 1 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" ACONTa_ACONTb | \n",
" 2.074886e+00 | \n",
" 16996.864976 | \n",
" 1.185984 | \n",
" 1000.000000 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 2 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" ACONTa_ACONTb_reverse | \n",
" 8.690514e-07 | \n",
" 15432.592032 | \n",
" 0.000000 | \n",
" 28.927600 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" True | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 3 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" AKGDH | \n",
" 1.423617e-01 | \n",
" 7673.615592 | \n",
" 0.000000 | \n",
" 1.919800 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 4 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" ALATA_L | \n",
" 3.435520e-01 | \n",
" 0.003904 | \n",
" 0.335900 | \n",
" 0.351204 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 92 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" TPI | \n",
" 2.175603e+00 | \n",
" 25620.656341 | \n",
" 0.000000 | \n",
" 1000.000000 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 93 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" TPI_reverse | \n",
" 8.689299e-07 | \n",
" 24739.593649 | \n",
" 0.000000 | \n",
" 1000.000000 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" True | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 94 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" TrpSYN | \n",
" 3.801600e-02 | \n",
" 0.000432 | \n",
" 0.037169 | \n",
" 0.038863 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 95 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" TyrSYN | \n",
" 9.222400e-02 | \n",
" 0.001048 | \n",
" 0.090170 | \n",
" 0.094278 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 96 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" ValSYN | \n",
" 2.830080e-01 | \n",
" 0.003216 | \n",
" 0.276705 | \n",
" 0.289311 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
"
\n",
"
97 rows × 15 columns
\n",
"
"
],
"text/plain": [
" simulation_id simulation_dateAndTime rxn_id \\\n",
"0 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 26dap_DASH_MSYN \n",
"1 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 ACONTa_ACONTb \n",
"2 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 ACONTa_ACONTb_reverse \n",
"3 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 AKGDH \n",
"4 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 ALATA_L \n",
".. ... ... ... \n",
"92 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 TPI \n",
"93 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 TPI_reverse \n",
"94 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 TrpSYN \n",
"95 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 TyrSYN \n",
"96 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 ValSYN \n",
"\n",
" flux flux_stdev flux_lb flux_ub flux_units \\\n",
"0 2.295040e-01 0.002608 0.224392 0.234616 mmol*gDCW-1*hr-1 \n",
"1 2.074886e+00 16996.864976 1.185984 1000.000000 mmol*gDCW-1*hr-1 \n",
"2 8.690514e-07 15432.592032 0.000000 28.927600 mmol*gDCW-1*hr-1 \n",
"3 1.423617e-01 7673.615592 0.000000 1.919800 mmol*gDCW-1*hr-1 \n",
"4 3.435520e-01 0.003904 0.335900 0.351204 mmol*gDCW-1*hr-1 \n",
".. ... ... ... ... ... \n",
"92 2.175603e+00 25620.656341 0.000000 1000.000000 mmol*gDCW-1*hr-1 \n",
"93 8.689299e-07 24739.593649 0.000000 1000.000000 mmol*gDCW-1*hr-1 \n",
"94 3.801600e-02 0.000432 0.037169 0.038863 mmol*gDCW-1*hr-1 \n",
"95 9.222400e-02 0.001048 0.090170 0.094278 mmol*gDCW-1*hr-1 \n",
"96 2.830080e-01 0.003216 0.276705 0.289311 mmol*gDCW-1*hr-1 \n",
"\n",
" fit_alf fit_chi2s fit_cor fit_cov free used_ comment_ \n",
"0 0.05 None None None False True None \n",
"1 0.05 None None None False True None \n",
"2 0.05 None None None True True None \n",
"3 0.05 None None None False True None \n",
"4 0.05 None None None False True None \n",
".. ... ... ... ... ... ... ... \n",
"92 0.05 None None None False True None \n",
"93 0.05 None None None True True None \n",
"94 0.05 None None None False True None \n",
"95 0.05 None None None False True None \n",
"96 0.05 None None None False True None \n",
"\n",
"[97 rows x 15 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fittedFluxes"
]
},
{
"cell_type": "markdown",
"id": "ea2e6c1e",
"metadata": {},
"source": [
"Here we import the model"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f2f6f0d7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Academic license - for non-commercial use only - expires 2021-07-30\n",
"Using license file /Users/matmat/gurobi.lic\n"
]
}
],
"source": [
"model = cobra.io.load_json_model('data/FIA_MS_example/database_files/iJO1366.json')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "213b6a45",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"61.0 %\n"
]
}
],
"source": [
"rxn_coverage(fittedFluxes, model)"
]
},
{
"cell_type": "markdown",
"id": "ef6cf6a2",
"metadata": {},
"source": [
"61 % of all the reimported reactions have non-overlapping names with the model we want to use for simulations! That's a bit much. Let's do something about that."
]
},
{
"cell_type": "markdown",
"id": "9cac90bd",
"metadata": {},
"source": [
"First, let's find the name of the biomass reaction in the model and replace the one in our data with it"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "9f0cb319",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['BIOMASS_Ec_iJO1366_WT_53p95M', 'BIOMASS_Ec_iJO1366_core_53p95M']"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"find_biomass_reaction(model)"
]
},
{
"cell_type": "markdown",
"id": "db5cf93e",
"metadata": {},
"source": [
"This model has two biomass reactions, a full and a reduced core biomass reaction. In the summary we can see that the core biomass reaction is the assigned objective function, so we will reassign this name to our biomass function"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "0716fb30",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Objective
1.0 BIOMASS_Ec_iJO1366_core_53p95M = 0.9823718127269793
Uptake
\n",
" \n",
" \n",
" | Metabolite | \n",
" Reaction | \n",
" Flux | \n",
" C-Number | \n",
" C-Flux | \n",
"
\n",
" \n",
" \n",
" \n",
" | ca2_e | \n",
" EX_ca2_e | \n",
" 0.005113 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | cl_e | \n",
" EX_cl_e | \n",
" 0.005113 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | cobalt2_e | \n",
" EX_cobalt2_e | \n",
" 2.456E-05 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | cu2_e | \n",
" EX_cu2_e | \n",
" 0.0006965 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | fe2_e | \n",
" EX_fe2_e | \n",
" 0.01578 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | glc__D_e | \n",
" EX_glc__D_e | \n",
" 10 | \n",
" 6 | \n",
" 100.00% | \n",
"
\n",
" \n",
" | k_e | \n",
" EX_k_e | \n",
" 0.1918 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | mg2_e | \n",
" EX_mg2_e | \n",
" 0.008522 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | mn2_e | \n",
" EX_mn2_e | \n",
" 0.0006788 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | mobd_e | \n",
" EX_mobd_e | \n",
" 0.0001267 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | nh4_e | \n",
" EX_nh4_e | \n",
" 10.61 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | ni2_e | \n",
" EX_ni2_e | \n",
" 0.0003173 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | o2_e | \n",
" EX_o2_e | \n",
" 17.58 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | pi_e | \n",
" EX_pi_e | \n",
" 0.9476 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | so4_e | \n",
" EX_so4_e | \n",
" 0.2478 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | zn2_e | \n",
" EX_zn2_e | \n",
" 0.000335 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
"
Secretion
\n",
" \n",
" \n",
" | Metabolite | \n",
" Reaction | \n",
" Flux | \n",
" C-Number | \n",
" C-Flux | \n",
"
\n",
" \n",
" \n",
" \n",
" | 4crsol_c | \n",
" DM_4crsol_c | \n",
" -0.0002191 | \n",
" 7 | \n",
" 0.01% | \n",
"
\n",
" \n",
" | 5drib_c | \n",
" DM_5drib_c | \n",
" -0.000221 | \n",
" 5 | \n",
" 0.01% | \n",
"
\n",
" \n",
" | amob_c | \n",
" DM_amob_c | \n",
" -1.965E-06 | \n",
" 15 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | mththf_c | \n",
" DM_mththf_c | \n",
" -0.0004401 | \n",
" 5 | \n",
" 0.01% | \n",
"
\n",
" \n",
" | co2_e | \n",
" EX_co2_e | \n",
" -19.68 | \n",
" 1 | \n",
" 99.98% | \n",
"
\n",
" \n",
" | h2o_e | \n",
" EX_h2o_e | \n",
" -45.62 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | h_e | \n",
" EX_h_e | \n",
" -9.026 | \n",
" 0 | \n",
" 0.00% | \n",
"
\n",
" \n",
" | meoh_e | \n",
" EX_meoh_e | \n",
" -1.965E-06 | \n",
" 1 | \n",
" 0.00% | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "7482586f",
"metadata": {},
"outputs": [],
"source": [
"# Rename the MFA biomass reaction to the id of the model's objective (core) biomass reaction.\n",
"fittedFluxes = replace_biomass_rxn_name(\n",
"    fittedFluxes,\n",
"    biomass_string='Biomass',\n",
"    biomass_rxn_name='BIOMASS_Ec_iJO1366_core_53p95M',\n",
")"
]
},
{
"cell_type": "markdown",
"id": "3cefceae",
"metadata": {},
"source": [
"Next step, adjust the names of our MFA data so that they can be assigned to our model's reactions"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "2dc33063",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 26dap_DASH_MSYN\n",
"1 ACONTa_ACONTb\n",
"2 ACONTa_ACONTb_reverse\n",
"5 ArgSYN\n",
"14 EX_ac_LPAREN_e_RPAREN_\n",
"15 EX_co2_LPAREN_e_RPAREN_\n",
"16 EX_co2_LPAREN_e_RPAREN__unlabeled\n",
"17 EX_glc_LPAREN_e_RPAREN_\n",
"18 EX_nh4_LPAREN_e_RPAREN_\n",
"19 EX_o2_LPAREN_e_RPAREN_\n",
"20 EX_so4_LPAREN_e_RPAREN_\n",
"21 FADR_NADH_CYTBD_HYD_ATPS4r\n",
"23 FBA_reverse\n",
"25 FUM_reverse\n",
"26 G6PDH2r_PGL\n",
"27 GAPD_PGK\n",
"28 GAPD_PGK_reverse\n",
"30 GHMT2r_reverse\n",
"34 GluSYN\n",
"37 HisSYN\n",
"39 ICDHyr_reverse\n",
"41 IleSYN\n",
"42 LeuSYN\n",
"45 MDH_reverse\n",
"48 MetSYN\n",
"49 MlthfSYN\n",
"50 MlthfSYN_reverse\n",
"53 NADH_CYTBD_HYD_ATPS4r\n",
"54 NADTRHD_THD2pp\n",
"55 NADTRHD_THD2pp_reverse\n",
"59 PGI_reverse\n",
"61 PGM_reverse\n",
"62 PheSYN\n",
"65 ProSYN\n",
"67 PTAr_ACKr_ACS\n",
"68 PTAr_ACKr_ACS_reverse\n",
"71 RPE_reverse\n",
"73 RPI_reverse\n",
"74 SERAT_CYSS\n",
"75 SerSYN\n",
"76 SUCCOAS\n",
"77 SUCCOAS_reverse\n",
"79 SUCDi_reverse\n",
"81 TALA_reverse\n",
"82 THRD_GLYAT\n",
"83 ThrSYN\n",
"84 TKT1a\n",
"85 TKT1a_reverse\n",
"86 TKT1b\n",
"87 TKT1b_reverse\n",
"88 TKT2a\n",
"89 TKT2a_reverse\n",
"90 TKT2b\n",
"91 TKT2b_reverse\n",
"93 TPI_reverse\n",
"94 TrpSYN\n",
"95 TyrSYN\n",
"96 ValSYN\n",
"Name: rxn_id, dtype: object"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model_rxn_overlap(fittedFluxes, model)"
]
},
{
"cell_type": "markdown",
"id": "dbda89f0",
"metadata": {},
"source": [
"Observations: \n",
"1) some reaction names lump together more than one reaction (e.g. `ACONTa_ACONTb`)\n",
"2) many unassigned amino acid reactions end with `SYN`\n",
"3) some exchange reactions include `LPAREN_` and `RPAREN_`\n",
"4) probably none of the `_reverse` reactions could be assigned\n",
"\n",
"Let's try to do something about that."
]
},
{
"cell_type": "markdown",
"id": "dda8a6c2",
"metadata": {},
"source": [
"1) Split the lumped reactions and give all of them the same bounds"
]
},
{
"cell_type": "markdown",
"id": "1c5ecaf0",
"metadata": {},
"source": [
"So let's pick the ones we want. Let's save the reverse reactions for a separate step"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "4532e13b",
"metadata": {},
"outputs": [],
"source": [
"# Index labels (as shown in the overlap Series) of the lumped forward reactions to split;\n",
"# the `_reverse` counterparts are handled in a separate step.\n",
"lumped_ids = [1, 21, 26, 27, 53, 54, 67, 74, 82]\n",
"overlap = model_rxn_overlap(fittedFluxes, model)\n",
"# Boolean mask aligned with `overlap`: True where the index label is one of `lumped_ids`.\n",
"# `Series.iteritems()` was removed in pandas 2.0, so the index is tested directly instead.\n",
"mask = overlap.index.isin(lumped_ids)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "698d6eb0",
"metadata": {},
"outputs": [],
"source": [
"lumped_rxns = model_rxn_overlap(fittedFluxes, model)[mask]\n",
"fittedFluxes = split_lumped_rxns(lumped_rxns, fittedFluxes)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "80a80b32",
"metadata": {},
"outputs": [],
"source": [
"# Index labels of the lumped `_reverse` reactions that still need to be split.\n",
"lumped_reverse_ids = [2, 28, 55, 68]\n",
"# Recompute the overlap here: `fittedFluxes` was reassigned when the forward reactions were split.\n",
"overlap_reverse = model_rxn_overlap(fittedFluxes, model)\n",
"# Boolean mask aligned with `overlap_reverse`: True where the index label is a lumped reverse id.\n",
"# `Series.iteritems()` was removed in pandas 2.0, so the index is tested directly instead.\n",
"mask_reverse = overlap_reverse.index.isin(lumped_reverse_ids)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "2f8149c7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ACONTa_ACONTb_reverse\n",
"GAPD_PGK_reverse\n",
"NADTRHD_THD2pp_reverse\n",
"PTAr_ACKr_ACS_reverse\n"
]
}
],
"source": [
"lumped_reverse_rxns = model_rxn_overlap(fittedFluxes, model)[mask_reverse]\n",
"fittedFluxes = split_lumped_reverse_rxns(lumped_reverse_rxns, fittedFluxes)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "b22349e6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 26dap_DASH_MSYN\n",
"2 ACONTa_reverse\n",
"5 ArgSYN\n",
"14 EX_ac_LPAREN_e_RPAREN_\n",
"15 EX_co2_LPAREN_e_RPAREN_\n",
" ... \n",
"112 ACONTb_reverse\n",
"113 PGK_reverse\n",
"114 THD2pp_reverse\n",
"115 ACKr_reverse\n",
"116 ACS_reverse\n",
"Name: rxn_id, Length: 63, dtype: object"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model_rxn_overlap(fittedFluxes, model)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "b65848cb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"54.0 %\n"
]
}
],
"source": [
"rxn_coverage(fittedFluxes, model)"
]
},
{
"cell_type": "markdown",
"id": "f163e56e",
"metadata": {},
"source": [
"2) SYN, these reactions might be lumped; let's investigate!"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d78af3cb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ARGAGMt7pp\n",
"ARGDC\n",
"ARGDCpp\n",
"ARGORNt7pp\n",
"ARGSL\n",
"ARGSS\n",
"ARGTRS\n",
"ARGabcpp\n",
"ARGt3pp\n",
"ARGtex\n"
]
}
],
"source": [
"# Print every model reaction id containing 'ARG', to compare against the unassigned `ArgSYN`.\n",
"arg_rxn_ids = (reaction.id for reaction in model.reactions if 'ARG' in reaction.id)\n",
"for arg_rxn_id in arg_rxn_ids:\n",
"    print(arg_rxn_id)"
]
},
{
"cell_type": "markdown",
"id": "38c9a1da",
"metadata": {},
"source": [
"Yeah I guess so... This sucks, not sure if we can do anything about that"
]
},
{
"cell_type": "markdown",
"id": "48a77bad",
"metadata": {},
"source": [
"3) Let's remove the extra bits in the exchange reaction strings"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "35ad96e7",
"metadata": {},
"outputs": [],
"source": [
"# Strip the escaped parentheses from exchange reaction ids,\n",
"# e.g. 'EX_ac_LPAREN_e_RPAREN_' -> 'EX_ac_e'.\n",
"# Only rows whose id contains 'LPAREN_' are touched; plain (non-regex) matching is used throughout.\n",
"has_paren = fittedFluxes['rxn_id'].str.contains('LPAREN_', regex=False)\n",
"fittedFluxes.loc[has_paren, 'rxn_id'] = (\n",
"    fittedFluxes.loc[has_paren, 'rxn_id']\n",
"    .str.replace('LPAREN_', '', regex=False)\n",
"    .str.replace('_RPAREN_', '', regex=False)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "51464d06",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"50.0 %\n"
]
}
],
"source": [
"rxn_coverage(fittedFluxes, model)"
]
},
{
"cell_type": "markdown",
"id": "2442e13a",
"metadata": {},
"source": [
"4) Reverse. Let's check if the forward and reverse fluxes are actually separate. If not, then the two of them will define the bounds together. If they are, then we should add new reverse reactions to the model."
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "6113cc10",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" forward | \n",
" reverse | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2 | \n",
" ACONTa | \n",
" ACONTa_reverse | \n",
"
\n",
" \n",
" | 23 | \n",
" FBA | \n",
" FBA_reverse | \n",
"
\n",
" \n",
" | 25 | \n",
" FUM | \n",
" FUM_reverse | \n",
"
\n",
" \n",
" | 28 | \n",
" GAPD | \n",
" GAPD_reverse | \n",
"
\n",
" \n",
" | 30 | \n",
" GHMT2r | \n",
" GHMT2r_reverse | \n",
"
\n",
" \n",
" | 39 | \n",
" ICDHyr | \n",
" ICDHyr_reverse | \n",
"
\n",
" \n",
" | 45 | \n",
" MDH | \n",
" MDH_reverse | \n",
"
\n",
" \n",
" | 50 | \n",
" MlthfSYN | \n",
" MlthfSYN_reverse | \n",
"
\n",
" \n",
" | 55 | \n",
" NADTRHD | \n",
" NADTRHD_reverse | \n",
"
\n",
" \n",
" | 59 | \n",
" PGI | \n",
" PGI_reverse | \n",
"
\n",
" \n",
" | 61 | \n",
" PGM | \n",
" PGM_reverse | \n",
"
\n",
" \n",
" | 68 | \n",
" PTAr | \n",
" PTAr_reverse | \n",
"
\n",
" \n",
" | 71 | \n",
" RPE | \n",
" RPE_reverse | \n",
"
\n",
" \n",
" | 73 | \n",
" RPI | \n",
" RPI_reverse | \n",
"
\n",
" \n",
" | 77 | \n",
" SUCCOAS | \n",
" SUCCOAS_reverse | \n",
"
\n",
" \n",
" | 79 | \n",
" SUCDi | \n",
" SUCDi_reverse | \n",
"
\n",
" \n",
" | 81 | \n",
" TALA | \n",
" TALA_reverse | \n",
"
\n",
" \n",
" | 85 | \n",
" TKT1a | \n",
" TKT1a_reverse | \n",
"
\n",
" \n",
" | 87 | \n",
" TKT1b | \n",
" TKT1b_reverse | \n",
"
\n",
" \n",
" | 89 | \n",
" TKT2a | \n",
" TKT2a_reverse | \n",
"
\n",
" \n",
" | 91 | \n",
" TKT2b | \n",
" TKT2b_reverse | \n",
"
\n",
" \n",
" | 93 | \n",
" TPI | \n",
" TPI_reverse | \n",
"
\n",
" \n",
" | 112 | \n",
" ACONTb | \n",
" ACONTb_reverse | \n",
"
\n",
" \n",
" | 113 | \n",
" PGK | \n",
" PGK_reverse | \n",
"
\n",
" \n",
" | 114 | \n",
" THD2pp | \n",
" THD2pp_reverse | \n",
"
\n",
" \n",
" | 115 | \n",
" ACKr | \n",
" ACKr_reverse | \n",
"
\n",
" \n",
" | 116 | \n",
" ACS | \n",
" ACS_reverse | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" forward reverse\n",
"2 ACONTa ACONTa_reverse\n",
"23 FBA FBA_reverse\n",
"25 FUM FUM_reverse\n",
"28 GAPD GAPD_reverse\n",
"30 GHMT2r GHMT2r_reverse\n",
"39 ICDHyr ICDHyr_reverse\n",
"45 MDH MDH_reverse\n",
"50 MlthfSYN MlthfSYN_reverse\n",
"55 NADTRHD NADTRHD_reverse\n",
"59 PGI PGI_reverse\n",
"61 PGM PGM_reverse\n",
"68 PTAr PTAr_reverse\n",
"71 RPE RPE_reverse\n",
"73 RPI RPI_reverse\n",
"77 SUCCOAS SUCCOAS_reverse\n",
"79 SUCDi SUCDi_reverse\n",
"81 TALA TALA_reverse\n",
"85 TKT1a TKT1a_reverse\n",
"87 TKT1b TKT1b_reverse\n",
"89 TKT2a TKT2a_reverse\n",
"91 TKT2b TKT2b_reverse\n",
"93 TPI TPI_reverse\n",
"112 ACONTb ACONTb_reverse\n",
"113 PGK PGK_reverse\n",
"114 THD2pp THD2pp_reverse\n",
"115 ACKr ACKr_reverse\n",
"116 ACS ACS_reverse"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"find_reverse_rxns(fittedFluxes)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "b9820148",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"These reactions need to be split into two: ACONTa\n",
"These reactions need to be split into two: FUM\n",
"These reactions need to be split into two: GAPD\n",
"These reactions need to be split into two: ICDHyr\n",
"These reactions need to be split into two: MlthfSYN\n",
"These reactions need to be split into two: PGM\n",
"These reactions need to be split into two: PTAr\n",
"These reactions need to be split into two: ACONTb\n",
"These reactions need to be split into two: PGK\n",
"These reactions need to be split into two: ACKr\n",
"These reactions need to be split into two: ACS\n"
]
}
],
"source": [
"fittedFluxes, rxns_to_split = combine_split_rxns(fittedFluxes)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "b382bb6e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" index | \n",
" simulation_id | \n",
" simulation_dateAndTime | \n",
" rxn_id | \n",
" flux | \n",
" flux_stdev | \n",
" flux_lb | \n",
" flux_ub | \n",
" flux_units | \n",
" fit_alf | \n",
" fit_chi2s | \n",
" fit_cor | \n",
" fit_cov | \n",
" free | \n",
" used_ | \n",
" comment_ | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" 26dap_DASH_MSYN | \n",
" 2.295040e-01 | \n",
" 2.607999e-03 | \n",
" 0.224392 | \n",
" 0.234616 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 1 | \n",
" 1 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" ACONTa | \n",
" 2.074886e+00 | \n",
" 1.699686e+04 | \n",
" 1.185984 | \n",
" 1000.000000 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 2 | \n",
" 2 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" ACONTa_reverse | \n",
" 8.690514e-07 | \n",
" 1.543259e+04 | \n",
" 0.000000 | \n",
" 28.927600 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" True | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 3 | \n",
" 3 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" AKGDH | \n",
" 1.423617e-01 | \n",
" 7.673616e+03 | \n",
" 0.000000 | \n",
" 1.919800 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 4 | \n",
" 4 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" ALATA_L | \n",
" 3.435520e-01 | \n",
" 3.904002e-03 | \n",
" 0.335900 | \n",
" 0.351204 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 96 | \n",
" 111 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" GLYAT | \n",
" 1.000000e-07 | \n",
" 6.096786e-12 | \n",
" 0.000000 | \n",
" 0.035201 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" False | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 97 | \n",
" 112 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" ACONTb_reverse | \n",
" 8.690514e-07 | \n",
" 1.543259e+04 | \n",
" 0.000000 | \n",
" 28.927600 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" True | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 98 | \n",
" 113 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" PGK_reverse | \n",
" 8.689295e-07 | \n",
" 9.920303e+03 | \n",
" 0.000000 | \n",
" 24.418874 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" True | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 99 | \n",
" 115 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" ACKr_reverse | \n",
" 8.695792e-07 | \n",
" 2.139235e+04 | \n",
" 0.000000 | \n",
" 28.262368 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" True | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
" | 100 | \n",
" 116 | \n",
" WTEColi_113C80_U13C20_01 | \n",
" 2021-04-16 18:29:37 | \n",
" ACS_reverse | \n",
" 8.695792e-07 | \n",
" 2.139235e+04 | \n",
" 0.000000 | \n",
" 28.262368 | \n",
" mmol*gDCW-1*hr-1 | \n",
" 0.05 | \n",
" None | \n",
" None | \n",
" None | \n",
" True | \n",
" True | \n",
" None | \n",
"
\n",
" \n",
"
\n",
"
101 rows × 16 columns
\n",
"
"
],
"text/plain": [
" index simulation_id simulation_dateAndTime rxn_id \\\n",
"0 0 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 26dap_DASH_MSYN \n",
"1 1 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 ACONTa \n",
"2 2 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 ACONTa_reverse \n",
"3 3 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 AKGDH \n",
"4 4 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 ALATA_L \n",
".. ... ... ... ... \n",
"96 111 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 GLYAT \n",
"97 112 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 ACONTb_reverse \n",
"98 113 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 PGK_reverse \n",
"99 115 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 ACKr_reverse \n",
"100 116 WTEColi_113C80_U13C20_01 2021-04-16 18:29:37 ACS_reverse \n",
"\n",
" flux flux_stdev flux_lb flux_ub flux_units \\\n",
"0 2.295040e-01 2.607999e-03 0.224392 0.234616 mmol*gDCW-1*hr-1 \n",
"1 2.074886e+00 1.699686e+04 1.185984 1000.000000 mmol*gDCW-1*hr-1 \n",
"2 8.690514e-07 1.543259e+04 0.000000 28.927600 mmol*gDCW-1*hr-1 \n",
"3 1.423617e-01 7.673616e+03 0.000000 1.919800 mmol*gDCW-1*hr-1 \n",
"4 3.435520e-01 3.904002e-03 0.335900 0.351204 mmol*gDCW-1*hr-1 \n",
".. ... ... ... ... ... \n",
"96 1.000000e-07 6.096786e-12 0.000000 0.035201 mmol*gDCW-1*hr-1 \n",
"97 8.690514e-07 1.543259e+04 0.000000 28.927600 mmol*gDCW-1*hr-1 \n",
"98 8.689295e-07 9.920303e+03 0.000000 24.418874 mmol*gDCW-1*hr-1 \n",
"99 8.695792e-07 2.139235e+04 0.000000 28.262368 mmol*gDCW-1*hr-1 \n",
"100 8.695792e-07 2.139235e+04 0.000000 28.262368 mmol*gDCW-1*hr-1 \n",
"\n",
" fit_alf fit_chi2s fit_cor fit_cov free used_ comment_ \n",
"0 0.05 None None None False True None \n",
"1 0.05 None None None False True None \n",
"2 0.05 None None None True True None \n",
"3 0.05 None None None False True None \n",
"4 0.05 None None None False True None \n",
".. ... ... ... ... ... ... ... \n",
"96 0.05 None None None False True None \n",
"97 0.05 None None None True True None \n",
"98 0.05 None None None True True None \n",
"99 0.05 None None None True True None \n",
"100 0.05 None None None True True None \n",
"\n",
"[101 rows x 16 columns]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fittedFluxes"
]
},
{
"cell_type": "markdown",
"id": "58206bfc",
"metadata": {},
"source": [
"The reactions that are actually separate (i.e. non-overlapping bounds, exchange fluxes) are a problem. COBRA has some ways to account for that but they seem to be quite involved. An easier way to deal with that is to just add the reverse reaction as a separate reaction to the model; it's the same reaction, just with the inverse direction. The following method is \"destructive\", i.e. it will alter the model. Be aware of that."
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "85fc40e4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['ACONTa',\n",
" 'FUM',\n",
" 'GAPD',\n",
" 'ICDHyr',\n",
" 'MlthfSYN',\n",
" 'PGM',\n",
" 'PTAr',\n",
" 'ACONTb',\n",
" 'PGK',\n",
" 'ACKr',\n",
" 'ACS']"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rxns_to_split"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "8c0cb14b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"- Added ACONTa to model\n",
"- Added FUM to model\n",
"- Added GAPD to model\n",
"- Added ICDHyr to model\n",
"# Could not add MlthfSYN to model\n",
"- Added PGM to model\n",
"- Added PTAr to model\n",
"- Added ACONTb to model\n",
"- Added PGK to model\n",
"- Added ACKr to model\n",
"- Added ACS to model\n"
]
}
],
"source": [
"cobra_add_split_rxns(rxns_to_split, model)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "becc5617",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 26dap_DASH_MSYN\n",
"5 ArgSYN\n",
"16 EX_co2_e_unlabeled\n",
"17 EX_glc_e\n",
"21 FADR\n",
"32 GluSYN\n",
"35 HisSYN\n",
"39 IleSYN\n",
"40 LeuSYN\n",
"45 MetSYN\n",
"46 MlthfSYN\n",
"47 MlthfSYN_reverse\n",
"50 NADH\n",
"57 PheSYN\n",
"60 ProSYN\n",
"68 SerSYN\n",
"69 SUCCOAS\n",
"73 ThrSYN\n",
"74 TKT1a\n",
"75 TKT1b\n",
"76 TKT2a\n",
"77 TKT2b\n",
"79 TrpSYN\n",
"80 TyrSYN\n",
"81 ValSYN\n",
"83 NADH\n",
"84 CYTBD\n",
"85 HYD\n",
"86 ATPS4r\n",
"89 CYTBD\n",
"90 HYD\n",
"91 ATPS4r\n",
"Name: rxn_id, dtype: object"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model_rxn_overlap(fittedFluxes, model)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "cdae7dde",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"32.0 %\n"
]
}
],
"source": [
"rxn_coverage(fittedFluxes, model)"
]
},
{
"cell_type": "markdown",
"id": "f170ebdb",
"metadata": {},
"source": [
"Ok... well, guess you can't please everyone. You could of course find a way to add the remaining reactions manually or investigate further how to distribute their values to other reactions but this is as far as we will go here."
]
},
{
"cell_type": "markdown",
"id": "88d5a965",
"metadata": {},
"source": [
"For the next steps, please check the `MFA_feasibility_and_sampling` notebook"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "e4ec00a0",
"metadata": {},
"outputs": [],
"source": [
"fittedFluxes.to_pickle(\"data/MFA_sampling/preprocessed_fittedFluxes.obj\")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "dde3ee59",
"metadata": {},
"outputs": [],
"source": [
"cobra.io.save_json_model(model, \"data/MFA_sampling/preprocessed_model.json\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99d4c41a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "bfair",
"language": "python",
"name": "bfair"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}