This example estimates reaction rates for the reactions \(A \rightarrow B \rightarrow C\) based on experimental data on concentrations of \(A\), \(B\) and \(C\) over time. This example is taken from the book Chemical Reactor Analysis and Design - Rawlings and Ekerdt The parameter estimation problem is to find the rate constants \(k1\), \(k2\) and initial concentrations of A, B, and C that minimize the sum of squares error between experimental and predicted data. \[ \min \;\; \sum_{i = 1}^N((ca(t_i) - ca^{meas}(t_i))^2 + (cb(t_i) - cb^{meas}(t_i))^2 + (cc(t_i) - cc^{meas}(t_i))^2) \\ \frac{dca}{dt} = -k_1ca \\ \frac{dcb}{dt} = k_1ca - k_2cb \\ \frac{dcc}{dt} = k_2cb \\ ca[0] = ca0 \\ cb[0] = cb0 \\ cc[0] = cc0 \\ 0 \leq t \leq t_f \] where experimental data on concentrations is measured at \(N\) time points \(\{t_1, t_2, \ldots, t_N\}\)
# set-up python version to use
reticulate::use_python("/opt/anaconda3/bin/python")
library(reticulate)
# import pyomo
pyo = import("pyomo.environ", convert = FALSE)
pydae = import("pyomo.dae", convert = FALSE)
bi = import_builtins()
# had signal handling issues when using solver. Turned
# off signal handling based on this discussion thread
# https://github.com/PyUtilib/pyutilib/issues/31
pyulib = import("pyutilib.subprocess.GlobalData")
pyulib$DEFINE_SIGNAL_HANDLERS_DEFAULT = FALSE
# printing model to a file, reading it back into R to print in R markdown
mdl_print = function(m) {
printstr = "
with open('tmp_model.txt', 'w') as f:
r.mdl.pprint(ostream=f)
f.close()
"
printstr = gsub("mdl", m, printstr)
py_run_string(printstr)
printstr_R = paste(readLines("tmp_model.txt"), collapse = "\n")
return(printstr_R)
}
# get operators
source("operators.R")
# load other libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# read exprimental data (this data was generated using simulation)
data_df = read.csv("python_codes/ABC_data.csv", stringsAsFactors = FALSE)
head(data_df)
## t ca cb cc
## 1 0.000 0.957 -0.031 -0.015
## 2 0.263 0.557 0.330 0.044
## 3 0.526 0.342 0.512 0.156
## 4 0.789 0.224 0.499 0.310
## 5 1.053 0.123 0.428 0.454
## 6 1.316 0.079 0.396 0.556
# convert data for each component into a dictionary
data_df_py = r_to_py(data_df)$set_index('t')$to_dict()
ca_meas = data_df_py['ca']
cb_meas = data_df_py['cb']
cc_meas = data_df_py['cc']
# check the dictionary for component A
py_run_string("print(r.ca_meas)")
# create model object
M = pyo$ConcreteModel()
# initial concentration estimates
M$ca0 = pyo$Param(initialize = 1.0)
M$cb0 = pyo$Param(initialize = 0.0)
M$cc0 = pyo$Param(initialize = 0.0)
# time variable
M$t = pydae$ContinuousSet(bounds = tuple(0.0, 5.0), initialize = data_df$t)
# parameters: measurement times, measured concentrations ca, cb, cc
M$t_meas = pyo$Set(within = M$t, initialize = data_df$t)
M$ca_meas = pyo$Param(M$t_meas, initialize = ca_meas)
M$cb_meas = pyo$Param(M$t_meas, initialize = cb_meas)
M$cc_meas = pyo$Param(M$t_meas, initialize = cc_meas)
# variables: rate constants for the 2 reactions A->B, B->C
M$k1 = pyo$Var(initialize= 0.5, bounds = tuple(1e-4, 10))
M$k2 = pyo$Var(initialize = 3, bounds = tuple(1e-4, 10))
# variables: concentration of A, B, C over time
M$ca = pyo$Var(M$t, initialize = M$ca0, bounds = tuple(0.0, M$ca0))
M$cb = pyo$Var(M$t, initialize = M$cb0, bounds = tuple(0.0, M$ca0))
M$cc = pyo$Var(M$t, initialize = M$cc0, bounds = tuple(0.0, M$ca0))
# derivative variables of concentration (wrt time) for A, B, and C
M$ca_dot = pydae$DerivativeVar(M$ca, wrt = M$t)
M$cb_dot = pydae$DerivativeVar(M$cb, wrt = M$t)
M$cc_dot = pydae$DerivativeVar(M$cc, wrt = M$t)
# constraint: ca_dot = -k1*ca
dcarate_rule = function(m, i) {
expr = m$ca_dot[i] == 0-m$k1 * m$ca[i]
return(expr)
}
M$dcarate_cons = pyo$Constraint(M$t, rule = dcarate_rule)
# constraint: cb_dot = k1*ca - k2*cb
dcbrate_rule = function(m, t) {
expr = m$cb_dot[t] == m$k1 * m$ca[t] - m$k2 * m$cb[t]
return(expr)
}
M$dcbrate_cons = pyo$Constraint(M$t, rule = dcbrate_rule)
# constraint: cc_dot = k2*cb
dccrate_rule = function(m, t) {
expr = m$cc_dot[t] == m$k2 * m$cb[t]
return(expr)
}
M$dccrate_cons = pyo$Constraint(M$t, rule = dccrate_rule)
# objective: minimize sum of square error
# Note: I had to use tuple to index time point since otherwise it was giving an error
ssq_rule = function(m) {
ssq = 0
for (i in bi$list(m$t_meas)) {
ssq = ssq + (m$ca[tuple(i)] - m$ca_meas[tuple(i)])**2 + (m$cb[tuple(i)] - m$cb_meas[tuple(i)])**2 +
(m$cc[tuple(i)] - m$cc_meas[tuple(i)])**2
}
return(ssq)
}
M$ssq_obj = pyo$Objective(rule = ssq_rule, sense = pyo$minimize)
# apply collocation
disc = pyo$TransformationFactory('dae.collocation')
disc$apply_to(M, nfe=20L, ncp=2L)
## None
# solve using IPOPT
solver = pyo$SolverFactory('ipopt')
solver$solve(M, logfile = "tmp.log")
##
## Problem:
## - Lower bound: -inf
## Upper bound: inf
## Number of objectives: 1
## Number of constraints: 243
## Number of variables: 248
## Sense: unknown
## Solver:
## - Status: ok
## Message: Ipopt 3.12.12\x3a Optimal Solution Found
## Termination condition: optimal
## Id: 0
## Error rc: 0
## Time: 0.13445234298706055
## Solution:
## - number of solutions: 0
## number of solutions displayed: 0
cat(paste(readLines("tmp.log"), collapse = "\n"))
## Solver command line: ['/opt/anaconda3/bin/ipopt', '/var/folders/h0/0h0h0yrs6f76syrmv1_q13mw0000gn/T/tmpbmx190u9.pyomo.nl', '-AMPL']
##
## Ipopt 3.12.12:
##
## ******************************************************************************
## This program contains Ipopt, a library for large-scale nonlinear optimization.
## Ipopt is released as open source code under the Eclipse Public License (EPL).
## For more information visit http://projects.coin-or.org/Ipopt
## ******************************************************************************
##
## This is Ipopt version 3.12.12, running with linear solver mumps.
## NOTE: Other linear solvers might be more efficient (see Ipopt documentation).
##
## Number of nonzeros in equality constraint Jacobian...: 931
## Number of nonzeros in inequality constraint Jacobian.: 0
## Number of nonzeros in Lagrangian Hessian.............: 142
##
## Total number of variables............................: 248
## variables with only lower bounds: 0
## variables with lower and upper bounds: 125
## variables with only upper bounds: 0
## Total number of equality constraints.................: 243
## Total number of inequality constraints...............: 0
## inequality constraints with only lower bounds: 0
## inequality constraints with lower and upper bounds: 0
## inequality constraints with only upper bounds: 0
##
## iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls
## 0 2.8934592e+01 4.95e-01 1.02e+00 -1.0 0.00e+00 - 0.00e+00 0.00e+00 0
## 1 2.2519268e+01 2.57e-01 1.30e+01 -1.0 2.74e+00 - 3.46e-01 1.00e+00f 1
## 2 1.3768718e+01 1.51e-02 2.01e+01 -1.0 2.59e-01 - 1.48e-01 1.00e+00f 1
## 3 1.0084400e+01 5.29e-01 1.86e+01 -1.0 2.87e+01 - 3.08e-03 6.62e-02f 1
## 4 7.7476958e+00 4.52e-01 2.43e+01 -1.0 1.22e+00 - 3.50e-02 5.80e-01f 1
## 5 4.2518473e+00 5.52e-02 3.31e+00 -1.0 5.75e-01 - 3.12e-01 1.00e+00f 1
## 6 2.8756274e+00 9.26e-03 4.91e-01 -1.0 1.11e-01 - 7.86e-01 1.00e+00f 1
## 7 1.7749222e+00 1.05e-02 2.66e-01 -1.7 9.77e-02 - 7.10e-01 1.00e+00f 1
## 8 1.2267171e+00 6.86e-03 8.01e-02 -1.7 1.20e-01 - 1.00e+00 1.00e+00h 1
## 9 7.0107064e-01 3.33e-02 1.35e-01 -2.5 3.06e-01 - 6.72e-01 7.65e-01h 1
## iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls
## 10 3.6336354e-01 3.79e-02 3.29e-01 -2.5 3.14e-01 - 1.00e+00 1.00e+00h 1
## 11 2.0598424e-01 2.87e-02 1.85e+00 -2.5 4.97e-01 - 1.00e+00 6.29e-01h 1
## 12 9.6160658e-02 1.58e-02 5.35e+01 -2.5 3.86e-01 - 1.95e-01 1.00e+00h 1
## 13 1.1430487e-01 6.64e-04 1.01e+00 -2.5 6.32e-02 - 8.95e-01 1.00e+00h 1
## 14 1.1116252e-01 2.98e-05 6.86e-03 -2.5 1.45e-02 - 1.00e+00 1.00e+00h 1
## 15 5.3989408e-02 1.11e-02 6.98e-01 -3.8 4.77e-01 - 8.36e-01 5.93e-01f 1
## 16 2.7085055e-02 1.25e-02 4.39e-01 -3.8 3.56e-01 - 1.00e+00 1.00e+00h 1
## 17 2.5313842e-02 1.53e-03 1.11e+01 -3.8 1.34e-01 - 6.09e-01 1.00e+00h 1
## 18 2.5541917e-02 2.30e-04 8.29e-02 -3.8 3.96e-02 - 1.00e+00 1.00e+00h 1
## 19 2.5655258e-02 8.63e-06 5.25e-04 -3.8 9.86e-03 - 1.00e+00 1.00e+00h 1
## iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls
## 20 2.5108487e-02 4.24e-04 4.11e-02 -5.7 6.93e-02 - 9.88e-01 1.00e+00h 1
## 21 2.5084520e-02 7.68e-06 8.07e-04 -5.7 9.71e-03 - 1.00e+00 1.00e+00h 1
## 22 2.5081665e-02 4.99e-07 1.98e-06 -5.7 2.68e-03 - 1.00e+00 1.00e+00h 1
## 23 2.5080597e-02 5.57e-07 2.57e-04 -8.6 2.68e-03 - 9.94e-01 1.00e+00h 1
## 24 2.5080451e-02 4.77e-08 2.59e-08 -8.6 8.30e-04 - 1.00e+00 1.00e+00h 1
## 25 2.5080440e-02 4.90e-09 7.75e-10 -8.6 2.66e-04 - 1.00e+00 1.00e+00h 1
## 26 2.5080440e-02 1.02e-10 1.78e-10 -9.0 3.82e-05 - 1.00e+00 1.00e+00h 1
##
## Number of Iterations....: 26
##
## (scaled) (unscaled)
## Objective...............: 2.5080439899454476e-02 2.5080439899454476e-02
## Dual infeasibility......: 1.7762472076263818e-10 1.7762472076263818e-10
## Constraint violation....: 1.0174727727019217e-10 1.0174727727019217e-10
## Complementarity.........: 1.2365039316920796e-09 1.2365039316920796e-09
## Overall NLP error.......: 1.2365039316920796e-09 1.2365039316920796e-09
##
##
## Number of objective function evaluations = 27
## Number of objective gradient evaluations = 27
## Number of equality constraint evaluations = 27
## Number of inequality constraint evaluations = 0
## Number of equality constraint Jacobian evaluations = 27
## Number of inequality constraint Jacobian evaluations = 0
## Number of Lagrangian Hessian evaluations = 26
## Total CPU secs in IPOPT (w/o function evaluations) = 0.101
## Total CPU secs in NLP function evaluations = 0.002
##
## EXIT: Optimal Solution Found.
##
# Get the estimated parameters
k1_est = M$k1()
k2_est = M$k2()
print(paste0("k1 = ", round(py_to_r(k1_est), 2), "; k2 = ", round(py_to_r(k2_est), 2)))
## [1] "k1 = 2.01; k2 = 0.99"
# Create a tibble of simulated concentration profiles at the
# estimated rate constants k1_est and k2_est
t_sim = c()
ca_sim = c()
cb_sim = c()
cc_sim = c()
tlist = as.numeric(bi$list(M$t))
for(i in seq_along(tlist)) {
t_sim[i] = tlist[i]
ca_sim[i] = py_to_r(M$ca[tuple(tlist[i])]())
cb_sim[i] = py_to_r(M$cb[tuple(tlist[i])]())
cc_sim[i] = py_to_r(M$cc[tuple(tlist[i])]())
}
c_sim_df = tibble(t_sim, ca_sim, cb_sim, cc_sim)
c_sim_df = c_sim_df %>% rename(ca = ca_sim, cb = cb_sim, cc = cc_sim)
c_sim_df_l = pivot_longer(c_sim_df, ca:cc, names_to = "comp", values_to = "conc")
data_df_l = pivot_longer(data_df, ca:cc, names_to = "comp", values_to = "conc")
# plot experimental vs simulated data
p = ggplot()
p = p + geom_line(data = c_sim_df_l, aes(x = t_sim, y = conc, color = comp))
p = p + geom_point(data = data_df_l, aes(x = t, y = conc, color = comp))
p = p + labs(x = "time", y = "concentration")
p = p + theme_bw()
p
sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.3.2 tidyr_1.1.0 dplyr_1.0.0 reticulate_1.16
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.4.6 knitr_1.28 magrittr_1.5 munsell_0.5.0
## [5] tidyselect_1.1.0 colorspace_1.4-1 lattice_0.20-41 R6_2.4.1
## [9] rlang_0.4.6 stringr_1.4.0 highr_0.8 tools_4.0.1
## [13] grid_4.0.1 gtable_0.3.0 xfun_0.14 withr_2.2.0
## [17] htmltools_0.5.0 ellipsis_0.3.1 yaml_2.2.1 digest_0.6.25
## [21] tibble_3.0.1 lifecycle_0.2.0 crayon_1.3.4 Matrix_1.2-18
## [25] farver_2.0.3 purrr_0.3.4 vctrs_0.3.1 glue_1.4.1
## [29] evaluate_0.14 rmarkdown_2.3 labeling_0.3 stringi_1.4.6
## [33] compiler_4.0.1 pillar_1.4.4 scales_1.1.1 generics_0.0.2
## [37] jsonlite_1.6.1 pkgconfig_2.0.3
# pyomo version used is 5.7