This example estimates reaction rates for the reactions $$A \rightarrow B \rightarrow C$$ based on experimental data on concentrations of $$A$$, $$B$$ and $$C$$ over time. This example is taken from the book Chemical Reactor Analysis and Design - Rawlings and Ekerdt The parameter estimation problem is to find the rate constants $$k1$$, $$k2$$ and initial concentrations of A, B, and C that minimize the sum of squares error between experimental and predicted data. $\min \;\; \sum_{i = 1}^N((ca(t_i) - ca^{meas}(t_i))^2 + (cb(t_i) - cb^{meas}(t_i))^2 + (cc(t_i) - cc^{meas}(t_i))^2) \\ \frac{dca}{dt} = -k_1ca \\ \frac{dcb}{dt} = k_1ca - k_2cb \\ \frac{dcc}{dt} = k_2cb \\ ca = ca0 \\ cb = cb0 \\ cc = cc0 \\ 0 \leq t \leq t_f$ where experimental data on concentrations is measured at $$N$$ time points $$\{t_1, t_2, \ldots, t_N\}$$

# set-up python version to use
reticulate::use_python("/opt/anaconda3/bin/python")
library(reticulate)

# import pyomo
pyo = import("pyomo.environ", convert = FALSE)
pydae = import("pyomo.dae", convert = FALSE)
bi = import_builtins()

# had signal handling issues when using solver. Turned
# off signal handling based on this discussion thread
# https://github.com/PyUtilib/pyutilib/issues/31
pyulib = import("pyutilib.subprocess.GlobalData")
pyulib$DEFINE_SIGNAL_HANDLERS_DEFAULT = FALSE # printing model to a file, reading it back into R to print in R markdown mdl_print = function(m) { printstr = " with open('tmp_model.txt', 'w') as f: r.mdl.pprint(ostream=f) f.close() " printstr = gsub("mdl", m, printstr) py_run_string(printstr) printstr_R = paste(readLines("tmp_model.txt"), collapse = "\n") return(printstr_R) } # get operators source("operators.R") # load other libraries library(dplyr) ## ## Attaching package: 'dplyr' ## The following objects are masked from 'package:stats': ## ## filter, lag ## The following objects are masked from 'package:base': ## ## intersect, setdiff, setequal, union library(tidyr) library(ggplot2) # read exprimental data (this data was generated using simulation) data_df = read.csv("python_codes/ABC_data.csv", stringsAsFactors = FALSE) head(data_df) ## t ca cb cc ## 1 0.000 0.957 -0.031 -0.015 ## 2 0.263 0.557 0.330 0.044 ## 3 0.526 0.342 0.512 0.156 ## 4 0.789 0.224 0.499 0.310 ## 5 1.053 0.123 0.428 0.454 ## 6 1.316 0.079 0.396 0.556 # convert data for each component into a dictionary data_df_py = r_to_py(data_df)$set_index('t')$to_dict() ca_meas = data_df_py['ca'] cb_meas = data_df_py['cb'] cc_meas = data_df_py['cc'] # check the dictionary for component A py_run_string("print(r.ca_meas)") # create model object M = pyo$ConcreteModel()

# initial concentration estimates
M$ca0 = pyo$Param(initialize = 1.0)
M$cb0 = pyo$Param(initialize = 0.0)
M$cc0 = pyo$Param(initialize = 0.0)

# time variable
M$t = pydae$ContinuousSet(bounds = tuple(0.0, 5.0), initialize = data_df$t) # parameters: measurement times, measured concentrations ca, cb, cc M$t_meas = pyo$Set(within = M$t, initialize = data_df$t) M$ca_meas = pyo$Param(M$t_meas, initialize = ca_meas)
M$cb_meas = pyo$Param(M$t_meas, initialize = cb_meas) M$cc_meas = pyo$Param(M$t_meas, initialize = cc_meas)

# variables: rate constants for the 2 reactions A->B, B->C
M$k1 = pyo$Var(initialize= 0.5, bounds = tuple(1e-4, 10))
M$k2 = pyo$Var(initialize = 3, bounds = tuple(1e-4, 10))

# variables: concentration of A, B, C over time
M$ca = pyo$Var(M$t, initialize = M$ca0, bounds = tuple(0.0, M$ca0)) M$cb = pyo$Var(M$t, initialize = M$cb0, bounds = tuple(0.0, M$ca0))
M$cc = pyo$Var(M$t, initialize = M$cc0, bounds = tuple(0.0, M$ca0)) # derivative variables of concentration (wrt time) for A, B, and C M$ca_dot = pydae$DerivativeVar(M$ca, wrt = M$t) M$cb_dot = pydae$DerivativeVar(M$cb, wrt = M$t) M$cc_dot = pydae$DerivativeVar(M$cc, wrt = M$t) # constraint: ca_dot = -k1*ca dcarate_rule = function(m, i) { expr = m$ca_dot[i] == 0-m$k1 * m$ca[i]
return(expr)
}
M$dcarate_cons = pyo$Constraint(M$t, rule = dcarate_rule) # constraint: cb_dot = k1*ca - k2*cb dcbrate_rule = function(m, t) { expr = m$cb_dot[t] == m$k1 * m$ca[t] - m$k2 * m$cb[t]
return(expr)
}
M$dcbrate_cons = pyo$Constraint(M$t, rule = dcbrate_rule) # constraint: cc_dot = k2*cb dccrate_rule = function(m, t) { expr = m$cc_dot[t] == m$k2 * m$cb[t]
return(expr)
}
M$dccrate_cons = pyo$Constraint(M$t, rule = dccrate_rule) # objective: minimize sum of square error # Note: I had to use tuple to index time point since otherwise it was giving an error ssq_rule = function(m) { ssq = 0 for (i in bi$list(m$t_meas)) { ssq = ssq + (m$ca[tuple(i)] - m$ca_meas[tuple(i)])**2 + (m$cb[tuple(i)] - m$cb_meas[tuple(i)])**2 + (m$cc[tuple(i)] - m$cc_meas[tuple(i)])**2 } return(ssq) } M$ssq_obj = pyo$Objective(rule = ssq_rule, sense = pyo$minimize)

# apply collocation
disc = pyo$TransformationFactory('dae.collocation') disc$apply_to(M, nfe=20L, ncp=2L)
## None
# solve using IPOPT
solver = pyo$SolverFactory('ipopt') solver$solve(M, logfile = "tmp.log")
##
## Problem:
## - Lower bound: -inf
##   Upper bound: inf
##   Number of objectives: 1
##   Number of constraints: 243
##   Number of variables: 248
##   Sense: unknown
## Solver:
## - Status: ok
##   Message: Ipopt 3.12.12\x3a Optimal Solution Found
##   Termination condition: optimal
##   Id: 0
##   Error rc: 0
##   Time: 0.13445234298706055
## Solution:
## - number of solutions: 0
##   number of solutions displayed: 0
cat(paste(readLines("tmp.log"), collapse = "\n"))
## Solver command line: ['/opt/anaconda3/bin/ipopt', '/var/folders/h0/0h0h0yrs6f76syrmv1_q13mw0000gn/T/tmpbmx190u9.pyomo.nl', '-AMPL']
##
## Ipopt 3.12.12:
##
## ******************************************************************************
## This program contains Ipopt, a library for large-scale nonlinear optimization.
##  Ipopt is released as open source code under the Eclipse Public License (EPL).
## ******************************************************************************
##
## This is Ipopt version 3.12.12, running with linear solver mumps.
## NOTE: Other linear solvers might be more efficient (see Ipopt documentation).
##
## Number of nonzeros in equality constraint Jacobian...:      931
## Number of nonzeros in inequality constraint Jacobian.:        0
## Number of nonzeros in Lagrangian Hessian.............:      142
##
## Total number of variables............................:      248
##                      variables with only lower bounds:        0
##                 variables with lower and upper bounds:      125
##                      variables with only upper bounds:        0
## Total number of equality constraints.................:      243
## Total number of inequality constraints...............:        0
##         inequality constraints with only lower bounds:        0
##    inequality constraints with lower and upper bounds:        0
##         inequality constraints with only upper bounds:        0
##
## iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
##    0  2.8934592e+01 4.95e-01 1.02e+00  -1.0 0.00e+00    -  0.00e+00 0.00e+00   0
##    1  2.2519268e+01 2.57e-01 1.30e+01  -1.0 2.74e+00    -  3.46e-01 1.00e+00f  1
##    2  1.3768718e+01 1.51e-02 2.01e+01  -1.0 2.59e-01    -  1.48e-01 1.00e+00f  1
##    3  1.0084400e+01 5.29e-01 1.86e+01  -1.0 2.87e+01    -  3.08e-03 6.62e-02f  1
##    4  7.7476958e+00 4.52e-01 2.43e+01  -1.0 1.22e+00    -  3.50e-02 5.80e-01f  1
##    5  4.2518473e+00 5.52e-02 3.31e+00  -1.0 5.75e-01    -  3.12e-01 1.00e+00f  1
##    6  2.8756274e+00 9.26e-03 4.91e-01  -1.0 1.11e-01    -  7.86e-01 1.00e+00f  1
##    7  1.7749222e+00 1.05e-02 2.66e-01  -1.7 9.77e-02    -  7.10e-01 1.00e+00f  1
##    8  1.2267171e+00 6.86e-03 8.01e-02  -1.7 1.20e-01    -  1.00e+00 1.00e+00h  1
##    9  7.0107064e-01 3.33e-02 1.35e-01  -2.5 3.06e-01    -  6.72e-01 7.65e-01h  1
## iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
##   10  3.6336354e-01 3.79e-02 3.29e-01  -2.5 3.14e-01    -  1.00e+00 1.00e+00h  1
##   11  2.0598424e-01 2.87e-02 1.85e+00  -2.5 4.97e-01    -  1.00e+00 6.29e-01h  1
##   12  9.6160658e-02 1.58e-02 5.35e+01  -2.5 3.86e-01    -  1.95e-01 1.00e+00h  1
##   13  1.1430487e-01 6.64e-04 1.01e+00  -2.5 6.32e-02    -  8.95e-01 1.00e+00h  1
##   14  1.1116252e-01 2.98e-05 6.86e-03  -2.5 1.45e-02    -  1.00e+00 1.00e+00h  1
##   15  5.3989408e-02 1.11e-02 6.98e-01  -3.8 4.77e-01    -  8.36e-01 5.93e-01f  1
##   16  2.7085055e-02 1.25e-02 4.39e-01  -3.8 3.56e-01    -  1.00e+00 1.00e+00h  1
##   17  2.5313842e-02 1.53e-03 1.11e+01  -3.8 1.34e-01    -  6.09e-01 1.00e+00h  1
##   18  2.5541917e-02 2.30e-04 8.29e-02  -3.8 3.96e-02    -  1.00e+00 1.00e+00h  1
##   19  2.5655258e-02 8.63e-06 5.25e-04  -3.8 9.86e-03    -  1.00e+00 1.00e+00h  1
## iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
##   20  2.5108487e-02 4.24e-04 4.11e-02  -5.7 6.93e-02    -  9.88e-01 1.00e+00h  1
##   21  2.5084520e-02 7.68e-06 8.07e-04  -5.7 9.71e-03    -  1.00e+00 1.00e+00h  1
##   22  2.5081665e-02 4.99e-07 1.98e-06  -5.7 2.68e-03    -  1.00e+00 1.00e+00h  1
##   23  2.5080597e-02 5.57e-07 2.57e-04  -8.6 2.68e-03    -  9.94e-01 1.00e+00h  1
##   24  2.5080451e-02 4.77e-08 2.59e-08  -8.6 8.30e-04    -  1.00e+00 1.00e+00h  1
##   25  2.5080440e-02 4.90e-09 7.75e-10  -8.6 2.66e-04    -  1.00e+00 1.00e+00h  1
##   26  2.5080440e-02 1.02e-10 1.78e-10  -9.0 3.82e-05    -  1.00e+00 1.00e+00h  1
##
## Number of Iterations....: 26
##
##                                    (scaled)                 (unscaled)
## Objective...............:   2.5080439899454476e-02    2.5080439899454476e-02
## Dual infeasibility......:   1.7762472076263818e-10    1.7762472076263818e-10
## Constraint violation....:   1.0174727727019217e-10    1.0174727727019217e-10
## Complementarity.........:   1.2365039316920796e-09    1.2365039316920796e-09
## Overall NLP error.......:   1.2365039316920796e-09    1.2365039316920796e-09
##
##
## Number of objective function evaluations             = 27
## Number of objective gradient evaluations             = 27
## Number of equality constraint evaluations            = 27
## Number of inequality constraint evaluations          = 0
## Number of equality constraint Jacobian evaluations   = 27
## Number of inequality constraint Jacobian evaluations = 0
## Number of Lagrangian Hessian evaluations             = 26
## Total CPU secs in IPOPT (w/o function evaluations)   =      0.101
## Total CPU secs in NLP function evaluations           =      0.002
##
## EXIT: Optimal Solution Found.
## 
# Get the estimated parameters
k1_est = M$k1() k2_est = M$k2()

print(paste0("k1 = ", round(py_to_r(k1_est), 2), "; k2 = ", round(py_to_r(k2_est), 2)))
##  "k1 = 2.01; k2 = 0.99"
# Create a tibble of simulated concentration profiles at the
# estimated rate constants k1_est and k2_est
t_sim = c()
ca_sim = c()
cb_sim = c()
cc_sim = c()

tlist = as.numeric(bi$list(M$t))
for(i in seq_along(tlist)) {
t_sim[i] = tlist[i]
ca_sim[i] = py_to_r(M$ca[tuple(tlist[i])]()) cb_sim[i] = py_to_r(M$cb[tuple(tlist[i])]())
cc_sim[i] = py_to_r(M\$cc[tuple(tlist[i])]())
}

c_sim_df = tibble(t_sim, ca_sim, cb_sim, cc_sim)
c_sim_df = c_sim_df %>% rename(ca = ca_sim, cb = cb_sim, cc = cc_sim)
c_sim_df_l = pivot_longer(c_sim_df, ca:cc, names_to = "comp", values_to = "conc")
data_df_l = pivot_longer(data_df, ca:cc, names_to = "comp", values_to = "conc")

# plot experimental vs simulated data
p = ggplot()
p = p + geom_line(data = c_sim_df_l, aes(x = t_sim, y = conc, color = comp))
p = p + geom_point(data = data_df_l, aes(x = t, y = conc, color = comp))
p = p + labs(x = "time", y = "concentration")
p = p + theme_bw()
p ### Session Info

sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
##  en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
##  stats     graphics  grDevices utils     datasets  methods   base
##
## other attached packages:
##  ggplot2_3.3.2   tidyr_1.1.0     dplyr_1.0.0     reticulate_1.16
##
## loaded via a namespace (and not attached):
##   Rcpp_1.0.4.6     knitr_1.28       magrittr_1.5     munsell_0.5.0
##   tidyselect_1.1.0 colorspace_1.4-1 lattice_0.20-41  R6_2.4.1
##   rlang_0.4.6      stringr_1.4.0    highr_0.8        tools_4.0.1
##  grid_4.0.1       gtable_0.3.0     xfun_0.14        withr_2.2.0
##  htmltools_0.5.0  ellipsis_0.3.1   yaml_2.2.1       digest_0.6.25
##  tibble_3.0.1     lifecycle_0.2.0  crayon_1.3.4     Matrix_1.2-18
##  farver_2.0.3     purrr_0.3.4      vctrs_0.3.1      glue_1.4.1
##  evaluate_0.14    rmarkdown_2.3    labeling_0.3     stringi_1.4.6
##  compiler_4.0.1   pillar_1.4.4     scales_1.1.1     generics_0.0.2
##  jsonlite_1.6.1   pkgconfig_2.0.3
# pyomo version used is 5.7