This example estimates the reaction rate constants for the series reactions \(A \rightarrow B \rightarrow C\) from experimental data on the concentrations of \(A\), \(B\), and \(C\) over time. The example is taken from the book Chemical Reactor Analysis and Design by Rawlings and Ekerdt. The parameter estimation problem is to find the rate constants \(k_1\), \(k_2\) and the initial concentrations of \(A\), \(B\), and \(C\) that minimize the sum of squared errors between the experimental and predicted concentrations:
\[
\min \;\; \sum_{i = 1}^N \left( (ca(t_i) - ca^{meas}(t_i))^2 + (cb(t_i) - cb^{meas}(t_i))^2 + (cc(t_i) - cc^{meas}(t_i))^2 \right) \\
\frac{dca}{dt} = -k_1 ca \\
\frac{dcb}{dt} = k_1 ca - k_2 cb \\
\frac{dcc}{dt} = k_2 cb \\
ca(0) = ca0 \\
cb(0) = cb0 \\
cc(0) = cc0 \\
0 \leq t \leq t_f
\]
where the concentrations are measured at \(N\) time points \(\{t_1, t_2, \ldots, t_N\}\).
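
For reference, this pair of first-order reactions in series has a closed-form solution (for \(k_1 \neq k_2\)). It is not used in the estimation below, but it is a handy sanity check for the fitted concentration profiles:
\[
ca(t) = ca0 \, e^{-k_1 t} \\
cb(t) = cb0 \, e^{-k_2 t} + \frac{k_1 \, ca0}{k_2 - k_1}\left(e^{-k_1 t} - e^{-k_2 t}\right) \\
cc(t) = ca0 + cb0 + cc0 - ca(t) - cb(t)
\]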

# set up the python version to use
reticulate::use_python("/opt/anaconda3/bin/python")
library(reticulate)

# import pyomo
pyo = import("pyomo.environ", convert = FALSE)
pydae = import("pyomo.dae", convert = FALSE)
bi = import_builtins()

# Had signal handling issues when using the solver. Turned off
# signal handling based on this discussion thread:
# https://github.com/PyUtilib/pyutilib/issues/31
pyulib = import("pyutilib.subprocess.GlobalData")
pyulib$DEFINE_SIGNAL_HANDLERS_DEFAULT = FALSE

# helper: print a Pyomo model to a file and read it back into R,
# so that the model can be displayed in R Markdown
mdl_print = function(m) {
  printstr = "
with open('tmp_model.txt', 'w') as f:
  r.mdl.pprint(ostream=f)
"
  # substitute the actual model name for the 'mdl' placeholder
  printstr = gsub("mdl", m, printstr)
  py_run_string(printstr)
  printstr_R = paste(readLines("tmp_model.txt"), collapse = "\n")
  return(printstr_R)
}

# get operators
source("operators.R")

# load other libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

# read experimental data (this data was generated using simulation)
data_df = read.csv("python_codes/ABC_data.csv", stringsAsFactors = FALSE)
head(data_df)
##       t    ca     cb     cc
## 1 0.000 0.957 -0.031 -0.015
## 2 0.263 0.557  0.330  0.044
## 3 0.526 0.342  0.512  0.156
## 4 0.789 0.224  0.499  0.310
## 5 1.053 0.123  0.428  0.454
## 6 1.316 0.079  0.396  0.556
# convert the data for each component into a Python dictionary keyed by measurement time
data_df_py = r_to_py(data_df)$set_index('t')$to_dict()
ca_meas = data_df_py['ca']
cb_meas = data_df_py['cb']
cc_meas = data_df_py['cc']

# check the dictionary for component A
py_run_string("print(r.ca_meas)")

# create model object
M = pyo$ConcreteModel()

# parameters: initial concentrations of A, B and C (also used below as initial guesses and bounds)
M$ca0 = pyo$Param(initialize = 1.0)
M$cb0 = pyo$Param(initialize = 0.0)
M$cc0 = pyo$Param(initialize = 0.0)

# time domain (a continuous set from 0 to 5)
M$t = pydae$ContinuousSet(bounds = tuple(0.0, 5.0), initialize = data_df$t)

# parameters: measurement times, measured concentrations ca, cb, cc
M$t_meas = pyo$Set(within = M$t, initialize = data_df$t)
M$ca_meas = pyo$Param(M$t_meas, initialize = ca_meas)
M$cb_meas = pyo$Param(M$t_meas, initialize = cb_meas)
M$cc_meas = pyo$Param(M$t_meas, initialize = cc_meas)

# variables: rate constants for the 2 reactions A->B, B->C
M$k1 = pyo$Var(initialize = 0.5, bounds = tuple(1e-4, 10))
M$k2 = pyo$Var(initialize = 3, bounds = tuple(1e-4, 10))

# variables: concentration of A, B, C over time
M$ca = pyo$Var(M$t, initialize = M$ca0, bounds = tuple(0.0, M$ca0))
M$cb = pyo$Var(M$t, initialize = M$cb0, bounds = tuple(0.0, M$ca0))
M$cc = pyo$Var(M$t, initialize = M$cc0, bounds = tuple(0.0, M$ca0))

# derivative variables of concentration (wrt time) for A, B, and C
M$ca_dot = pydae$DerivativeVar(M$ca, wrt = M$t)
M$cb_dot = pydae$DerivativeVar(M$cb, wrt = M$t)
M$cc_dot = pydae$DerivativeVar(M$cc, wrt = M$t)

# constraint: ca_dot = -k1*ca
dcarate_rule = function(m, t) {
  # written as 0 - k1*ca rather than -k1*ca, since unary minus may not be
  # defined for the Python (Pyomo) objects on the R side
  expr = m$ca_dot[t] == 0 - m$k1 * m$ca[t]
  return(expr)
}
M$dcarate_cons = pyo$Constraint(M$t, rule = dcarate_rule)

# constraint: cb_dot = k1*ca - k2*cb
dcbrate_rule = function(m, t) {
  expr = m$cb_dot[t] == m$k1 * m$ca[t] - m$k2 * m$cb[t]
  return(expr)
}
M$dcbrate_cons = pyo$Constraint(M$t, rule = dcbrate_rule)

# constraint: cc_dot = k2*cb
dccrate_rule = function(m, t) {
  expr = m$cc_dot[t] == m$k2 * m$cb[t]
  return(expr)
}
M$dccrate_cons = pyo$Constraint(M$t, rule = dccrate_rule)

# objective: minimize the sum of squared errors
# Note: a single time point has to be wrapped in tuple() when indexing the
# Pyomo components from R; indexing with a bare scalar gave an error
ssq_rule = function(m) {
  ssq = 0
  for (i in bi$list(m$t_meas)) {
    ssq = ssq + (m$ca[tuple(i)] - m$ca_meas[tuple(i)])**2 + (m$cb[tuple(i)] - m$cb_meas[tuple(i)])**2 +
      (m$cc[tuple(i)] - m$cc_meas[tuple(i)])**2
  }
  return(ssq)
}
M$ssq_obj = pyo$Objective(rule = ssq_rule, sense = pyo$minimize)
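
At this point the model is fully specified, so the mdl_print() helper defined earlier can be used to inspect it (the full listing is long and not shown here):

# print the assembled Pyomo model (output not shown)
cat(mdl_print("M"))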

# apply collocation
disc = pyo$TransformationFactory('dae.collocation')
disc$apply_to(M, nfe=20L, ncp=2L)
## None
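
The collocation transformation adds discretization points to M$t along with discretized versions of the derivative variables and constraints (an alternative would be the 'dae.finite_difference' transformation). A quick check of the number of time points in the discretized model; it works out to 41 here, consistent with the 248 variables reported by IPOPT below (6 concentration/derivative variables at each time point plus the two rate constants):

# number of time points after discretization (element boundaries,
# collocation points and the measurement times)
length(bi$list(M$t))
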
# solve using IPOPT
solver = pyo$SolverFactory('ipopt')
solver$solve(M, logfile = "tmp.log")
## 
## Problem: 
## - Lower bound: -inf
##   Upper bound: inf
##   Number of objectives: 1
##   Number of constraints: 243
##   Number of variables: 248
##   Sense: unknown
## Solver: 
## - Status: ok
##   Message: Ipopt 3.12.12\x3a Optimal Solution Found
##   Termination condition: optimal
##   Id: 0
##   Error rc: 0
##   Time: 0.13445234298706055
## Solution: 
## - number of solutions: 0
##   number of solutions displayed: 0
cat(paste(readLines("tmp.log"), collapse = "\n"))
## Solver command line: ['/opt/anaconda3/bin/ipopt', '/var/folders/h0/0h0h0yrs6f76syrmv1_q13mw0000gn/T/tmpbmx190u9.pyomo.nl', '-AMPL']
## 
## Ipopt 3.12.12: 
## 
## ******************************************************************************
## This program contains Ipopt, a library for large-scale nonlinear optimization.
##  Ipopt is released as open source code under the Eclipse Public License (EPL).
##          For more information visit http://projects.coin-or.org/Ipopt
## ******************************************************************************
## 
## This is Ipopt version 3.12.12, running with linear solver mumps.
## NOTE: Other linear solvers might be more efficient (see Ipopt documentation).
## 
## Number of nonzeros in equality constraint Jacobian...:      931
## Number of nonzeros in inequality constraint Jacobian.:        0
## Number of nonzeros in Lagrangian Hessian.............:      142
## 
## Total number of variables............................:      248
##                      variables with only lower bounds:        0
##                 variables with lower and upper bounds:      125
##                      variables with only upper bounds:        0
## Total number of equality constraints.................:      243
## Total number of inequality constraints...............:        0
##         inequality constraints with only lower bounds:        0
##    inequality constraints with lower and upper bounds:        0
##         inequality constraints with only upper bounds:        0
## 
## iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
##    0  2.8934592e+01 4.95e-01 1.02e+00  -1.0 0.00e+00    -  0.00e+00 0.00e+00   0
##    1  2.2519268e+01 2.57e-01 1.30e+01  -1.0 2.74e+00    -  3.46e-01 1.00e+00f  1
##    2  1.3768718e+01 1.51e-02 2.01e+01  -1.0 2.59e-01    -  1.48e-01 1.00e+00f  1
##    3  1.0084400e+01 5.29e-01 1.86e+01  -1.0 2.87e+01    -  3.08e-03 6.62e-02f  1
##    4  7.7476958e+00 4.52e-01 2.43e+01  -1.0 1.22e+00    -  3.50e-02 5.80e-01f  1
##    5  4.2518473e+00 5.52e-02 3.31e+00  -1.0 5.75e-01    -  3.12e-01 1.00e+00f  1
##    6  2.8756274e+00 9.26e-03 4.91e-01  -1.0 1.11e-01    -  7.86e-01 1.00e+00f  1
##    7  1.7749222e+00 1.05e-02 2.66e-01  -1.7 9.77e-02    -  7.10e-01 1.00e+00f  1
##    8  1.2267171e+00 6.86e-03 8.01e-02  -1.7 1.20e-01    -  1.00e+00 1.00e+00h  1
##    9  7.0107064e-01 3.33e-02 1.35e-01  -2.5 3.06e-01    -  6.72e-01 7.65e-01h  1
## iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
##   10  3.6336354e-01 3.79e-02 3.29e-01  -2.5 3.14e-01    -  1.00e+00 1.00e+00h  1
##   11  2.0598424e-01 2.87e-02 1.85e+00  -2.5 4.97e-01    -  1.00e+00 6.29e-01h  1
##   12  9.6160658e-02 1.58e-02 5.35e+01  -2.5 3.86e-01    -  1.95e-01 1.00e+00h  1
##   13  1.1430487e-01 6.64e-04 1.01e+00  -2.5 6.32e-02    -  8.95e-01 1.00e+00h  1
##   14  1.1116252e-01 2.98e-05 6.86e-03  -2.5 1.45e-02    -  1.00e+00 1.00e+00h  1
##   15  5.3989408e-02 1.11e-02 6.98e-01  -3.8 4.77e-01    -  8.36e-01 5.93e-01f  1
##   16  2.7085055e-02 1.25e-02 4.39e-01  -3.8 3.56e-01    -  1.00e+00 1.00e+00h  1
##   17  2.5313842e-02 1.53e-03 1.11e+01  -3.8 1.34e-01    -  6.09e-01 1.00e+00h  1
##   18  2.5541917e-02 2.30e-04 8.29e-02  -3.8 3.96e-02    -  1.00e+00 1.00e+00h  1
##   19  2.5655258e-02 8.63e-06 5.25e-04  -3.8 9.86e-03    -  1.00e+00 1.00e+00h  1
## iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
##   20  2.5108487e-02 4.24e-04 4.11e-02  -5.7 6.93e-02    -  9.88e-01 1.00e+00h  1
##   21  2.5084520e-02 7.68e-06 8.07e-04  -5.7 9.71e-03    -  1.00e+00 1.00e+00h  1
##   22  2.5081665e-02 4.99e-07 1.98e-06  -5.7 2.68e-03    -  1.00e+00 1.00e+00h  1
##   23  2.5080597e-02 5.57e-07 2.57e-04  -8.6 2.68e-03    -  9.94e-01 1.00e+00h  1
##   24  2.5080451e-02 4.77e-08 2.59e-08  -8.6 8.30e-04    -  1.00e+00 1.00e+00h  1
##   25  2.5080440e-02 4.90e-09 7.75e-10  -8.6 2.66e-04    -  1.00e+00 1.00e+00h  1
##   26  2.5080440e-02 1.02e-10 1.78e-10  -9.0 3.82e-05    -  1.00e+00 1.00e+00h  1
## 
## Number of Iterations....: 26
## 
##                                    (scaled)                 (unscaled)
## Objective...............:   2.5080439899454476e-02    2.5080439899454476e-02
## Dual infeasibility......:   1.7762472076263818e-10    1.7762472076263818e-10
## Constraint violation....:   1.0174727727019217e-10    1.0174727727019217e-10
## Complementarity.........:   1.2365039316920796e-09    1.2365039316920796e-09
## Overall NLP error.......:   1.2365039316920796e-09    1.2365039316920796e-09
## 
## 
## Number of objective function evaluations             = 27
## Number of objective gradient evaluations             = 27
## Number of equality constraint evaluations            = 27
## Number of inequality constraint evaluations          = 0
## Number of equality constraint Jacobian evaluations   = 27
## Number of inequality constraint Jacobian evaluations = 0
## Number of Lagrangian Hessian evaluations             = 26
## Total CPU secs in IPOPT (w/o function evaluations)   =      0.101
## Total CPU secs in NLP function evaluations           =      0.002
## 
## EXIT: Optimal Solution Found.
## 
# get the estimated rate constants (calling a Pyomo variable returns its value)
k1_est = M$k1()
k2_est = M$k2()

print(paste0("k1 = ", round(py_to_r(k1_est), 2), "; k2 = ", round(py_to_r(k2_est), 2)))
## [1] "k1 = 2.01; k2 = 0.99"
# Create a tibble of simulated concentration profiles at the 
# estimated rate constants k1_est and k2_est
t_sim = c()
ca_sim = c()
cb_sim = c()
cc_sim = c()

tlist = as.numeric(bi$list(M$t))
for(i in seq_along(tlist)) {
  t_sim[i] = tlist[i]
  ca_sim[i] = py_to_r(M$ca[tuple(tlist[i])]())
  cb_sim[i] = py_to_r(M$cb[tuple(tlist[i])]())
  cc_sim[i] = py_to_r(M$cc[tuple(tlist[i])]())
}
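
The same extraction can be written more compactly; for example, for component A (an equivalent sketch of what the loop above does):

# more compact extraction of the simulated profile for A
ca_sim_alt = vapply(tlist, function(ti) py_to_r(M$ca[tuple(ti)]()), numeric(1))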

c_sim_df = tibble(t_sim, ca_sim, cb_sim, cc_sim)
c_sim_df = c_sim_df %>% rename(ca = ca_sim, cb = cb_sim, cc = cc_sim)
c_sim_df_l = pivot_longer(c_sim_df, ca:cc, names_to = "comp", values_to = "conc")
data_df_l = pivot_longer(data_df, ca:cc, names_to = "comp", values_to = "conc") 

# plot experimental data (points) vs simulated concentration profiles (lines)
p = ggplot()
p = p + geom_line(data = c_sim_df_l, aes(x = t_sim, y = conc, color = comp))
p = p + geom_point(data = data_df_l, aes(x = t, y = conc, color = comp))
p = p + labs(x = "time", y = "concentration")
p = p + theme_bw()
p

Session Info

sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggplot2_3.3.2   tidyr_1.1.0     dplyr_1.0.0     reticulate_1.16
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.4.6     knitr_1.28       magrittr_1.5     munsell_0.5.0   
##  [5] tidyselect_1.1.0 colorspace_1.4-1 lattice_0.20-41  R6_2.4.1        
##  [9] rlang_0.4.6      stringr_1.4.0    highr_0.8        tools_4.0.1     
## [13] grid_4.0.1       gtable_0.3.0     xfun_0.14        withr_2.2.0     
## [17] htmltools_0.5.0  ellipsis_0.3.1   yaml_2.2.1       digest_0.6.25   
## [21] tibble_3.0.1     lifecycle_0.2.0  crayon_1.3.4     Matrix_1.2-18   
## [25] farver_2.0.3     purrr_0.3.4      vctrs_0.3.1      glue_1.4.1      
## [29] evaluate_0.14    rmarkdown_2.3    labeling_0.3     stringi_1.4.6   
## [33] compiler_4.0.1   pillar_1.4.4     scales_1.1.1     generics_0.0.2  
## [37] jsonlite_1.6.1   pkgconfig_2.0.3
# pyomo version used is 5.7