% Chapter 5 of a Springer book (cleaned-up database export).
% The ISBN previously sat in the number/edition fields and is now in isbn;
% booktitle previously repeated the series name. NOTE(review): confirm the
% booktitle against the publisher record for DOI 10.1007/978-1-4471-5022-0.
@inbook{e11fbce002a64896a15df2e46e652ba1,
  author    = {Chang, {Hyeong Soo} and Hu, Jiaqiao and Fu, {Michael C.} and Marcus, {Steven I.}},
  title     = {On-line control methods via simulation},
  booktitle = {Simulation-Based Algorithms for {Markov} Decision Processes},
  chapter   = {5},
  pages     = {179--218},
  series    = {Communications and Control Engineering},
  publisher = {Springer International Publishing},
  year      = {2013},
  isbn      = {9781447150213},
  doi       = {10.1007/978-1-4471-5022-0_5},
  language  = {English},
  keywords  = {Expense, Peha},
  note      = {Publisher Copyright: {\textcopyright} Springer-Verlag London 2013.},
  abstract  = {In Chap. 5, we consider an approximate rolling-horizon control framework for solving infinite-horizon MDPs with large state/action spaces in an on-line manner by simulation. Specifically, we consider policies in which the system (either the actual system itself or a simulation model of the system) evolves to a particular state that is observed, and the action to be taken in that particular state is then computed on-line at the decision time, with a particular emphasis on the use of simulation. We first present an updating scheme involving multiplicative weights for updating a probability distribution over a restricted set of policies; this scheme can be used to estimate the optimal value function over this restricted set by sampling on the (restricted) policy space. The lower-bound estimate of the optimal value function is used for constructing on-line control policies, called (simulated) policy switching and parallel rollout. We also discuss an upper-bound-based method, called hindsight optimization. Finally, we present an algorithm, called approximate stochastic annealing, which combines Q-learning with the MARS algorithm of Sect. 4.6.1 to directly search the policy space.},
}