% Chapter 2 (DOI suffix _2, pages 19--60) of a 2013 Springer book in the
% "Communications and Control Engineering" series.
% Review notes:
%  - isbn holds the 13-digit identifier that the export had placed in the
%    number and edition fields; it is neither a series number nor an edition.
%  - booktitle: presumably the book that DOI prefix 10.1007/978-1-4471-5022-0
%    resolves to -- TODO confirm against the publisher record (the export
%    repeated the series name here).
%  - keywords look unrelated to the abstract; presumably an export artifact --
%    verify and replace with the chapter's real keywords.
%  - publisher is kept as exported, although the copyright note names
%    Springer-Verlag London -- verify which imprint should be cited.
@inbook{56a3ce21fc6740588dc27d05118b45bc,
  author    = {Chang, Hyeong Soo and Hu, Jiaqiao and Fu, Michael C. and Marcus, Steven I.},
  title     = {Multi-Stage Adaptive Sampling Algorithms},
  booktitle = {Simulation-Based Algorithms for {Markov} Decision Processes},
  series    = {Communications and Control Engineering},
  publisher = {Springer International Publishing},
  year      = {2013},
  pages     = {19--60},
  isbn      = {9781447150213},
  doi       = {10.1007/978-1-4471-5022-0_2},
  language  = {English},
  keywords  = {Expense, Lost},
  abstract  = {In Chap. 2, we present simulation-based algorithms for estimating the optimal value function in finite-horizon MDPs with large (possibly uncountable) state spaces, where the usual techniques of policy iteration and value iteration are either computationally impractical or infeasible to implement. We present two adaptive sampling algorithms that estimate the optimal value function by choosing actions to sample in each state visited on a finite-horizon simulated sample path. The first approach builds upon the expected regret analysis of multi-armed bandit models and uses upper confidence bounds to determine which action to sample next, whereas the second approach uses ideas from learning automata to determine the next sampled action. The first approach is also the predecessor of a closely related approach in artificial intelligence (AI) called Monte Carlo tree search that led to a breakthrough in developing the current best computer Go-playing programs.},
  note      = {Publisher Copyright: {\textcopyright} Springer-Verlag London 2013.},
}