% Conference paper by Shahar and Brafman in the ECAI 2023 proceedings
% (series: Frontiers in Artificial Intelligence and Applications, IOS Press).
@inproceedings{0b9dbb6ea04c4598beb72bdcfbadbdb3,
title = "Reinforcement Learning in {RDPs} by Combining Deep {RL} with Automata Learning",
abstract = "Regular Decision Processes (RDPs) are a recently introduced model for decision-making in non-Markovian domains in which states are not postulated a-priori, and the next observation depends in a regular manner on past history. As such, they provide a more succinct and understandable model of the dynamics and reward function. Existing algorithms for learning RDPs attempt to learn an automaton that reflects the regularity of the underlying domain. However, their scalability is limited due to the practical difficulty of learning automata. In this paper we propose to leverage the power of Deep reinforcement learning in partially observable domain to learn RDPs: First, we learn an RNN-based policy. Then, we generate an automaton that reflects the policy's structure and use our old data to transform it into an MDP, which we solve. This results in a finite, explainable policy structure, and, as our empirical evaluation on old and new RDP benchmarks shows, much better sample complexity.",
author = "Shahar, Tal and Brafman, {Ronen I.}",
note = "Publisher Copyright: {\textcopyright} 2023 The Authors.; 26th European Conference on Artificial Intelligence, ECAI 2023 ; Conference date: 30-09-2023 Through 04-10-2023",
year = "2023",
month = sep,
day = "28",
doi = "10.3233/FAIA230504",
language = "English",
series = "Frontiers in Artificial Intelligence and Applications",
publisher = "IOS Press BV",
pages = "2097--2104",
editor = "Gal, Kobi and Nowe, Ann and Nalepa, {Grzegorz J.} and Fairstein, Roy and Radulescu, Roxana",
booktitle = "ECAI 2023 - 26th European Conference on Artificial Intelligence, including 12th Conference on Prestigious Applications of Intelligent Systems, PAIS 2023 - Proceedings",
address = "Netherlands",
}