% Conference paper (PRIMA 2025, published in Springer's Lecture Notes in Computer Science).
% Names are stored in "Last, First" form so they parse unambiguously.
% Acronyms in the title are brace-protected so sentence-casing styles keep their capitals.
% The DOI is stored bare and unescaped, as DOI-aware styles expect.
% NOTE(review): "address" holds a country rather than a publisher city, and "booktitle"
% is an abbreviation; both are kept as exported -- confirm against the publisher record.
@inproceedings{0cd27e8a348f4563be22dfc397a01709,
  author    = {Keller, Ron and Brafman, Ronen I.},
  title     = {Solving {Dec-POMDPs} as {POMDPs} Using Imitation Learning},
  booktitle = {PRIMA 2025},
  editor    = {Dima, Catalin and Ferrando, Angelo and Malvone, Vadim},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  pages     = {117--132},
  year      = {2026},
  month     = jan,
  day       = {1},
  doi       = {10.1007/978-3-032-13562-9_9},
  isbn      = {9783032135612},
  language  = {English},
  keywords  = {Dec-POMDP, Imitation Learning, POMDP},
  abstract  = {Dec-POMDPs model cooperative, sequential multi-agent decision problems. They are computationally challenging, and scaling up their performance is difficult. We describe a method for solving Dec-POMDPs in the paradigm of centralized planning with distributed execution. First, we solve a team POMDP in which agent observations are common knowledge. Then, each agent uses imitation learning to try and imitate its part of the centralized policy. Unlike some previous work, the agent not only tries to imitate its behavior within the team, but also its belief state. A final offline synchronization stage improves the likelihood that agents{\textquoteright} policies will be well-coordinated with each other. On standard Dec-POMDP benchmarks, our method performs better than the best Dec-POMDP model-based solution method, and QMIX, a leading multi-agent RL algorithm.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2026.; 26th International Conference on Principles and Practice of Multi-Agent Systems, PRIMA 2025 ; Conference date: 16-12-2025 Through 19-12-2025},
}