% Conference paper (ECML/PKDD proceedings, part 4, Lecture Notes in Computer
% Science). Bibliographic metadata originates from DBLP (see the note field).
% Names use "Last, First" form; case-sensitive title words are brace-protected;
% publisher name and publisher city are kept in separate fields.
@inproceedings{e12f4c8988f94e4aacecd7fc3ac39d4e,
  author    = {Abdoo, Eliran and Brafman, Ronen I. and Shani, Guy and Soffair, Nitsan},
  title     = {{Team-Imitate-Synchronize} for Solving {Dec-POMDPs}},
  booktitle = {Joint European Conference on Machine Learning and Knowledge Discovery in Databases ECML/PKDD (4)},
  editor    = {Amini, Massih-Reza and Canu, St{\'e}phane and Fischer, Asja and Guns, Tias and Novak, Petra Kralj and Tsoumakas, Grigorios},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Cham},
  pages     = {216--232},
  year      = {2023},
  month     = mar,
  day       = {17},
  doi       = {10.1007/978-3-031-26412-2_14},
  isbn      = {978-3-031-26411-5},
  language  = {English},
  abstract  = {Multi-agent collaboration under partial observability is a difficult task. Multi-agent reinforcement learning (MARL) algorithms that do not leverage a model of the environment struggle with tasks that require sequences of collaborative actions, while Dec-POMDP algorithms that use such models to compute near-optimal policies, scale poorly. In this paper, we suggest the Team-Imitate-Synchronize (TIS) approach, a heuristic, model-based method for solving such problems. Our approach begins by solving the joint team problem, assuming that observations are shared. Then, for each agent we solve a single agent problem designed to imitate its behavior within the team plan. Finally, we adjust the single agent policies for better synchronization. Our experiments demonstrate that our method provides comparable solutions to Dec-POMDP solvers over small problems, while scaling to much larger problems, and provides collaborative plans that MARL algorithms are unable to identify.},
  note      = {DBLP License: DBLP's bibliographic metadata records provided through http://dblp.org/ are distributed under a Creative Commons CC0 1.0 Universal Public Domain Dedication. Although the bibliographic metadata records are provided consistent with CC0 1.0 Dedication, the content described by the metadata records is not. Content may be subject to copyright, rights of privacy, rights of publicity and other restrictions.},
}