% Conference paper in the EUMAS 2023 proceedings (Lecture Notes in Computer Science series).
% Conventions: names in "Last, First" form, page range written with "--",
% acronyms brace-protected against style recasing. The fields day, language and
% abstract are not used by the standard styles and are kept for reference only.
@inproceedings{9e3a85c8072a4f0b90b391489e023f27,
  author    = {Varricchione, Giovanni and Alechina, Natasha and Dastani, Mehdi and Logan, Brian},
  title     = {Synthesising Reward Machines for Cooperative Multi-Agent Reinforcement Learning},
  editor    = {Malvone, Vadim and Murano, Aniello},
  booktitle = {Multi-Agent Systems - 20th European Conference, {EUMAS} 2023, Proceedings},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  pages     = {328--344},
  year      = {2023},
  month     = sep,
  day       = {7},
  doi       = {10.1007/978-3-031-43264-4_21},
  isbn      = {9783031432637},
  language  = {English},
  abstract  = {Reward machines have recently been proposed as a means of encoding team tasks in cooperative multi-agent reinforcement learning. The resulting multi-agent reward machine is then decomposed into individual reward machines, one for each member of the team, allowing agents to learn in a decentralised manner while still achieving the team task. However, current work assumes the multi-agent reward machine to be given. In this paper, we show how reward machines for team tasks can be synthesised automatically from an Alternating-Time Temporal Logic specification of the desired team behaviour and a high-level abstraction of the agents{\textquoteright} environment. We present results suggesting that our automated approach has comparable, if not better, sample efficiency than reward machines generated by hand for multi-agent tasks.},
}