% Conference paper in the OPODIS 2006 proceedings (Springer LNCS).
% The citation key is kept unchanged so existing citations keep resolving.
% month follows the conference date recorded in the note field.
% TODO confirm the LNCS volume number against the publisher record.
@inproceedings{54a44f5472db48409e4f92e538bf76ab,
  author    = {Dolev, Shlomi and Kat, Ronen I. and Schiller, Elad M.},
  title     = {When consensus meets self-stabilization: Self-stabilizing failure-detector, consensus and replicated state-machine},
  booktitle = {Principles of Distributed Systems -- 10th International Conference, {OPODIS} 2006, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {4305},
  pages     = {45--63},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {2006},
  month     = dec,
  doi       = {10.1007/11945529_5},
  isbn      = {9783540499909},
  language  = {English},
  keywords  = {Consensus, Distributed Reset, Failure Detector, Self-Stabilization, State-Machine, Wait-Free},
  abstract  = {This paper presents a self-stabilizing failure detector, asynchronous consensus and replicated state-machine algorithm suite, the components of which can be started in an arbitrary state and converge to act as a virtual state-machine. Self-stabilizing algorithms can cope with transient faults. Transient faults can alter the system state to an arbitrary state and hence, cause a temporary violation of the safety property of the consensus. New requirements for consensus that fit the on-going nature of self-stabilizing algorithms are presented. The wait-free consensus (and the replicated state-machine) algorithm presented is a classic combination of a failure detector and a (memory bounded) rotating coordinator consensus that satisfy both eventual safety and eventual liveness. Several new techniques and paradigms are introduced. The bounded memory failure detector abstracts away synchronization assumptions using bounded heartbeat counters combined with a balance-unbalance mechanism. The practically infinite paradigm is introduced in the scope of self-stabilization, where an execution of, say, $2^{64}$ sequential steps is regarded as (practically) infinite. Finally, we present the first self-stabilizing wait-free reset mechanism that ensures eventual safety and can be used in other scopes.},
  note      = {10th International Conference on Principles of Distributed Systems, {OPODIS} 2006; conference date: 12--15 December 2006},
}