% Conference paper: "Optimized Couplings for Watermarking Large Language Models" (ISIT 2025).
% Cleaned from an automated export:
%   - author names had escaped braces that print as literal curly braces; now plain "Last, First";
%   - month set to the conference month given in the note field (22-27 June 2025); the exported
%     "jan" / day 1 pair looked like a year-only placeholder, and the non-standard day field was dropped;
%   - acronyms in booktitle are brace-protected so styles cannot recase them.
% NOTE(review): address holds a country, not the publisher's city -- confirm the city and replace.
@inproceedings{6d20613662db4cb7be09cea35b9b9365,
  author    = {Long, Carol Xuan and Tsur, Dor and Verdun, Claudio Mayrink and Hsu, Hsiang and Permuter, Haim and Calmon, Flavio P.},
  title     = {Optimized Couplings for Watermarking Large Language Models},
  booktitle = {{ISIT} 2025 - 2025 {IEEE} International Symposium on Information Theory, Proceedings},
  series    = {IEEE International Symposium on Information Theory - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers},
  address   = {United States},
  year      = {2025},
  month     = jun,
  doi       = {10.1109/ISIT63088.2025.11195457},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2025 IEEE.; 2025 IEEE International Symposium on Information Theory, ISIT 2025 ; Conference date: 22-06-2025 Through 27-06-2025},
  abstract  = {Large-language models (LLMs) are now able to produce text that is indistinguishable from human-generated content. This has fueled the development of watermarks that imprint a 'signal' in LLM-generated text with minimal perturbation of an LLM's output. This paper provides an analysis of text watermarking in a one-shot setting. Through the lens of hypothesis testing with side information, we formulate and analyze the fundamental trade-off between watermark detection power and distortion in generated textual quality. We argue that a key component in watermark design is generating a coupling between the side information shared with the watermark detector and a random partition of the LLM vocabulary. Our analysis identifies the optimal coupling and randomization strategy under the worst-case LLM next-token distribution that satisfies a min-entropy constraint. We provide a closed-form expression of the resulting detection rate under the proposed scheme and quantify the cost in a max-min sense. Finally, we numerically compare the proposed scheme with the theoretical optimum.},
}