% Conference paper on T-Rex (ICDE 2024, IEEE); the citation key is the exporter's opaque id.
% - Braces in title/booktitle protect the system name and acronyms from style recasing.
% - Authors use the unambiguous "Last, First" form.
% - month follows the conference dates recorded in the note field (13--17 May 2024);
%   the exported "1 January" placeholder has been dropped.
% NOTE(review): address holds the publisher's country as exported; BibTeX expects the
%   publisher's city here -- confirm and replace.
@inproceedings{0abe94eb68744e82a897af37709a4086,
  author    = {Madhyastha, Meghana and Budavari, Tamas and Braverman, Vladimir and Vogelstein, Joshua and Burns, Randal},
  title     = {{T-Rex} ({Tree-Rectangles}): Reformulating Decision Tree Traversal as Hyperrectangle Enclosure},
  booktitle = {Proceedings - 2024 {IEEE} 40th International Conference on Data Engineering, {ICDE} 2024},
  series    = {Proceedings - International Conference on Data Engineering},
  pages     = {1792--1804},
  publisher = {Institute of Electrical and Electronics Engineers},
  address   = {United States},
  year      = {2024},
  month     = may,
  doi       = {10.1109/ICDE60146.2024.00145},
  language  = {English},
  keywords  = {I/O, efficient, hyperrectangle, random forest, tree ensemble},
  abstract  = {Tree ensembles, random forests and gradient boosted trees, are useful in resource-limited machine learning deployments. However, traversing tree data structures is not cache friendly, which results in high latency during inference or regression. Tree traversal incurs random I/Os making inference memory bound. We present a system that trades many random I/Os for few sequential I/O by remapping a forest of trees into a single spatial index. It builds on the observation that each leaf in the forest encodes a hyperrectangle in the feature space. We make queries I/O efficient through pruning and space-filling curves. We then optimize computation through quantization of hyperrectangle boundaries and vectorization of enclosure queries. Our evaluation on a diverse set of benchmark datasets shows that the system reduces inference latency by 2 times in memory and 10 times for external memory with no detectable loss of accuracy.},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE.; 40th IEEE International Conference on Data Engineering, ICDE 2024 ; Conference date: 13-05-2024 Through 17-05-2024},
}