% Conference paper (ICPADS 2023). Citation key kept as exported so existing \cite commands still resolve.
% month is taken from the conference date recorded in the note field (17--21 Dec 2023);
% the exported "1 January" appears to have been an export default and contradicted that date.
% NOTE(review): "n2/3" in the abstract looks like a flattened exponent -- confirm against the paper before typesetting it.
@inproceedings{4ad7edf1b4344a209f855bb109a1ff70,
  author    = {Ratsaby, Joel and Timashkov, Alexander},
  title     = {Accelerating the {LZ}-complexity algorithm},
  booktitle = {Proceedings - 2023 {IEEE} 29th International Conference on Parallel and Distributed Systems, {ICPADS} 2023},
  series    = {Proceedings of the International Conference on Parallel and Distributed Systems - {ICPADS}},
  publisher = {Institute of Electrical and Electronics Engineers},
  address   = {United States},
  pages     = {200--207},
  year      = {2023},
  month     = dec,
  doi       = {10.1109/ICPADS60453.2023.00038},
  language  = {English},
  keywords  = {CUDA, GPU, LZ-complexity, string distance, UID distance},
  abstract  = {The Lempel Ziv complexity of a string has recently been used in pattern recognition and classification as part of a string distance function. Its main advantage is that it can measure dissimilarity between a pair of strings of different lengths. This is very useful for machine learning on unstructured data since such data is not restricted to a fixed input dimensionality. The standard computation of LZ-complexity is inherently serial and is not suitable for processing large unstructured data. Hence, we propose a parallel algorithm that computes the LZ-complexity of strings whose length is limited only by the amount of memory, typically in the tens of gigabytes. The algorithm is implemented in CUDA on a GPU. Its speed-up factor is approximately n2/3 for strings of length n, for at least up to n = 2Mb. For instance, on 2Mb strings, the speed-up is 150. We compare the execution times of kernel variants with shared and global memory. The more efficient variant obtains approximately 90\% GPU utilization.},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 29th IEEE International Conference on Parallel and Distributed Systems, ICPADS 2023 ; Conference date: 17-12-2023 Through 21-12-2023},
}