% Conference paper: "Helix" (CIKM 2020, ACM), DOI 10.1145/3340531.3416022.
% Maintenance notes: `&` is escaped as `\&` in the abstract so styles that
% print it do not hit a LaTeX alignment-tab error; case-sensitive words in
% title/booktitle are brace-protected against sentence-casing styles; the
% citation key is left unchanged so existing \cite commands keep working.
@inproceedings{97ebf5e0edf243e7971f7c2780d8cb51,
  author    = {Sidi, Lior and Mirsky, Yisroel and Nadler, Asaf and Elovici, Yuval and Shabtai, Asaf},
  title     = {{Helix}: {DGA} Domain Embeddings for Tracking and Exploring Botnets},
  booktitle = {{CIKM} 2020 - Proceedings of the 29th {ACM} International Conference on Information and Knowledge Management},
  series    = {International Conference on Information and Knowledge Management, Proceedings},
  publisher = {Association for Computing Machinery},
  pages     = {2741--2748},
  year      = {2020},
  month     = oct,
  day       = {19},
  doi       = {10.1145/3340531.3416022},
  language  = {English},
  keywords  = {autoencoder, botnet, cnn, dga, dns, embedding, lstm},
  abstract  = {Botnets have been using domain generation algorithms (DGA) for over a decade to covertly and robustly identify the domain name of their command and control servers (C\&C). Recent advancements in DGA detection has motivated botnet owners to rapidly alter the C\&C domain and use adversarial techniques to evade detection. As a result, it has become increasingly difficult to track botnets in DNS traffic. In this paper, we present Helix, a method for tracking and exploring botnets. Helix uses a spatio-temporal deep neural network autoencoder to convert domains into numerical vectors (embeddings) which capture the DGA and seed used to create the domain. This is made possible by leveraging both convolutional (spatial) and recurrent (temporal) layers, and by using techniques such as attention mechanisms and highways. Furthermore, by using an autoencoder architecture, the network can be trained in an unsupervised manner (no labeling of data) which makes the system practical for real world deployments. In our evaluation, we found that Helix can track botnet campaigns, distinguish between DGA families and seeds, and can identify domains generated using the latest adversarial machine learning techniques. Helix is currently being used to track botnets in one of the world's largest Internet Service Providers (ISP), and we include some of the ISP's analysis work using our method.},
  note      = {Publisher Copyright: {\textcopyright} 2020 ACM.; 29th ACM International Conference on Information and Knowledge Management, CIKM 2020 ; Conference date: 19-10-2020 Through 23-10-2020},
}