% Paper by Schenker, Bunke, Last and Kandel in an edited workshop proceedings
% volume of the Lecture Notes in Computer Science series.
% Review notes:
%  - Typed as inproceedings: the contribution has its own authors and sits in
%    an edited volume, so both author and editor are legitimate here.
%  - booktitle previously repeated the series string; it now names the volume.
%    NOTE(review): volume title and number were taken from the DOI/ISBN
%    landing page as recalled -- confirm against the publisher record.
%  - The exporter's "1 January" placeholder date was dropped; only the year
%    is reliable.
%  - The DOI is stored bare (no backslash escape before the underscore).
@inproceedings{52b656fd3a4045ba9a0b4cfe14dfb338,
  author    = {Schenker, Adam and Bunke, Horst and Last, Mark and Kandel, Abraham},
  title     = {A Graph-Based Framework for Web Document Mining},
  editor    = {Marinai, Simone and Dengel, Andreas},
  booktitle = {Document Analysis Systems {VI}},
  series    = {Lecture Notes in Computer Science},
  volume    = {3163},
  pages     = {401--412},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {2004},
  doi       = {10.1007/978-3-540-28640-0_38},
  isbn      = {3540230602},
  language  = {English},
  abstract  = {In this paper we describe methods of performing data mining on web documents, where the web document content is represented by graphs. We show how traditional clustering and classification methods, which usually operate on vector representations of data, can be extended to work with graph-based data. Specifically, we give graph-theoretic extensions of the k-Nearest Neighbors classification algorithm and the k-means clustering algorithm that process graphs, and show how the retention of structural information can lead to improved performance over the case of the vector model approach. We introduce several different types of web document representations that utilize graphs and compare their performance for clustering and classification.},
}