@comment{Chapter in an edited volume: the chapter authors differ from the volume editors, so the entry is typed as incollection. The editor list is de-duplicated; the spelling of the third editor's surname should be confirmed against the publisher record for the DOI.}
@incollection{e86573f1be704e47944d8628c5c719d7,
  author    = {Rokach, Lior and Romano, Roni and Chizi, Barak and Maimon, Oded},
  title     = {A decision tree framework for semi-automatic extraction of product attributes from the web},
  editor    = {Last, Mark and Szczepaniak, Piotr and Volkovich, Zeev and Kandel, Abraham},
  booktitle = {Advances in Web Intelligence and Data Mining},
  series    = {Studies in Computational Intelligence},
  publisher = {Springer},
  pages     = {201--210},
  year      = {2006},
  month     = sep,
  day       = {27},
  doi       = {10.1007/3-540-33880-2_21},
  isbn      = {3540338799},
  language  = {English},
  abstract  = {Semi-Automatic extraction of product attributes from URLs is an important issue for comparison-shopping agents. In this paper we examine a novel decision tree framework for extracting product attributes. The core induction algorithmic framework consists of three main stages. In the first stage, a large set of regular expression-based patterns are induced by employing a longest common subsequence algorithm. In the second stage we filter the initial set and leave only the most useful patterns. In the last stage we represent the extraction problem (in which the domain values are not known in advance) as a classification problem and employ an ensemble of decision trees. An empirical study performed on a real-world extraction tasks illustrates the capability of the proposed framework.},
}