% Conference paper published in the ICIAR 2014 proceedings (Lecture Notes in Computer Science).
% The citation key is the identifier generated by the exporting system; keep it stable so
% existing citations continue to resolve.
% eventtitle, eventdate and copyright are ignored by classic BibTeX styles; biblatex reads
% eventtitle and eventdate. Only the publication year is recorded for the volume itself.
% NOTE(review): the address "Cham" follows the usual way Springer International Publishing
% volumes are cited -- confirm against the volume's title page.
@inproceedings{08edde0a2fb143bd921dcf9c9bfcc04c,
  author     = {Cohen, Rafi and Dinstein, Itshak and El-Sana, Jihad and Kedem, Klara},
  title      = {Using scale-space anisotropic smoothing for text line extraction in historical documents},
  booktitle  = {Image Analysis and Recognition -- 11th International Conference, {ICIAR} 2014, Proceedings},
  editor     = {Kamel, Mohamed and Campilho, Aur{\'e}lio},
  series     = {Lecture Notes in Computer Science},
  pages      = {349--358},
  publisher  = {Springer International Publishing},
  address    = {Cham},
  year       = {2014},
  doi        = {10.1007/978-3-319-11758-4_38},
  eventtitle = {11th International Conference on Image Analysis and Recognition, {ICIAR} 2014},
  eventdate  = {2014-10-22/2014-10-24},
  language   = {English},
  keywords   = {Historical document processing, Text lines extraction},
  abstract   = {Text line extraction is vital pre-requisite for various document processing tasks. This paper presents a novel approach for text line extraction which is based on Gaussian scale space and dedicated binarization that utilize the inherent structure of smoothed text document images. It enhances the text lines in the image using multiscale anisotropic second derivative of Gaussian filter bank at the average height of the text line. It then applies a binarization, which is based on component-tree and is tailored towards line extraction. The final stage of the algorithm is based on an energy minimization framework for removing spurious text line and assigning connected components to lines. We have tested our approach on various datasets written in different languages at range of image quality and received high detection rates, which outperform state-of-the-art algorithms. Our MATLAB code is publicly available. (http://www.cs.bgu.ac.il/~rafico/LineExtraction.zip).},
  copyright  = {{\textcopyright} Springer International Publishing Switzerland 2014},
}