% Conference paper in the CICLing 2018 revised selected papers (Springer LNCS).
% The conference took place in March 2018; the proceedings volume is dated 2023.
% The citation key is the identifier generated by the exporting system and is
% kept unchanged so that existing citations continue to resolve.
% NOTE(review): publisher/address normalized to Springer/Cham on the basis of the
%   "Springer Nature Switzerland AG" copyright line in the original export --
%   confirm against the volume's front matter.
% NOTE(review): the LNCS volume number is not present in the exported record;
%   add a `volume` field once confirmed.
% NOTE(review): month = jan comes from the export (originally paired with day 1)
%   and may be a placeholder rather than the real publication month -- confirm.
@inproceedings{13fb56a62f704ce386cf938aa7951f9b,
  author    = {Churkin, Elena and Last, Mark and Litvak, Marina and Vanetik, Natalia},
  title     = {Sentence Compression as a Supervised Learning with a Rich Feature Space},
  booktitle = {Computational Linguistics and Intelligent Text Processing -- 19th International Conference, {CICLing} 2018, Revised Selected Papers},
  editor    = {Gelbukh, Alexander},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Cham},
  year      = {2023},
  month     = jan,
  pages     = {261--271},
  doi       = {10.1007/978-3-031-23804-8_21},
  isbn      = {978-3-031-23803-1},
  language  = {English},
  keywords  = {Sentence compression, Supervised learning, Syntactic dependencies},
  abstract  = {We present a novel supervised approach to sentence compression, based on classification and removal of word sequences generated from subtrees of the original sentence dependency tree. Our system may use any known classifier like Support Vector Machines or Logistic Model Tree to identify word sequences that can be removed without compromising the grammatical correctness of the compressed sentence. We trained our system using several classifiers on a small annotated dataset of 100 sentences, which included around 1500 manually labeled subtrees (removal candidates) represented by 25 features. The highest cross-validation classification accuracy of 80\% was obtained with the SMO (Normalized Poly Kernel) algorithm. We evaluated the readability and the informativeness of the sentences compressed by the SMO-based classification model with the help of human raters using a separate benchmark dataset of 200 sentences.},
  note      = {Conference held 18--24 March 2018},
}