% Workshop paper by Dolev and Ilani in the ApPLIED 2023 proceedings.
% The proceedings title is stored in booktitle only; there is no series field
% because the entry names no separate publication series.
% NOTE(review): the lowercase "programming" and the casing of "PLatforms" in
% booktitle presumably spell out the ApPLIED acronym; confirm before normalising.
% NOTE(review): the note field is printed by most styles; confirm that the
% copyright and conference-date text is wanted in the rendered bibliography.
@inproceedings{cbbd55429c7747c985f84043c6082e25,
  author    = {Dolev, Shlomi and Ilani, Arnon},
  title     = {Invited Paper: Common Public Knowledge for Enhancing Machine Learning Data Sets},
  booktitle = {Proceedings of the 5th Workshop on Advanced Tools, programming Languages, and {PLatforms} for Implementing and Evaluating Algorithms for Distributed Systems, {ApPLIED} 2023},
  publisher = {Association for Computing Machinery, Inc},
  year      = {2023},
  month     = jun,
  day       = {19},
  doi       = {10.1145/3584684.3597263},
  language  = {English},
  keywords  = {ChatGPT, Feature engineering, Forest management, Isolated letter, Machine learning, Ontology, Random forests, Speech recognition, Tree cover type, World knowledge},
  abstract  = {In this study, we show the advantages of incorporating multi-source knowledge from publicly available sources, such as ChatGPT and Wikipedia, into existing datasets to enhance the performance of machine learning models for routine tasks, such as classification. Specifically, we propose the utilization of supplementary data from external sources and demonstrate the utility of widely accessible knowledge in the context of the Forest Cover Type Prediction task launched by the Roosevelt National Forest of Northern Colorado. Additionally, we exhibit an improvement in classification accuracy for the Isolated Letter Speech Recognition dataset when incorporating information on regional accents in the prediction of spoken English letter names.},
  note      = {Publisher Copyright: {\textcopyright} 2023 Copyright held by the owner/author(s).; 5th Workshop on Advanced Tools, Programming Languages, and PLatforms for Implementing and Evaluating Algorithms for Distributed Systems, ApPLIED 2023 ; Conference date: 19-06-2023},
}