% Chapter-level record: it carries its own authors, title and page range
% inside a containing volume, so it is typed @incollection. Classic BibTeX
% @inbook treats `title` as the book title and ignores `booktitle`.
%
% NOTE(review): `booktitle` repeats the `series` string verbatim; the title of
%   the containing volume is presumably missing from the export. Confirm it
%   against the DOI and replace `booktitle` (and add `volume`/`editor` if known).
% NOTE(review): `address` holds a country; BibTeX expects the publisher's
%   city. Confirm and replace.
%
% `abstract`, `keywords` and `language` are kept as metadata; the classic
% standard styles do not print them.
@incollection{de61cdc88c5446f281cfe52d4c6bbdfc,
  author    = {Chaib, Rim and Azizi, Nabiha and Zemmal, Nawel and Schwab, Didier and Belhaouari, {Samir Brahim}},
  title     = {Improved Multi-label Medical Text Classification Using Features Cooperation},
  booktitle = {Lecture Notes on Data Engineering and Communications Technologies},
  series    = {Lecture Notes on Data Engineering and Communications Technologies},
  pages     = {61--71},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2021},
  doi       = {10.1007/978-3-030-70713-2_7},
  language  = {English},
  keywords  = {Doc2vec, Handcrafted features, Medical text, Multi-label classification, Text categorization},
  note      = {Publisher Copyright: {\textcopyright} 2021, The Author(s), under exclusive license to Springer Nature Switzerland AG.},
  abstract  = {Medical text categorization is a valuable area of text classification due to the massive growth in the amount of medical data, most of which is unstructured. Reading and understanding the information contained in millions of medical documents is a time-consuming process. Automatic text classification aims to automatically classify text documents into one or more predefined categories according to several criteria such as the type of output (multi-label or mono label). Feature extraction task plays an important role in text classification. Extracting informative features highly increases the performance of the classification models and reduces the computational complexity. Traditional feature extraction methods are based on handcrafted features which mainly depend on prior knowledge. The use of these features may involve an insignificant representation. Doc2vec is a way to generate a vector of informative and essential features that are specific to a document. In this paper, the impact of combining handcrafted and doc2vec features in the multi-label document classification scenario is analyzed by proposing a system named MUL-MEDTEC. The one-versus-all classification strategy based on logistic regression is adopted in this study to predict for each medical text it to one or several labels. Experimental results based on Ohsumed medical dataset are very encouraging with based classification accuracy equal to 0.92 as global precision.},
}