% Conference paper in the ICDM 2016 proceedings (entry originates from an
% automated database export; the hash-style key is kept so existing \cite
% commands continue to resolve).
%
% NOTE(review): month/day below (2 July 2016) fall BEFORE the conference
% dates recorded in the note field (12--15 December 2016). This may be a
% placeholder date from the exporting database -- verify against the
% publisher record before relying on the printed month.
% NOTE(review): address holds a country rather than the publisher's city;
% left unchanged because the city is not stated anywhere in this entry.
@inproceedings{4205efea257d41f5a1dc3fb5e6d072f1,
  author    = {Imran, Muhammad and Chawla, Sanjay and Castillo, Carlos},
  title     = {A Robust Framework for Classifying Evolving Document Streams in an Expert-Machine-Crowd Setting},
  booktitle = {Proceedings - 16th {IEEE} International Conference on Data Mining, {ICDM} 2016},
  editor    = {Bonchi, Francesco and Domingo-Ferrer, Josep and Baeza-Yates, Ricardo and Zhou, Zhi-Hua and Wu, Xindong},
  series    = {Proceedings - {IEEE} International Conference on Data Mining, {ICDM}},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {961--966},
  year      = {2016},
  month     = jul,
  day       = {2},
  doi       = {10.1109/ICDM.2016.143},
  language  = {English},
  keywords  = {Novel concept detection, Outlier detection, Social media, Stream classification, Text classification},
  abstract  = {An emerging challenge in the online classification of social media data streams is to keep the categories used for classification up-to-date. In this paper, we propose an innovative framework based on an Expert-Machine-Crowd (EMC) triad to help categorize items by continuously identifying novel concepts in heterogeneous data streams often riddled with outliers. We unify constrained clustering and outlier detection by formulating a novel optimization problem: COD-Means. We design an algorithm to solve the COD-Means problem and show that COD-Means will not only help detect novel categories but also seamlessly discover human annotation errors and improve the overall quality of the categorization process. Experiments on diverse real data sets demonstrate that our approach is both effective and efficient.},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE. 16th IEEE International Conference on Data Mining, ICDM 2016; Conference date: 12-12-2016 through 15-12-2016},
}