% Conference paper "Data Readiness Report" (Afzal et al.), published in the
% proceedings of the 2021 IEEE International Conference on Smart Data
% Services (SMDS 2021), pp. 42--51.
%
% Conventions used in this entry:
%   - Names are stored in "Last, First" form so the parse is unambiguous.
%   - The title is stored in Title Case; styles that want sentence case
%     can downcase it, but cannot restore capitals that were never stored.
%   - The proceedings title lives in booktitle only. No series field is
%     given, because repeating the proceedings title there makes standard
%     styles print it twice.
%
% NOTE(review): the editor name "{Fu Lizhi}, Min" is kept exactly as it was
% exported; the surname/given-name split looks unusual -- confirm against
% the proceedings front matter before relying on it.
% NOTE(review): "C. Rajmohan" is stored with an initial only; replace with
% the full given name if it can be confirmed.
@inproceedings{d5bab81c3d7941df9e26bee015234140,
  author    = {Afzal, Shazia and Rajmohan, C. and Kesarwani, Manish and Mehta, Sameep and Patel, Hima},
  title     = {Data Readiness Report},
  booktitle = {Proceedings - 2021 {IEEE} International Conference on Smart Data Services, {SMDS} 2021},
  editor    = {Atukorala, Nimanthi and Chang, Carl K. and Damiani, Ernesto and {Fu Lizhi}, Min and Spanoudakis, George and Srivatsa, Mudhakar and Wang, Zhongjie and Zhang, Jia},
  pages     = {42--51},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2021},
  doi       = {10.1109/SMDS53860.2021.00016},
  language  = {English},
  keywords  = {Data assurance and trust, Data documentation, Data quality, Governance, Machine learning datasets},
  abstract  = {Data exploration and quality analysis is an important yet tedious process in the AI pipeline. Current data cleaning and data readiness assessment practices for machine learning tasks are mostly conducted in an arbitrary manner which limits their reuse and often results in loss of productivity. We introduce the concept of a Data Readiness Report as accompanying documentation to a dataset that allows data consumers to get detailed insights into the quality of data. Data characteristics and challenges on various quality dimensions are identified and documented, keeping in mind the principles of transparency and explainability. The Data Readiness Report also serves as a record of all data assessment operations, including applied transformations. This provides a detailed lineage for data governance and management. In effect, the report captures and documents the actions taken by various personas in a data readiness and assessment workflow. Over time this becomes a repository of best practices and can potentially drive a recommendation system for building automated data readiness workflows on the lines of AutoML [1]. The data readiness report could serve as a valuable asset for organizing and operationalizing data in a Data-as-a-service model as it augments the trust and reliability of the datasets. We anticipate that together with the Datasheets [2], Dataset Nutrition Label [3], FactSheets [4] and Model Cards [5], the Data Readiness Report completes the AI documentation pipeline and increases trust and re-useability of data.},
  note      = {Publisher Copyright: {\textcopyright}2021 IEEE; 2021 IEEE International Conference on Smart Data Services, SMDS 2021; Conference date: 05-09-2021 Through 11-09-2021},
}