% Journal article entry, cleaned up from a publisher auto-export.
% Review notes:
%   - author: the export separated people with commas and ended in a dangling
%     "and", which BibTeX parses as a single malformed name. People are now
%     separated with " and ". The names themselves look truncated (for example
%     a bare "M.A."), and the dangling "and" suggests a final author is
%     missing -- TODO verify the full author list against the publisher page.
%   - doi: the value has no "10.<registrant>/" prefix, so it is presumably not
%     a resolvable DOI as written -- TODO confirm the complete DOI.
%   - pages: range now uses the double hyphen expected for page ranges.
@article{MAKHILLIJSC20127521093,
  author   = {Asha Gowda and Vidya and M.A.},
  title    = {Genetic Algorithm Based Dimensionality Reduction for Improving Performance of K-Means Clustering: A Case Study for Categorization of Medical Dataset},
  journal  = {International Journal of Soft Computing},
  volume   = {7},
  number   = {5},
  pages    = {249--255},
  year     = {2012},
  issn     = {1816-9503},
  doi      = {ijscomp.2012.249.255},
  url      = {https://makhillpublications.co/view-article.php?issn=1816-9503&doi=ijscomp.2012.249.255},
  keywords = {k-means clustering,genetic algorithm,dimensionality reduction,wrapper approach,cluster center initialization,entropy based fuzzy clustering,medical dataset},
  abstract = {Medical data mining is the process of extracting hidden patterns
  from medical data. Among the various clustering algorithms, k-means is the one
  of most widely used clustering technique. The performance of k-means clustering
  depends on the initial cluster centers and might converge to local optimum.
  k-means does not guarantee unique clustering because it generates different
  results with randomly chosen initial clusters for different runs of k-means.
  In addition the performance of any data mining depends on feature subset selection.
  This study attempts to improve performance of k-means clustering using two stages.
  As part of first stage, this study investigates the use of wrapper approach
  for feature selection for clustering where Genetic Algorithm (GA) is used as
  a random search technique for subset generation, wrapped with k-means clustering.
  In second stage, GA and Entropy based Fuzzy Clustering (EFC) are used to find
  the initial centroid for k-means clustering. Experiments have been conducted
  using standard medical dataset namely Pima Indians Diabetes Dataset (PIDD) and
  Heart statlog. Results show markable reduction of 8.42 and 18.89% in the classification
  error of k-means clustering for PIDD and Heart statlog dataset using features
  identified by proposed wrapper approach and initial centroids identified by
  GA when compared to k-means performance with all the features and centroids
  initialized by random method for PIDD and Heart statlog dataset.},
}