/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*   File:         mpi_kmeans.c  (MPI version)                               */
/*   Description:  Implementation of parallel k-means clustering algorithm   */
/*                                                                           */
/*   Author:  Wei-keng Liao                                                  */
/*            EECS Department, Northwestern University                       */
/*            email: wkliao@eecs.northwestern.edu                            */
/*                                                                           */
/*   Copyright (C) 2013, Northwestern University                             */
/*   See COPYRIGHT notice in top-level directory.                            */
/*                                                                           */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <stdio.h>
#include <stdlib.h>

#include <mpi.h>
#include "kmeans.h"


/*----< euclid_dist_2() >----------------------------------------------------*/
/* Squared Euclidean distance between two points: the sum, over the first
 * numdims coordinates, of the squared coordinate differences. No square root
 * is taken. The running total is held in a float. */
__inline static
float euclid_dist_2(int       numdims,  /* number of coordinates compared */
                    float    *coord1,   /* [numdims] first point */
                    DATATYPE *coord2)   /* [numdims] second point */
{
    float sum = 0.0;
    int   d   = 0;

    /* accumulate one squared difference per dimension */
    while (d < numdims) {
        sum = sum + (coord1[d]-coord2[d]) * (coord1[d]-coord2[d]);
        d++;
    }

    return sum;
}

/*----< find_nearest_cluster() >---------------------------------------------*/
/* Returns the index of the cluster center closest to object. Distances are
 * compared in squared form (euclid_dist_2), which gives the same ordering as
 * the true distance. Ties keep the lowest index because only a strictly
 * smaller distance replaces the current best. clusters[0] is read
 * unconditionally. */
__inline static
size_t find_nearest_cluster(long long   numClusters,/* no. clusters */
                            int         numCoords,  /* no. coordinates */
                            DATATYPE   *object,     /* [numCoords] */
                            float     **clusters)   /* [numClusters][numCoords] */
{
    size_t best = 0;   /* index of the closest center seen so far */
    size_t c;
    float  best_dist = euclid_dist_2(numCoords, clusters[0], object);

    /* scan the remaining centers, starting from the second one */
    for (c=1; c<numClusters; c++) {
        float cand = euclid_dist_2(numCoords, clusters[c], object);

        if (!(cand < best_dist))
            continue;   /* not strictly closer: keep the current best */

        best_dist = cand;
        best      = c;
    }

    return best;
}

/*----< mpi_kmeans() >-------------------------------------------------------*/
int mpi_kmeans(DATATYPE  **objects,     /* in: [numObjs][numCoords] */
               int         numCoords,   /* no. coordinates */
               long long   numObjs,     /* no. local objects */
               long long   numClusters, /* no. total clusters */
               float       threshold,   /* % objects change membership */
               long long  *membership,  /* out: [numObjs] */
               float     **clusters,    /* out: [numClusters][numCoords] */
               MPI_Comm    comm)        /* MPI communicator */
{
    size_t     i, j, index;
    int        rank, loop=0;
    long long  total_numObjs;
    long long *newClusterSize;/* [numClusters]: no. objects assigned in each
                                  new cluster */
    long long *clusterSize;   /* [numClusters]: temp buffer for Allreduce */
    float      delta;         /* % of objects change their clusters */
    float      delta_tmp;
    float    **newClusters;   /* [numClusters][numCoords] */
    extern int _debug;

    if (_debug) MPI_Comm_rank(comm, &rank);

    /* initialize membership[] */
    for (i=0; i<numObjs; i++) membership[i] = -1;

    /* need to initialize newClusterSize and newClusters[0] to all 0 */
    newClusterSize = (long long*) calloc(numClusters, sizeof(long long));
    assert(newClusterSize != NULL);
    clusterSize    = (long long*) calloc(numClusters, sizeof(long long));
    assert(clusterSize != NULL);

    newClusters    = (float**) malloc(numClusters *            sizeof(float*));
    assert(newClusters != NULL);
    newClusters[0] = (float*)  calloc(numClusters * numCoords, sizeof(float));
    assert(newClusters[0] != NULL);
    for (i=1; i<numClusters; i++)
        newClusters[i] = newClusters[i-1] + numCoords;

    MPI_Allreduce(&numObjs, &total_numObjs, 1, MPI_LONG_LONG, MPI_SUM, comm);
    if (_debug) printf("%2d: numObjs=%lld total_numObjs=%lld numClusters=%lld numCoords=%d\n",rank,numObjs,total_numObjs,numClusters,numCoords);

    do {
        double curT = MPI_Wtime();
        delta = 0.0;
        for (i=0; i<numObjs; i++) {
            /* find the array index of nestest cluster center */
            index = find_nearest_cluster(numClusters, numCoords, objects[i],
                                         clusters);

            /* if membership changes, increase delta by 1 */
            if (membership[i] != index) delta += 1.0;

            /* assign the membership to object i */
            membership[i] = index;

            /* update new cluster centers : sum of objects located within */
            newClusterSize[index]++;
            for (j=0; j<numCoords; j++)
                newClusters[index][j] += objects[i][j];
        }

        /* sum all data objects in newClusters */
        float     *newClusters_ptr = newClusters[0];
        float        *clusters_ptr =    clusters[0];
        long long nChunks = numClusters*numCoords / 134217728;
        int       rem     = numClusters*numCoords % 134217728;
        if (rem > 0) nChunks++;
        while (nChunks > 1) {
            MPI_Allreduce(newClusters_ptr, clusters_ptr, 134217728,
                          MPI_FLOAT, MPI_SUM, comm);
            newClusters_ptr += 134217728;
               clusters_ptr += 134217728;
            nChunks--;
        }
        MPI_Allreduce(newClusters_ptr, clusters_ptr, rem, MPI_FLOAT, MPI_SUM,
                      comm);
        MPI_Allreduce(newClusterSize, clusterSize, numClusters, MPI_LONG_LONG,
                      MPI_SUM, comm);

        /* average the sum and replace old cluster centers with newClusters */
        for (i=0; i<numClusters; i++) {
            for (j=0; j<numCoords; j++) {
                if (clusterSize[i] > 1)
                    clusters[i][j] /= clusterSize[i];
                newClusters[i][j] = 0.0;   /* set back to 0 */
            }
            newClusterSize[i] = 0;   /* set back to 0 */
        }
            
        MPI_Allreduce(&delta, &delta_tmp, 1, MPI_FLOAT, MPI_SUM, comm);
        delta = delta_tmp / total_numObjs;

        if (_debug) {
            double maxTime;
            curT = MPI_Wtime() - curT;
            MPI_Reduce(&curT, &maxTime, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
            if (rank == 0)
                printf("%2d: loop=%d time=%f sec\n",rank,loop,curT);
        }
    } while (delta > threshold && loop++ < 500);

    if (_debug && rank == 0)
        printf("%2d: delta=%f threshold=%f loop=%d\n",rank,delta,threshold,loop);

    free(newClusters[0]);
    free(newClusters);
    free(newClusterSize);
    free(clusterSize);

    return 1;
}

