/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*   File:         mpi_main.c   (an MPI version)                             */
/*   Description:  main program that calls a parallel K-means subroutine     */
/*                 using Euclidean distance.                                 */
/*                                                                           */
/*   Input file format:                                                      */
/*                 netCDF file                                               */
/*                                                                           */
/*   Author:  Wei-keng Liao                                                  */
/*            EECS Department Northwestern University                        */
/*            email: wkliao@eecs.northwestern.edu                            */
/*                                                                           */
/*   Copyright (C) 2013, Northwestern University                             */
/*   See COPYRIGHT notice in top-level directory.                            */
/*                                                                           */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>     /* strtok() */
#include <strings.h>    /* strcasecmp() */
#include <sys/types.h>  /* open() */
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>     /* getopt() */

#include <mpi.h>
#include <pnetcdf.h>

int      _debug;
#include "kmeans.h"

/*---< usage() >------------------------------------------------------------*/
/* Writes the command-line help text to stderr.
 *
 *   argv0     - program name, substituted into the "Usage:" line
 *   threshold - accepted so callers keep their existing call form; the help
 *               text spells out its default literally and no conversion in
 *               the format consumes this value
 */
static void usage(char *argv0, float threshold) {
    static const char usage_fmt[] =
        "Usage: %s [switches]\n"
        "       -i filename    : input netCDF file containing data to be clustered\n"
        "       -v var_name    : name of variable in the netCDF file to be clustered\n"
        "       -c filename    : name of netCDF file that contains the initial cluster centers\n"
        "                        if skipped, the same file from option \"-i\" is used\n"
        "       -k var_name    : name of variable in the netCDF to be used as the initial cluster centers\n"
        "                        if skipped, the variable name from the option \"-v\" is used\n"
        "       -n num_clusters: number of clusters (K, must > 1)\n"
        "       -t threshold   : threshold value (default 0.0010)\n"
        "       -o             : output file name\n"
        "       -q             : quiet mode\n"
        "       -d             : enable debug mode\n"
        "       -h             : print this help information\n";

    (void) threshold;   /* the format has a single %s, filled by argv0 */

    fprintf(stderr, usage_fmt, argv0);
}

/*---< derive_output_name() >-----------------------------------------------*/
/* Builds the default output file name from the input file name: a trailing
 * ".nc" (compared case-insensitively) is replaced by ".kmeans_out.nc",
 * otherwise ".kmeans_out.nc" is appended to the whole name.
 *
 * Returns a malloc'ed, NUL-terminated string that the caller must free(),
 * or NULL when the allocation fails.
 */
static char *derive_output_name(const char *in_name) {
    static const char suffix[] = ".kmeans_out.nc";
    size_t  stem_len = strlen(in_name);
    char   *out_name;

    /* only inspect the last 3 characters when the name has that many */
    if (stem_len >= 3 && strcasecmp(in_name + stem_len - 3, ".nc") == 0)
        stem_len -= 3;

    out_name = malloc(stem_len + sizeof(suffix));
    if (out_name == NULL) return NULL;

    memcpy(out_name, in_name, stem_len);
    memcpy(out_name + stem_len, suffix, sizeof(suffix)); /* includes '\0' */
    return out_name;
}

/*---< main() >-------------------------------------------------------------*/
/* Driver of the MPI K-means program.
 *
 * Steps: parse the command line, read the data objects and the initial
 * cluster centers, run mpi_kmeans(), write the result with pnetcdf_write()
 * and report the maximum I/O and computation times on rank 0.
 *
 * Every decision to stop early is taken on a value that is reduced over
 * MPI_COMM_WORLD, so that either all processes continue or all of them leave
 * through fn_exit; a process leaving alone would stall the others in the
 * next collective call.
 *
 * Returns EXIT_SUCCESS when the whole run completed, EXIT_FAILURE for
 * invalid arguments, read errors, repeated initial centers or allocation
 * failures.
 */
int main(int argc, char **argv) {
           int     opt;
    extern char   *optarg;
           int     i, j, err, minErr;
           int     allocOk, allAllocOk;   /* local / global allocation flag */
           int     exitCode;              /* value returned to the caller */
           int     is_print_usage, verbose;
           char   *endptr;                /* end-of-number marker for strto*() */

           int        numCoords;
           long long  numObjs;       /* number of local data objects */
           long long  totalNumObjs=0;/* total number of data objects */
           long long  numClusters;   /* total number of clusters */
           long long *membership;    /* [numObjs] */

           char   *filename, *outFileName;
           char   *derivedName;   /* heap copy backing outFileName, if any */
           char   *var_name;
           char   *centers_filename;
           char   *centers_name;
           DATATYPE **objects;    /* [numObjs][numCoords] data objects */
           float **clusters;      /* [numClusters][numCoords] cluster center */
           float   threshold;
           const float default_threshold = 0.001f;
           double  timing, io_timing, clustering_timing;
           double  max_io_timing, max_clustering_timing;

           int     rank, nproc, mpi_namelen;
           char    mpi_name[MPI_MAX_PROCESSOR_NAME];

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Get_processor_name(mpi_name,&mpi_namelen);

    /* some default values */
    _debug           = 0;
    verbose          = 1;
    threshold        = default_threshold;
    numClusters      = 0;
    numObjs          = 0;
    numCoords        = 0;
    is_print_usage   = 0;
    exitCode         = EXIT_FAILURE;  /* switched to success at the very end */
    filename         = NULL;
    outFileName      = NULL;
    derivedName      = NULL;
    var_name         = NULL;
    centers_filename = NULL;
    centers_name     = NULL;
    objects          = NULL;
    clusters         = NULL;
    membership       = NULL;

    /* getopt() signals the end of the options with -1 */
    while ( (opt=getopt(argc,argv,"i:v:c:k:n:t:o:qdh")) != -1) {
        switch (opt) {
            case 'i': filename=optarg;
                      break;
            case 'v': var_name = optarg;
                      break;
            case 'c': centers_filename = optarg;
                      break;
            case 'k': centers_name = optarg;
                      break;
            case 'n': numClusters = strtoll(optarg, &endptr, 10);
                      /* reject empty or partially numeric arguments */
                      if (endptr == optarg || *endptr != '\0')
                          is_print_usage = 1;
                      break;
            case 't': threshold = (float) strtod(optarg, &endptr);
                      if (endptr == optarg || *endptr != '\0')
                          is_print_usage = 1;
                      break;
            case 'o': outFileName = optarg;
                      break;
            case 'q': verbose = 0;
                      break;
            case 'd': _debug = 1;
                      break;
            case 'h':
            default: is_print_usage = 1;
                      break;
        }
    }

    if (filename == NULL || var_name == NULL || numClusters <= 1 ||
        is_print_usage == 1) {
        if (rank == 0) usage(argv[0], default_threshold);
        goto fn_exit;
    }
    if (centers_filename == NULL) centers_filename = filename;
    if (centers_name     == NULL) centers_name     = var_name;

    /* without "-o" the output name is derived from the input name; it needs
     * storage of its own because outFileName points nowhere at this point */
    if (outFileName == NULL) {
        derivedName = derive_output_name(filename);
        outFileName = derivedName;
    }
    allocOk = (outFileName != NULL);
    MPI_Allreduce(&allocOk, &allAllocOk, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
    if (!allAllocOk) {
        if (rank == 0)
            printf("Error: unable to allocate memory for the output file name\n");
        goto fn_exit;
    }

    if (rank == 0 && verbose)
        printf("Will write coordinates of K=%lld cluster centers to file \"%s\"\n",
               numClusters, outFileName);

    if (_debug) printf("Proc %d of %d running on %s\n", rank, nproc, mpi_name);

    MPI_Barrier(MPI_COMM_WORLD);
    io_timing = MPI_Wtime();

    /* read data points from file -------------------------------------------*/
    err = pnetcdf_read(filename, var_name, &totalNumObjs, &numObjs, &numCoords,
                       &objects, MPI_COMM_WORLD);
    MPI_Allreduce(&err, &minErr, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
    /* 1 is treated as the success value, as the comparison with 1 has always
     * done here; the reduced value makes all processes leave together */
    if (minErr != 1 || err != 1) {
        /* a failed read leaves the state of objects unknown: do not free it */
        if (err != 1) objects = NULL;
        goto fn_exit;
    }

    if (totalNumObjs < numClusters) {
        if (rank == 0)
            printf("Error: number of clusters must not be larger than the number of data points to be clustered.\n");
        goto fn_exit;
    }

    if (rank == 0 && verbose)
        printf("Read input data objects of number N=%lld from file \"%s\"\n",
               totalNumObjs, filename);

    /* checking if numObjs < nproc is done in the I/O routine */

    if (_debug) { /* print the first 4 objects' first 4 coordinates */
        int n_objs = (numObjs   < 4) ? (int)numObjs : 4;
        int n_coor = (numCoords < 4) ? numCoords    : 4;
        for (i=0; i<n_objs; i++) {
            /* the line is assembled first and printed with one call; every
             * write is bounded because "%10f" is a minimum width only and a
             * large value can take hundreds of characters */
            char   strline[2048];
            size_t used;
            int    len;

            strline[0] = '\0';
            len  = snprintf(strline, sizeof(strline), "%d: objects[%d]= ",
                            rank, i);
            used = (len > 0) ? (size_t)len : 0;
            for (j=0; j<n_coor && used < sizeof(strline); j++) {
                double tmp = objects[i][j];
                len = snprintf(strline+used, sizeof(strline)-used, "%10f", tmp);
                if (len < 0) break;
                used += (size_t)len;
            }
            printf("%s\n", strline);
        }
    }

    /* read initial K cluster centers from file ---------------------------*/
    err = pnetcdf_read_centers(centers_filename, centers_name, numClusters,
                               numCoords, &clusters, MPI_COMM_WORLD);
    MPI_Allreduce(&err, &minErr, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
    if (minErr != 1 || err != 1) {
        if (err != 1) clusters = NULL;  /* same reasoning as for objects */
        goto fn_exit;
    }

    /* check initial cluster centers for repetition */
    if (check_repeated_clusters(numClusters, numCoords, clusters) == 0) {
        if (rank == 0)
            printf("Error: some initial clusters are repeated. Please select distinct initial centers\n");
        goto fn_exit;
    }

    if (_debug && rank == 0) {
        printf("Sorted initial cluster centers:\n");
        for (i=0; i<numClusters; i++) {
            printf("clusters[%d]=",i);
            for (j=0; j<numCoords; j++)
                printf(" %6.2f", clusters[i][j]);
            printf("\n");
        }
    }

    timing            = MPI_Wtime();
    io_timing         = timing - io_timing;
    clustering_timing = timing;

    /* membership: the cluster id for each data object */
    membership = malloc((size_t)numObjs * sizeof *membership);
    /* all processes must agree on the outcome before entering mpi_kmeans() */
    allocOk = (membership != NULL || numObjs == 0);
    MPI_Allreduce(&allocOk, &allAllocOk, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
    if (!allAllocOk) {
        if (rank == 0)
            printf("Error: unable to allocate memory for the membership array\n");
        goto fn_exit;
    }

    /* start the core computation -------------------------------------------*/
    mpi_kmeans(objects, numCoords, numObjs, numClusters, threshold, membership,
               clusters, MPI_COMM_WORLD);

    /* the data objects are not needed for writing the result */
    free(objects[0]);
    free(objects);
    objects = NULL;

    timing            = MPI_Wtime();
    clustering_timing = timing - clustering_timing;

    /* output: the coordinates of the cluster centers----------------------*/
    pnetcdf_write(outFileName, numClusters, numObjs, numCoords, clusters,
                  membership, totalNumObjs, MPI_COMM_WORLD, verbose);

    free(membership);
    membership = NULL;
    free(clusters[0]);
    free(clusters);
    clusters = NULL;

    /*---- output performance numbers ---------------------------------------*/
    io_timing += MPI_Wtime() - timing;

    /* get the max timing measured among all processes */
    MPI_Reduce(&io_timing, &max_io_timing, 1, MPI_DOUBLE,
               MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&clustering_timing, &max_clustering_timing, 1, MPI_DOUBLE,
               MPI_MAX, 0, MPI_COMM_WORLD);

    if (rank == 0) {
        printf("\nPerforming **** Parallel Kmeans  (MPI) ****\n");
        printf("Num of processes = %d\n",   nproc);
        printf("Input file       : %s\n",   filename);
        printf("Output file      : %s\n",   outFileName);
        printf("numObjs          = %lld\n", totalNumObjs);
        printf("numCoords        = %d\n",   numCoords);
        printf("numClusters      = %lld\n", numClusters);
        printf("threshold        = %.4f\n", threshold);
        printf("I/O time         = %10.4f sec\n", max_io_timing);
        printf("Computation time = %10.4f sec\n", max_clustering_timing);
    }

    exitCode = EXIT_SUCCESS;

fn_exit:
    /* single exit point: release whatever is still owned; pointers that were
     * already released or never obtained are NULL here */
    if (objects != NULL) {
        free(objects[0]);
        free(objects);
    }
    if (clusters != NULL) {
        free(clusters[0]);
        free(clusters);
    }
    free(membership);
    free(derivedName);

    MPI_Finalize();
    return exitCode;
}

