#!/usr/bin/env Rscript
# Copyright (c) 2011-2012, Universite de Versailles St-Quentin-en-Yvelines
#
# This file is part of ASK.  ASK is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

suppressPackageStartupMessages(require(tgp, quietly=T))
suppressPackageStartupMessages(require(lhs, quietly=T))

#### Define some utility functions
# Pick up to `n` new sample points out of a set of candidate points.
#
# Arguments:
#   X         explanatory variables of the points measured so far
#   Z         response measured at each row of X
#   available candidate points (one row per candidate, one column per factor)
#   n         number of points to select
#
# Returns a data.frame with one row per selected point and one column per
# factor, ordered by decreasing ALC statistic. The result is always a
# data.frame, even with a single factor, and never contains padding rows
# when fewer than `n` design points are available.
tgp_sample <- function(X, Z, available, n) {
  # Build a D-optimal design among the candidates according to the MAP tree
  tree_fit <- btgp(X = X, Z = Z, pred.n = FALSE, verb = TRUE)
  design <- tgp.design(n, available, tree_fit)

  # Apply ALC to the selected D-optimal points
  alc_fit <- btgpllm(X, Z, XX = design, verb = TRUE, Ds2x = TRUE)

  # The score is appended as the LAST column so it can be dropped by position
  rated <- data.frame(design, SORTBY = alc_fit$Ds2x)

  # Sort by variance reduction, best candidates first
  rated <- rated[order(rated$SORTBY, decreasing = TRUE), , drop = FALSE]

  # Keep at most n rows (seq_len is safe for n == 0 and for short designs)
  # and strip the score column. drop = FALSE keeps a one-factor result
  # two-dimensional.
  keep <- seq_len(min(n, nrow(rated)))
  rated[keep, -ncol(rated), drop = FALSE]
}

# Parse the arguments: <configuration.conf> <input_file> <output_file>
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 3) {
    stop("Usage: file <configuration.conf> <input_file> <output_file>")
}

# Provides conf(), used below to query the configuration.
# NOTE(review): presumably configuration.R reads args[1] itself -- confirm.
source(file.path(Sys.getenv("ASKHOME"), "common/configuration.R"))


# Read the data: one row per measured point, response in the last column
data <- read.table(args[2])
nsamples <- conf("modules.sampler.params.n", 10)

# Query the factor descriptions once and reuse them below
factors <- conf("factors")

# Sampling rectangle: one [min, max) interval per factor.
# Categorical factors are sampled as an index in [0, number of values).
# Other factors use [range$min, range$max + 1) so that flooring the sample
# yields every integer in min..max.
# NOTE(review): the "+ 1" is also applied to float factors, which lets float
# samples exceed range$max by up to 1 -- confirm whether this is intended.
mins <- vapply(factors, function(f) {
  if (f$type == "categorical") 0 else f$range$min
}, numeric(1), USE.NAMES = FALSE)
maxs <- vapply(factors, function(f) {
  if (f$type == "categorical") length(f$values) else f$range$max + 1
}, numeric(1), USE.NAMES = FALSE)

# Choose a 20*nsamples lhs set of candidate points
available <- lhs(20 * nsamples, cbind(mins, maxs))

# Discretize every non-float factor. floor() is used rather than as.integer()
# because as.integer() truncates toward zero, which is wrong for negative
# ranges (all of (-1, 1) would collapse onto 0).
is_discrete <- vapply(factors, function(f) f$type != "float", logical(1),
                      USE.NAMES = FALSE)
if (any(is_discrete)) {
  available[, is_discrete] <- floor(available[, is_discrete])
}

# Name the candidate columns V1..Vk so they are consistent with the input
# read by read.table(). lhs() returns a plain matrix, so the names have to be
# set through colnames(); names() has nothing to rename on a matrix.
colnames(available) <- paste0("V", seq_len(ncol(available)))

# Split data in explicative variables, X, and response, Z.
X <- data[, -ncol(data)]
Z <- data[, ncol(data)]

newsamples <- tgp_sample(X, Z, available, n = nsamples)

write.table(newsamples,
            file = args[3],
            row.names = FALSE,
            col.names = FALSE)
