######################################################################## ######################### Authors: Sonja Pravilovic and Annalisa Appice ############## ######################## ######## 4. CoST – Co-Kriging– for windows before ############### ########################################PC90 – BEFORE ################################## # Remove all subject rm(list = ls()) experiment<-function(dirN,fileN,quanteN) { print(dirN) # Library import library(sp); library(spacetime); library(rgeos); library(gstat); library(lattice); library(xts); library(zoo); library(geoR); library(rgl); library(scatterplot3d); library(raster) ## Set the folder for input data ######################################## # Define the folder of input data, and define folder where to save all results########## setwd(dirN) quante<-quanteN windows<-6 # Access the data: position of stations and data, for all datasets pos, inputdata are the in the root of folder where are data position<- read.table("pos.txt",header=T) # Select data inputdata <- read.table(fileN,header=F) # number of stations and number of time points in input inputdata<-inputdata[1:quante,] nst<-dim(position)[1]; nti<-dim(inputdata)[1]; # Definition of fold for cross validation # If the number of station is greater than 26, cross validation is leave one out, else 1/5 of stations # Definition of fold for cross validation # If the number of station is greater than 26, cross validation is leave one out, else 1/5 of stations if (nst>26) {fold <- rep_len(seq(1,5), nst)} else {fold <-seq(1,nst)} fold F<-max(fold) #Spatial position xx<-position[,2]; yy<-position[,3] rangex<-range(xx) rangey<-range(yy) rangez<-range(inputdata) podacirange<-rbind(rangex,rangey,rangez) plot(xx,yy, main="Sensors positions", xlab="Longitude",ylab="Latitude", cex.lab=1.8, lwd=2.5, lty=2, cex.axis=2.3) dev.copy(bmp,'myplot11.bmp') dev.off() # tabela is complete values of all dataset tabela<-t(inputdata); tempo<-1:nti; w<-c(1:windows); how<-length(w); tabela<-as.matrix(tabela) #############CO-KRIGING FOR SCROLING WINDOWS ############################ # Initilization sseCoK<-array(0,c(quante,how)) nsseCoK<-array(0,c(quante,how)) TrCK<-array(0,c(quante)) TsCK<-array(0,c(quante)) timeTrCK<-array(0,c(how)) timeTsCK<-array(0,c(how)) aicCK<-array(0,c(how)) aiccCK<-array(0,c(how)) bicCK<-array(0,c(how)) countK<-array(0,c(how)) # counts how many times doesn’t produce prediction in Kriging countCK<-array(0,c(how)) # counts how many times doesn’t produce prediction in CO-Kriging sseCoK<-array(0,c(how)) rsseCoK<-array(0,c(how)) observedCK<-tabela kCK<-array(0,c(how)) BR<-0 # count how many time all five models in KRIGING have ranges negative BRCO<-0 # count how many time all five models in CO-KRIGING have range negative # All howdata for all time point print(tabela) # rows sensors, column time point for (jj in 1:how) # for each window { kCK[jj]<-(4+10*(quante-1))*F # Number of parameters in Co-Kriging-PC1-before modelCK<-array (0, c(F,quante)) predictionCK<-array(0,c(nst,quante)) print("*********WINDOWS *******") print(jj) observedCK<-tabela for (i in 1:quante) { # for each time point print(" i TIME POINT ************") print(i) # for (ii in 1:F) # { # for each fold t1<-proc.time()[3] print(" FOLD ************") # print(ii) #Sensors and data in training # training<-which(fold!=ii) duz<-nst print("duz is Length of data in training") print(duz) #Sensors excluded # test<-which(fold==ii) if(i==1) #time point =1 { K<-1 how<-0 } # KRIGING if(i>1) # time point>1, CO-KRIGING { if(i-jj>0) { broj<-seq((i-jj), (i-1)) } else { broj<-seq(1, (i-1)) } how<-length(broj) } print(how) if(how>=2 & i>1) #DOING PCA { # Data in PCA print("Variable in PC are more than one and there are:") print(broj) seconda<-as.matrix(tabela[,broj]) nv<-prcomp(seconda); Value2<-as.vector(nv$x[,1]) print("PCA1 of all variables in PCA") K<-2 print("length of Value2") dimTR2<-length(Value2) print(dimTR2) } # CO-KRIGING WITH PC1 if(how==1 & i>1) #DOING PCA with window size=1 { # Only one variable before – haven’t PCA print("There is not PCA – only one variable before and it is:") print(broj) Value2<-tabela[, broj] print("broj") print(broj) K<-2 } # CO-KRIGING WITHOUT PC if(K==1) { A<-data.frame(id=seq(1,duz),x=xx[],y=yy[],Time=rep(i,each=duz), Value1=c(tabela[,i])) print("Kriging data") print(A[1,]) } # INPUT DATA FOR KRIGING if(K==2) { A<-data.frame(id=seq(1,duz),x=xx[],y=yy[],Time=rep(i,each=duz), Value1=c(tabela[,i]),Value2= c(Value2)) print("Co-kriging") print(A[1,]) } # INPUT DATA FOR CO-KRIGING # Variogram of the target variable: g<- gstat(id="Value1", formula = Value1~1, locations = ~x+y, data=A) #Fit the variogram v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) best.fit<-v.fit if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } print("************Variogram *************") print(variogram(g)) # Best fit model for target variable #best.fit<-v.fit print("************BEST MODEL FIT *************") print(best.fit) #Spatial grid grd<-data.frame(x=c(xx), y=c(yy)) if(dim(A)[2]>5) { #CO-KRIGING #repeat { print("CO – KRIGING ") print(" ******WINDOWS SIZE ************") print(jj) print(" *******i ti snapshot ************") print(i) print("****** ii ti FOLD ************") # print(ii) print("****** model ************") print("************Variogram for the target variable with models ordered *************") print(variogram(g)) t<-proc.time()[3] print("************Best fit model is co-kriging ************") # We compute multivariate variogram (g1): print("When the input data has second variable, we have to do co-kriging, otherwise kriging") g1<- gstat(id="Value1", formula = Value1~1, locations = ~x+y, data =A) g1<- gstat(g1, id="Value2", formula = Value2~1, locations = ~x+y, data =A) # Variogram for all variables plot(variogram(g1), cutoff=1000, width=50) print("***************** VARIOGRAMS *******************") # Variograms for all variables: print("************Variogram for all variables *************") vm<-variogram(g1) print(vm) # Fit the multivariated co-kriging model print("************FIT OF ALL VARIOGRAM ************") rsco<-(tryCatch(fit.lmc(vm, g1, model= best.fit, fit.ranges=FALSE, fit.lmc=TRUE, correct.diagonal=1.02), error=function(e) NULL)) print(rsco) if(is.null(rsco) ==TRUE) { ###AA print("###############TRYCATCH FAILED DUE TO SELECTED MODEL###########") ###AA print("TRYCATCH FAILED DUE TO SELECTED MODEL") if(as.character(best.fit[2,1])=="Lin") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Exp") { v.fit <- fit.variogram(variogram(g), vgm(c("Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Mat") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Gau") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Mat", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Sph") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } ###v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) best.fit<-v.fit if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } print("************Variogram *************") print(variogram(g)) rsco<-(tryCatch(fit.lmc(vm, g1, model= best.fit, fit.ranges=FALSE, fit.lmc=TRUE, correct.diagonal=1.02), error=function(e) NULL)) print(rsco) } #####else { print("This model fit.lmc - CO-KRIGING IS NOT NULL") vm.fit<-rsco #Plot fitted variograms to all the sample variograms with best model selected: plot(variogram(g1), best.fit, cutoff=1000, width=50) # Fit a Linear Model of Coregionalization to a Multivariable Sample Variogram ck <- predict(vm.fit, grd) print("************Prediction CO-KRIGING*************") # print(test) print(ck$Value1.pred[]) # Sometimes even the best fit doesn’t produce prediction S<-length(which(is.na(ck$Value1.pred[])==TRUE)) # Run time for training – finish here: CO-KRIGING if(S>0) { print("#########Prediction FAILED DUE TO SELECTED MODEL, TRY WITHOUT THE SELECTED MODEL!") ###AA print("TRYCATCH FAILED DUE TO SELECTED MODEL") if(as.character(best.fit[2,1])=="Lin") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Exp") { v.fit <- fit.variogram(variogram(g), vgm(c("Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Mat") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Gau") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Mat", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Sph") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } #v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) best.fit<-v.fit if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } print("************Variogram *************") print(variogram(g)) vm.fit<-(tryCatch(fit.lmc(vm, g1, model= best.fit, fit.ranges=FALSE, fit.lmc=TRUE, correct.diagonal=1.02), error=function(e) NULL)) print(rsco) #Plot fitted variograms to all the sample variograms with best model selected: plot(variogram(g1), best.fit, cutoff=1000, width=50) # Fit a Linear Model of Coregionalization to a Multivariable Sample Variogram ck <- predict(vm.fit, grd) print("************Prediction CO-KRIGING*************") # print(test) print(ck$Value1.pred[]) # Sometimes even the best fit doesn’t produce prediction S<-length(which(is.na(ck$Value1.pred[])==TRUE)) } #else { t2<-proc.time()[3] # end time for training in CO-KRIGING TrCK[i]<-t2-t1 # Run time for interpolation – start here: CO-KRIGING t3<-proc.time()[3] ck <- predict(vm.fit, grd) # Prediction for test data: print("************Prediction done*************") print(ck$Value1.pred[]) t4<-proc.time()[3] # Run time for test – finish here: CO-KRIGING TsCK[i]<-t4-t3 print("************TEST DATA*************") # print(test) # Prediction for test data: predictionCK[,i]<-ck$Value1.pred[] print("************Prediction in CO-KRIGING *************") print(predictionCK[,i]) print("************Best fit model in CO-kriging ************") modelCK[,i]<-as.character(best.fit[2,1]) # Best model in CO-KRIGING print(modelCK[ ,i]) } } # end: checked if co-kriging produced prediction, and if not, try other model } # END OF REPEAT IN CO-KRIGING print("******************** RESULTS FOR KRIGING - CO-KRIGING *************************") t4<-proc.time()[3] print("******************** FOR BLOCK*************************") print(jj) print("******************** FOR TIME POINT*************************") print(i) print("******************** FOR FOLD*************************") # print(test) predictionCK[,i]<-ck$Value1.pred[] #Test time in CO-KRIGING TsCK[i]<-(t4-t3) } ### END OF CO-KRIGING else { # KRIGING #Test time in only kriging t3<-proc.time()[3] # ONLY KRIGING FOR THE FIRST TIME POINT FOR EACH WINDOWS SIZE #m=1 #brojac<-0 # count how many time all 5 models range are negative - KRIGING 1.time point #repeat { print("KRIGING - FIRST TIME POINT ") print(" ******WINDOWS SIZE ************") print(jj) print(" *******i ti snapshot ************") print(i) print("****** ii ti FOLD ************") # print(ii) #Fit the variogram v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, warn.if.neg=FALSE, fit.kappa=FALSE) print("************Variogram when we have only kriging*************") print(variogram(g)) best.fit<-v.fit print("************BEST MODEL FIT *************") print(best.fit) # Sometimes happen that all five models are NULL so we force range to be max(variogram distance)/3 if(best.fit$range[2]<=0) { #print("***TRY ALL MODELS AND EACH OF THEM HAS THE RANGE NEGATIVE") best.fit$range[2]<- max(variogram(g)$dist)/3 } rsk<-(tryCatch(krige(id="Value1",Value1~1,locations=~x+y,model=best.fit,data=A,newdata=grd), error=function(e) NULL)) print(rsk) if(is.null(rsk) ==TRUE) { print("TRYCATCH FAILED DUE TO SELECTED MODEL") if(as.character(best.fit[2,1])=="Lin") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Exp") { v.fit <- fit.variogram(variogram(g), vgm(c("Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Mat") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Gau") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Mat", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Sph") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } print("************Variogram when we have only kriging*************") print(variogram(g)) best.fit<-v.fit print("************BEST MODEL FIT *************") print(best.fit) # Sometimes happen that all five models are NULL so we force range to be max(variogram distance)/3 if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } } #else { print("This model FIT is NOT null") # Ordinary kriging: ck<-krige(id="Value1",Value1~1, locations=~x+y, model=best.fit, data=A, newdata=grd); # Need to check if the ordinary kriging for test data produce NA, and if it does we need to use other model print("************Prediction have NA values or not - CONTROL *************") # Sometimes even the best fit doesn’t produce prediction S<-length(which(is.na(ck$Value1.pred[])==TRUE)) if(S>0) { print("TRYCATCH FAILED DUE TO SELECTED MODEL") if(as.character(best.fit[2,1])=="Lin") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Exp") { v.fit <- fit.variogram(variogram(g), vgm(c("Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Mat") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Gau") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Mat", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Sph") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } best.fit<-v.fit if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } # Ordinary kriging: ck<-krige(id="Value1",Value1~1, locations=~x+y, model=best.fit, data=A, newdata=grd); } #else { # Run time for training – finish here: t2<-proc.time()[3] TrCK[i]<-t2-t1 # Run time for interpolation – start here: t3<-proc.time()[3] # Intepolation ck<-krige(id="Value1",Value1~1, locations=~x+y, model=best.fit, data=A, newdata=grd); # Prediction for test data: t4<-proc.time()[3] print("************Prediction done*************") print(ck$Value1.pred[]); modelCK[,i]<-as.character(best.fit[2,1]) # Run time for test – finish here: TsCK[i]<-t4-t3 print("************TEST DATA*************") # print(test) # Prediction for test data: predictionCK[,i]<-ck$Value1.pred[] } } # end: checked if ok produce prediction, and if not, try other models } # end of repeat # REPEAT OF KRIGING } # KRIGING WE HAVE ONLY ONE VARIABLE # } # END OF FOLD } ### END SNAPSHOT # For each windows save te PREDICTION # namePred<-paste(jj, "-PredictionCK-PC1-BEFORE.csv",sep="") # write.csv(predictionCK, file =namePred) sseCK<-(predictionCK-observedCK)^2 sseCK<-sum(sseCK) rsseCK<-sqrt(sseCK/(nst*quante)) sseCoK[jj]<-sseCK rsseCoK[jj]<-rsseCK timeTrCK[jj]<-sum(TrCK) timeTsCK[jj]<-sum(TsCK) # For different windows size, save the models fold # namemod<-paste(jj, "-modelCK-PC1-BEFORE.csv",sep="") # write.csv(modelCK, file =namemod) # Computation of AIC, AICc, BIC aicCK[jj]<-nst*quante*log(sseCoK[jj]/(nst*quante))+2*kCK[jj] aiccCK[jj]<-aicCK[jj]+(2*kCK[jj]*(kCK[jj]+1))/(nst*quante-kCK[jj]-1) bicCK[jj]<-nst*quante*log(sseCoK[jj]/(nst*quante))+log(nst*quante)*kCK[jj] } # END WINDOWS jj # plot(variogram(g1), vm.fit, main="Best fit model for co-kriging using PCA of time point before") # dev.copy(bmp,'myplot47.bmp') # dev.off() # plot(variogram(g1), vm.fit, main="Best fit model for co-kriging PCA") # dev.copy(bmp,'myplot47.bmp') # dev.off() # plot(variogram(g1), vm.fit, main="Best fit model for co-kriging - PCA BEFORE", xlab="Distances",ylab="Covariance", col="black",cex.lab=15, lwd=2, lty=1, cex.axis=15, pcx=5) # dev.copy(bmp,'myplot48.bmp') # dev.off() rsseCK countCK # Order of data in results: countK, countCK, sseCK, rsseCK, aicCK, aiccCK, bicCK, totalTrCK, totalTsCK) results<-cbind(timeTrCK, timeTsCK ) write.csv(results, file = "TIME-PC1-BEFORE.csv") getwd() results BRCO BR } # end function experiment dir<-dir<-c("C:\\Kriging -Script\\time\\exp\\ARPA","C:\\Kriging -Script\\time\\exp\\ARPAACQUA","C:\\Kriging -Script\\time\\exp\\MESA","C:\\Kriging -Script\\time\\exp\\NCDCS","C:\\Kriging -Script\\time\\exp\\NCDCP","C:\\Kriging -Script\\time\\exp\\NCDCT","C:\\Kriging -Script\\time\\exp\\TCEQW","C:\\Kriging -Script\\time\\exp\\TCEQO","C:\\Kriging -Script\\time\\exp\\TCEQT","C:\\Kriging -Script\\time\\exp\\SAC","C:\\Kriging -Script\\time\\exp\\SR") file<-c("fondo.txt","acqua.txt","pol.txt","sol.txt","prec.txt","temp.txt","wind.txt","ozone.txt","temp.txt","temp.txt","dif.txt") quanteList<-c(8,8,48,48,48,48,48,48,48,12,12) dirI<-1 exp<-experiment(dir[dirI],file[dirI],quanteList[dirI]) ########