######################################################################## ######################### Authors: Sonja Pravilovic and Annalisa Appice ############## ######################## ######## 4. CoST – Co-Kriging– for windows before ############### ########################################PC90 – BEFORE ################################## # Remove all subject rm(list = ls()) experiment<-function(dirN,fileN,quanteN) { print(dirN) memory.limit(size=4095) # Library import library(sp); library(spacetime); library(rgeos); library(gstat); library(lattice); library(xts); library(zoo); library(geoR); library(rgl); library(scatterplot3d); library(raster) ## Set the folder for input data ######################################## # Define the folder of input data, and define folder where to save all results########## setwd(dirN) ###quante<-8 # Observed time point quante<-quanteN windows<-6 # Access the data: position of stations and data, for all datasets pos, inputdata are the in the root of folder where are data position<- read.table("pos.txt",header=T) # Select data inputdata <- read.table(fileN,header=F) # number of stations and number of time points in input inputdata<-inputdata[1:quante,] nst<-dim(position)[1]; nti<-dim(inputdata)[1]; # Definition of fold for cross validation # If the number of station is greater than 26, cross validation is leave one out, else 1/5 of stations # Definition of fold for cross validation # If the number of station is greater than 26, cross validation is leave one out, else 1/5 of stations if (nst>26) {fold <- rep_len(seq(1,5), nst)} else {fold <-seq(1,nst)} fold F<-max(fold) #Spatial position xx<-position[,2]; yy<-position[,3] rangex<-range(xx) rangey<-range(yy) rangez<-range(inputdata) podacirange<-rbind(rangex,rangey,rangez) plot(xx,yy, main="Sensors positions", xlab="Longitude",ylab="Latitude", cex.lab=1.8, lwd=2.5, lty=2, cex.axis=2.3) dev.copy(bmp,'myplot11.bmp') dev.off() # tabela is complete values of all dataset tabela<-t(inputdata); tempo<-1:nti; w<-c(1:windows); how<-length(w); tabela<-as.matrix(tabela) #############CO-KRIGING FOR SCROLING WINDOWS ############################ # Initilization sseCoK<-array(0,c(quante,how)) nsseCoK<-array(0,c(quante,how)) TrCK<-array(0,c(quante,F)) TsCK<-array(0,c(quante,F)) timeTrCK<-array(0,c(how)) timeTsCK<-array(0,c(how)) aicCK<-array(0,c(how)) aiccCK<-array(0,c(how)) bicCK<-array(0,c(how)) sseCoK<-array(0,c(how)) rsseCoK<-array(0,c(how)) observedCK<-tabela kCK<-array(0,c(how)) # All data for all time point print(tabela) # rows sensors, column time point VAR<-array(0,c(how)) for (jj in 1:how) # for each window { modelCK<-array (0, c(F,quante)) predictionCK<-array(0,c(nst,quante)) print("*********WINDOWS ********") print(jj) observedCK<-tabela for (i in 1:quante){ # for each time point print(" i TIME POINT ************") print(i) for (ii in 1:F){ # for each fold t1<-proc.time()[3] print(" FOLD ************") print(ii) #Sensors and data in training training<-which(fold!=ii) duz<-length(training) print("duz is Length of data in training") print(duz) #Sensors excluded test<-which(fold==ii) # Training data A if(i==1) { K<-1 how1<-1 } # KRIGING else { if(i-jj>0) {broj<-seq((i-jj), (i-1))} else {broj<-seq(1, (i-1))} how1<-length(broj) } if(how1>=1 & i>1) {# Data - Variables print(" Variable are one or more and there are (number of them):") print(broj) # Here is important the order of variables Value2 is more important then second # reverce order – nearer is second one iks<-length(broj) Value2<-as.matrix(tabela[training,broj[iks]:broj[1]]) kolko<-dim(Value2)[2] print("********* How many variable (counting target + other variable)********") print(iks) naziv<-paste("Value",seq(2,(iks+1)),sep="") print(naziv) colnames(Value2)<-naziv VAR[jj]<-length(naziv) A<-data.frame(id=seq(1,duz),x=xx[training],y=yy[training],Time=rep(i,each=duz), Value1=c(tabela[training,i]),Value2) print("Co-Kriging data") print(A[1:3,]) K<-2 } # CO-KRIGING WITH VAR BEFORE if(K==1 & how1==1) { A<-data.frame(id=seq(1,duz),x=xx[training],y=yy[training],Time=rep(i,each=duz), Value1=c(tabela[training,i])) print("Kriging data") VAR[jj]<-0 print(A[1:3,]) } # INPUT DATA FOR KRIGING # Variogram of the target variable: g<- gstat(id="Value1", formula = Value1~1, locations = ~x+y, data=A) options(warn = -1) # don't print warnings #Fit the variogram v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) best.fit<-v.fit if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } print("************BEST MODEL FIT *************") print(best.fit) #Spatial grid grd<-data.frame(x=c(xx), y=c(yy)) if(dim(A)[2]>5){ #CO-KRIGING #repeat { print("CO – KRIGING ") print(" ******WINDOWS SIZE ************") print(jj) print(" *******i ti snapshot ************") print(i) print("****** ii ti FOLD ************") print(ii) print("************Variogram for the target variable with models ordered *************") print(variogram(g)) t3<-proc.time()[3] print("************Best fit model is co-kriging ************") # We compute multivariate variogram (g1): print("When the input data has second variable, we have to do co-kriging, otherwise kriging") # compute co-kriging by considering 6 variables at worst g1<- gstat(id="Value1", formula = Value1~1, locations = ~x+y, data =A) g1<- gstat(g1, id="Value2", formula = Value2~1, locations = ~x+y, data =A) if(kolko>=2) {g1<- gstat(g1, id="Value3", formula = Value3~1, locations = ~x+y, data =A)} if(kolko>=3) {g1<- gstat(g1, id="Value4", formula = Value4~1, locations = ~x+y, data =A)} if(kolko>=4) {g1<- gstat(g1, id="Value5", formula = Value5~1, locations = ~x+y, data =A)} if(kolko>=5) {g1<- gstat(g1, id="Value6", formula = Value6~1, locations = ~x+y, data =A)} if(kolko>=6) {g1<- gstat(g1, id="Value7", formula = Value7~1, locations = ~x+y, data =A)} plot(variogram(g1), cutoff=1000, width=50) print("***************** VARIOGRAMS *******************") # Variograms for all variables: print("************Variogram for all variables *************") vm<-variogram(g1) print(vm) # Fit the multivariated co-kriging model print("************FIT OF ALL VARIOGRAM ************") rsco<-(tryCatch(fit.lmc(vm, g1, model= best.fit, fit.ranges=FALSE, fit.lmc=TRUE, correct.diagonal=1.02), error=function(e) NULL)) print(rsco) if(is.null(rsco) ==TRUE) { ###AA if(as.character(best.fit[2,1])=="Lin") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Exp") { v.fit <- fit.variogram(variogram(g), vgm(c("Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Mat") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Gau") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Mat", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Sph") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } best.fit<-v.fit if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } print("************Variogram *************") print(variogram(g)) rsco<-(tryCatch(fit.lmc(vm, g1, model= best.fit, fit.ranges=FALSE, fit.lmc=TRUE, correct.diagonal=1.02), error=function(e) NULL)) print(rsco) } #else { print("This model fit.lmc - CO-KRIGING IS NOT NULL") # CO-KRIGING FIT: vm.fit<-rsco plot(variogram(g1), best.fit, cutoff=1000, width=50) # Fit a Linear Model of Coregionalization to a Multivariable Sample Variogram ck <- predict(vm.fit, grd) print("************Prediction CO-KRIGING*************") print(test) print(ck$Value1.pred[test]) # Sometimes even the best fit doesn’t produce prediction S<-length(which(is.na(ck$Value1.pred[test])==TRUE)) # Run time for training – finish here: CO-KRIGING if(S>0) { if(as.character(best.fit[2,1])=="Lin") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Exp") { v.fit <- fit.variogram(variogram(g), vgm(c("Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Mat") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Gau") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Mat", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Sph") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } best.fit<-v.fit if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } print("************Variogram *************") print(variogram(g)) vm.fit<-(tryCatch(fit.lmc(vm, g1, model= best.fit, fit.ranges=FALSE, fit.lmc=TRUE, correct.diagonal=1.02), error=function(e) NULL)) print(rsco) #Plot fitted variograms to all the sample variograms with best model selected: plot(variogram(g1), best.fit, cutoff=1000, width=50) # Fit a Linear Model of Coregionalization to a Multivariable Sample Variogram ck <- predict(vm.fit, grd) print("************Prediction CO-KRIGING*************") print(test) print(ck$Value1.pred[test]) # Sometimes even the best fit doesn’t produce prediction S<-length(which(is.na(ck$Value1.pred[test])==TRUE)) #### } #else { t2<-proc.time()[3] # end time for training in CO-KRIGING TrCK[i,ii]<-t2-t1 # Run time for interpolation – start here: CO-KRIGING t3<-proc.time()[3] ck <- predict(vm.fit, grd) # Prediction for test data: print("************Prediction done*************") print(ck$Value1.pred[test]) t4<-proc.time()[3] # Run time for test – finish here: CO-KRIGING TsCK[i,ii]<-t4-t3 print("************TEST DATA*************") print(test) # Prediction for test data: predictionCK[test,i]<-ck$Value1.pred[test] print("************Prediction in CO-KRIGING *************") print(predictionCK[test,i]) print("************Best fit model in CO-kriging ************") modelCK[ii,i]<-as.character(best.fit[2,1]) # Best model in CO-KRIGING print(modelCK[ii,i]) } } # end: checked if co-kriging produced prediction, and if not, try other model } # END OF REPEAT IN CO-KRIGING print("******************** RESULTS FOR KRIGING - CO-KRIGING *************************") t4<-proc.time()[3] print("******************** FOR BLOCK*************************") print(jj) print("******************** FOR TIME POINT*************************") print(i) print("******************** FOR FOLD*************************") print(test) predictionCK[test,i]<-ck$Value1.pred[test] #Test time in CO-KRIGING TsCK[i,ii]<-(t4-t3) } ### END OF CO-KRIGING else { # KRIGING #Test time in only kriging t3<-proc.time()[3] # ONLY KRIGING FOR THE FIRST TIME POINT FOR EACH WINDOWS SIZE #repeat { print("KRIGING - FIRST TIME POINT ") print(" ******WINDOWS SIZE ************") print(jj) print(" *******i ti snapshot ************") print(i) print("****** ii ti FOLD ************") print(ii) #Fit the variogram v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, warn.if.neg=FALSE, fit.kappa=FALSE) print("************Variogram when we have only kriging*************") print(variogram(g)) best.fit<-v.fit print("************BEST MODEL FIT *************") print(best.fit) # Sometimes happen that all five models are NULL so we force range to be max(variogram distance)/3 if(best.fit$range[2]<=0) { #print("***TRY ALL MODELS AND EACH OF THEM HAS THE RANGE NEGATIVE") best.fit$range[2]<- max(variogram(g)$dist)/3 } rsk<-(tryCatch(krige(id="Value1",Value1~1,locations=~x+y,model=best.fit,data=A,newdata=grd), error=function(e) NULL)) print(rsk) if(is.null(rsk) ==TRUE) { if(as.character(best.fit[2,1])=="Lin") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Exp") { v.fit <- fit.variogram(variogram(g), vgm(c("Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Mat") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Gau") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Mat", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Sph") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } print("************Variogram when we have only kriging*************") print(variogram(g)) best.fit<-v.fit print("************BEST MODEL FIT *************") print(best.fit) # Sometimes happen that all five models are NULL so we force range to be max(variogram distance)/3 if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } } #else { print("This model FIT is NOT null") # Ordinary kriging: ck<-krige(id="Value1",Value1~1, locations=~x+y, model=best.fit, data=A, newdata=grd); # Need to check if the ordinary kriging for test data produce NA, and if it does we need to use other model print("************Prediction have NA values or not - CONTROL *************") # Sometimes even the best fit doesn’t produce prediction S<-length(which(is.na(ck$Value1.pred[test])==TRUE)) if(S>0) { print("Prediction are NA - You have to try WITHOUT THE LINEAR MODEL") if(as.character(best.fit[2,1])=="Lin") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Exp") { v.fit <- fit.variogram(variogram(g), vgm(c("Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Mat") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Gau") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Mat", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Sph") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } best.fit<-v.fit if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } # Ordinary kriging: ck<-krige(id="Value1",Value1~1, locations=~x+y, model=best.fit, data=A, newdata=grd); } #else { # Run time for training – finish here: t2<-proc.time()[3] TrCK[i,ii]<-t2-t1 # Run time for interpolation – start here: t3<-proc.time()[3] # Intepolation ck<-krige(id="Value1",Value1~1, locations=~x+y, model=best.fit, data=A, newdata=grd); # Prediction for test data: t4<-proc.time()[3] print("************Prediction done*************") print(ck$Value1.pred[test]); modelCK[ii,i]<-as.character(best.fit[2,1]) # Run time for test – finish here: TsCK[i,ii]<-t4-t3 print("************TEST DATA*************") print(test) # Prediction for test data: predictionCK[test,i]<-ck$Value1.pred[test] } } # end: checked if ok produce prediction, and if not, try other models } # end of repeat # REPEAT OF KRIGING } # KRIGING WE HAVE ONLY ONE VARIABLE } # END OF FOLD } ### END SNAPSHOT # For each windows save te PREDICTION namePred<-paste(jj, "-PredictionCK-ALL-BEFORE.csv",sep="") write.csv(predictionCK, file =namePred) sseCK<-(predictionCK-observedCK)^2 sseCK<-sum(sseCK) rsseCK<-sqrt(sseCK/(nst*quante)) sseCoK[jj]<-sseCK rsseCoK[jj]<-rsseCK timeTrCK[jj]<-sum(TrCK) timeTsCK[jj]<-sum(TsCK) # For different windows size, save the models fold namemod<-paste(jj, "-modelCK-ALL-BEFORE.csv",sep="") write.csv(modelCK, file =namemod) # Computation of AIC, AICc, BIC print("*****Nuber of parameters in CO-KRIGING ALL VARABLE BEFORE*********") } # END WINDOWS jj plot(variogram(g1), vm.fit, main="Best fit model for co-kriging using ALL B of time point before") dev.copy(bmp,'ALL Before.bmp') dev.off() plot(variogram(g1), vm.fit, main="Best fit model for co-kriging ALL B") dev.copy(bmp,'Fitted variogram ALL B.bmp') dev.off() plot(variogram(g1), vm.fit, main="Best fit model for co-kriging – ALL BEFORE", xlab="Distances",ylab="Covariance", col="black",cex.lab=15, lwd=2, lty=1, cex.axis=15, pcx=5) dev.copy(bmp,'ALL Before.bmp') dev.off() # Number of parameters in Co-Kriging-ALL-before for(jj in 1:how){ ### in VAR[jj] v'è la lunghezza della finestra temp<-0 for( tempI in 1:VAR[jj]) { temp<-temp+(tempI*(tempI+1)*1.5+1) } temp<-temp+(((VAR[jj]+1)*(VAR[jj]+2)*1.5+1)*(quante-VAR[jj])) temp<-temp*F kCK[jj]<-temp ### #kCK[jj]<-sum(1.5*(VAR[jj]*(VAR[jj]+1))+1 +(1.5*(how+1)*(how+2)+1)*(quante-how)*F) print(paste("kcK",kCK)) aicCK[jj]<-nst*quante*log(sseCoK[jj]/(nst*quante))+2*kCK[jj] aiccCK[jj]<-aicCK[jj]+(2*kCK[jj]*(kCK[jj]+1))/(nst*quante-kCK[jj]-1) bicCK[jj]<-nst*quante*log(sseCoK[jj]/(nst*quante))+log(nst*quante)*kCK[jj] } rsseCK # Order of data in results: sseCK, rsseCK, aicCK, aiccCK, bicCK, totalTrCK, totalTsCK) results<-cbind( sseCoK, rsseCoK, aicCK, aiccCK, bicCK, timeTrCK, timeTsCK, VAR, kCK ) write.csv(results, file = "resultsCK-ALL-BEFORE.csv") getwd() results } # end function experiment dir<-c("C:\\ARPA","C:\\ARPAACQUA","C:\\MESA","C:\\NCDCS","C:\\NCDCP","C:\\NCDCT","C:\\TCEQW","C:\\TCEQO","C:\\TCEQT","C:\\SAC","C:\\SR") file<-c("fondo.txt","acqua.txt","pol.txt","sol.txt","prec.txt","temp.txt","wind.txt","ozone.txt","temp.txt","temp.txt","dif.txt") quanteList<-c(8,8,48,48,48,48,48,48,48,12,12) dirI<-1 exp<-experiment(dir[dirI],file[dirI],quanteList[dirI])