######################################################################## ######################### Authors: Sonja Pravilovic and Annalisa Appice ############## ######################## ######## KRIGING ############### # Remove all subject rm(list = ls()) experiment<-function(dirN,fileN,quanteN) { print(dirN) # Library import library(sp); library(spacetime); library(rgeos); library(gstat); library(lattice); library(xts); library(zoo); library(geoR); library(rgl); library(scatterplot3d); library(raster) ## Set the folder for input data ######################################## # Define the folder of input data, and define folder where to save all results########## setwd(dirN) quante<-quanteN # Access the data: position of stations and data, for all datasets pos, inputdata are the in the root of folder where are data position<- read.table("pos.txt",header=T) # Select data inputdata <- read.table(fileN,header=F) # number of stations and number of time points in input inputdata<-inputdata[1:quante,] # number of stations and number of time points in input nst<-dim(position)[1]; nti<-dim(inputdata)[1]; # Definition of fold for cross validation # If the number of station is greater than 26, cross validation is leave one out, else 1/5 of stations if (nst>26) {fold <- rep_len(seq(1,5), nst)} else {fold <-seq(1,nst)} fold #Spatial position xx<-position[,2]; yy<-position[,3] rangex<-range(xx) rangey<-range(yy) rangez<-range(inputdata) podacirange<-rbind(rangex,rangey,rangez) write.csv(podacirange, file = "range.csv") plot(xx,yy, main="Sensors positions", xlab="Longitude",ylab="Latitude", cex.lab=1.8, lwd=2.5, lty=2, cex.axis=2.3) dev.copy(bmp,'network.bmp') dev.off() # tabela is complete values of all dataset for defined time points tabela<-t(inputdata) tempo<-1:nti #########################KRIGING FOR EACH TIME POINT #################### # Definition of all data – compute kriging for each time point for training, and perform interpolation kriging for test data # Initialization F<-max(fold) kK<-4*quante*F # Number of parameters in Kriging predictionK<-array (0, c(nst,quante)); timeTrK<-array (0, c(quante,F)) timeTsK<-array (0, c(quante,F)) modelK<-array (0, c(F,quante)) countK<-0 # count how many times fit for variogram doesn’t work countKS<-0 # count how many times KRIGING prediction doesn’t’t work BR<-0 # count how many time all five models doesn’t work and fit range to max.distance variogram/3 #AA #for (i in 1:quante){ # For each time point for (i in 1:quante){ # For each time point for (ii in 1:max(fold)){ # for each fold print(" i ti time point ************") print(i) print(" ii ti fold ************") print(ii) #Sensors and data in training training<-which(fold!=ii) #Sensor/s excluded test<-which(fold==ii) print(" test ************") aa<-data.frame(id=seq(1,nst),x=xx,y=yy, time=i, Value1=c(tabela[,i])) print ("*********** aa – all data ******************") #Definition of spatial grid grd<-data.frame(aa[,2:3]) print ("*********** INPUT OF ALL DATA **************") print(grd[1:3,]) print ("*****VARIOGRAM FOR TRAINING DATA (ONE OR SOME OUT)****") # A is the data.frame of training data t1<-proc.time()[3] A<-aa[training,] # Fit a model variogram for target variable g<-gstat(id="Value1", formula = Value1~1, locations=~x+y, data = A) print("*********** VARIOGRAM SELECTION FOR TARGET VARIABLE **********") options(warn = -1) # don't print warnings # options (warn = 0) # print warnings ##AA v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) best.fit<-v.fit if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } print("************BEST MODEL FIT *************") print(best.fit) rs<-(tryCatch(krige(id="Value1",Value1~1,locations=~x+y,model=best.fit,data=A,newdata=grd), error=function(e) NULL)) #print(rs) if(is.null(rs)==TRUE) { print("TRYCATCH FAILED DUE TO SELECTED MODEL") if(as.character(best.fit[2,1])=="Lin") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Exp") { v.fit <- fit.variogram(variogram(g), vgm(c("Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Mat") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Gau") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Mat", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Sph") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } best.fit<-v.fit if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } print("################BEST MODEL FIT WITHOUT LINEAR ##############") print(best.fit) #rs<-(tryCatch(krige(id="Value1",Value1~1,locations=~x+y,model=best.fit,data=A,newdata=grd), error=function(e) NULL)) #print(rs) } #else { print("This model FIT is NOT null") # Ordinary kriging: ok<-krige(id="Value1",Value1~1, locations=~x+y, model=best.fit, data=A, newdata=grd); # Need to check if the ordinary kriging for test data produce NA, and if it does we need to use other model print("************Prediction have NA values or not - CONTROL *************") # Sometimes even the best fit doesn’t produce prediction S<-length(which(is.na(ok$Value1.pred[test])==TRUE)) if(S>0) { print("Prediction are NA - You have to try other models set the error of this one to 1e+30") if(as.character(best.fit[2,1])=="Lin") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp", "Sph", "Mat", "Gau")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Exp") { v.fit <- fit.variogram(variogram(g), vgm(c("Sph", "Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Mat") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Gau") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Sph", "Mat", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } if(as.character(best.fit[2,1])=="Sph") { v.fit <- fit.variogram(variogram(g), vgm(c("Exp","Mat", "Gau", "Lin")), fit.sills=TRUE, fit.ranges=TRUE, fit.method=7, debug.level=2, warn.if.neg=FALSE, fit.kappa=FALSE) } best.fit<-v.fit if(best.fit$range[2]<=0) { best.fit$range[2]<- max(variogram(g)$dist)/3 } print("################BEST MODEL FIT WITHOUT LINEAR ##############") print(best.fit) print("This model FIT is NOT null") # Ordinary kriging: ok<-krige(id="Value1",Value1~1, locations=~x+y, model=best.fit, data=A, newdata=grd); # Need to check if the ordinary kriging for test data produce NA, and if it does we need to use other model print("************Prediction have NA values or not - CONTROL *************") # Sometimes even the best fit doesn’t produce prediction S<-length(which(is.na(ok$Value1.pred[test])==TRUE)) S } #else { # Run time for training – finish here: t2<-proc.time()[3] timeTrK[i,ii]<-t2-t1 # Run time for interpolation – start here: t3<-proc.time()[3] # Intepolation ok<-krige(id="Value1",Value1~1, locations=~x+y, model=best.fit, data=A, newdata=grd); # Prediction for test data: print("************Prediction done*************") print(ok$Value1.pred[test]); modelK[ii,i]<-as.character(best.fit[2,1]) t4<-proc.time()[3] # Run time for test – finish here: timeTsK[i,ii]<-t4-t3 print("************TEST DATA*************") print(test) # Prediction for test data: predictionK[test,i]<-ok$Value1.pred[test] m=5 } # end: checked if ok produce prediction, and if not, try other models } ## } # ii ti fold } # i ti snapshot # compute SSE observedK<-tabela print("observed") observedK predictionK sseK<-(observedK-predictionK)^2 sseK # sseK is the sum of square errors sseK<-sum(sseK) print(sseK) print(nst) print(quante) print(kK) # rsseK is root square errors of sqrt(sse/(quante*nst)) rsseK<-sqrt(sseK/(nst*quante)) rsseK print(rsseK) print ("*********** SAVE THE RESULTS FOR KRIGING ********") plot(variogram(g), best.fit, main="Best fit model for kriging") dev.copy(bmp,'variogramK5CV.bmp') dev.off() print("******* TIME IN CROSS VALIDATION FOR TRAINING AND TEST****************") totalTrK<-sum(timeTrK) totalTsK<-sum(timeTsK) write.csv (predictionK, file = "predictionK.csv") observedData<-t(inputdata) write.csv (observedData, file = "observedData.csv") write.csv (modelK, file = "modelK.csv") sseK aicK<-nst*quante*log(sseK/(nst*quante))+2*kK print(aicK) aiccK<-aicK+(2*kK*(kK+1))/(nst*quante-kK-1) bicK<-nst*quante*log(sseK/(nst*quante))+log(nst*quante)*kK # Order of data in results: countK, countKS, sseK, rsseK, aicK, aiccK, bicK, totalTrK, totalTsK) results<-cbind(countK, countKS, sseK, rsseK, aicK, aiccK, bicK, totalTrK, totalTsK) write.csv(results, file = "resultsK.csv") results getwd() BR } # end function experiment dir<-c("C:\\Kriging -Script\\exp\\ARPA","C:\\Kriging -Script\\exp\\ARPAACQUA","C:\\Kriging -Script\\exp\\MESA","C:\\Kriging -Script\\exp\\NCDCS","C:\\Kriging -Script\\exp\\NCDCP","C:\\Kriging -Script\\exp\\NCDCT","C:\\Kriging -Script\\exp\\TCEQW","C:\\Kriging -Script\\exp\\TCEQO","C:\\Kriging -Script\\exp\\TCEQT","C:\\Kriging -Script\\exp\\SAC","C:\\Kriging -Script\\exp\\SR") file<-c("fondo.txt","acqua.txt","pol.txt","sol.txt","prec.txt","temp.txt","wind.txt","ozone.txt","temp.txt","temp.txt","dif.txt") quanteList<-c(8,8,48,48,48,48,48,48,48,12,12) dirI<-1 exp<-experiment(dir[dirI],file[dirI],quanteList[dirI]) ########