######################### Authors: Sonja Pravilovic and Annalisa Appice ############## ######################## ######## STKriging ############### # Remove all subject rm(list = ls()) experiment<-function(dirN,fileN,quanteN,p) { # Library import library(sp); library(spacetime); library(rgeos); library(gstat); library(lattice); library(xts); library(zoo); library(geoR); library(rgl); library(scatterplot3d); library(raster) ## Set the folder for input data ######################################## # Define the folder of input data, and define folder where to save all results########## setwd(dirN) quante<-quanteN # Select number of time point # Access the data: position of stations and data, for all datasets pos, inputdata are the in the root of folder where are data #Select data position<- read.table("pos.txt",header=T) inputdata <- read.table(fileN,header=F) # number of stations and number of time points in input inputdata<-inputdata[1:quante,] nst<-dim(position)[1]; nti<-dim(inputdata)[1]; # Definition of fold for cross validation # If the number of station is greater than 26, cross validation is leave one out, else 1/5 of stations if (nst>26) {fold<- rep_len(seq(1,5), nst)} else {fold <-seq(1,nst)} F<-max(fold) kST<-9*F # Setup the number of parameters to compute AIC, AICc and BIC #Spatial position xx<-position[,2]; yy<-position[,3] plot(xx,yy, main="Sensors positions", xlab="Longitude",ylab="Latitude", cex.lab=1.8, lwd=2.5, lty=2, cex.axis=2.3) #dev.copy(bmp, 'Network sensor.bmp') #dev.off() # tabela is complete values of each dataset dif<-max(inputdata)-min(inputdata) tabela<-t(inputdata); tempo<-1:nti; sseST<-array(0,c(quante)) observedST<-tabela predictedST<-array(0, c(nst,quante)) # timeTrST<-array(0, c(F)) # timeTsST<-array(0, c(F)) # This is whole data, we need to perform cross-validation Alldata<-t(inputdata) inputdata<-t(inputdata) # Want to save best fit models for column 1 Space, column 2 Time bestSTpar.models<-array(0,c(F,5)) bestSTmodels<-array(0,c(F,2)) mse.best<-array(0,c(F)) controlMSE<-array(0,c(F)) countK<-0 # when the models Space and Time are changed to another countST<-0 # when the model doesn’t produce prediction # for (ii in 1:max(fold)) #ii<-1 # { # for each fold # Variogram and model selection for training t1<-proc.time()[3] print(t1) #print("************FOLD ii **********") #print(ii) #Sensors and data in training #training<-which(fold!=ii) #print(training) #Sensors excluded #test<-which(fold==ii) # Definition of stations position pts<-cbind(x=c(xx[]), y=c(yy[])) num<-length(xx) dimnames(pts)[[1]] = c(1:num) df<-data.frame(stations=1:num) options(warn=1) # show warnings where they occur stations<-SpatialPointsDataFrame(pts,df) # warn stations dates<-seq(as.Date("2000/1/1"), by = "day", length.out=quante) # Creation of spacetime object dataall dataall<-STFDF(stations,dates, data.frame(Values = c(Alldata[,1:quante]))) str(dataall) class(dataall) # Creation of grid object DE_gridded<-SpatialPoints(cbind(xx,yy)) DE_gridded str(DE_gridded) gridded(DE_gridded) <- FALSE DE_pred<-STF(sp=as(DE_gridded,"SpatialPoints"), time=dataall@time) vv<-variogramST(Values~1,DE_pred,data=dataall,tunit="days", tlags=seq(1,quante), pseudo=TRUE, assumeRegular=FALSE, na.omit=T) # vv variogram for point, while vv for grid plot(vv,map=F, col=c(vv$timelag)) plot(vv,map=T, col=c(vv$timelag)) plot(vv,map=F, col=c(vv$timelag)) # Compute initial spatial and temporal nugget, sill, range and common SILL kojis<-which(vv$timelag==1) #kojis dis<-length(kojis) #dis # if at lag=1 we have only s 2 or 5 data, we set spatial nugget to 0.1, spatial range as the third od max distance and spatial sill as the mean of gamma at lag=1 if(dis<=5) { s.nug<-vv$gamma[1] #print("Spatial nugget when we have small number of data") #print(s.nug) s.range<-max(vv$dist[kojis])/3 #print("Spatial range when we have small number of data") #print(s.range) s.sill<-mean(vv$gamma[dis]) #print("Spatial sill when we have small number of data") #print(s.sill) } else { s.nug<-mean(vv$gamma[1:3]) #print("Spatial nugget when we have more then 5 data") #print(s.nug) s.range<-max(vv$dist[kojis])/3 #print("Spatial range when we have small number of data") #print(s.range) s.sill<-mean(vv$gamma[(dis-4):dis]) #print("Spatial sill when we have more then 5 data at first lag") #print(s.sill) } #print("Variogram") kojit<-which(vv$spacelag==0) #kojit dit<-length(kojit) #dit # if at spacelag=0, we have only s 2 or 5 data, we set temporal nugget to to mean of the first 1 sample variograms$gamma, but if is greater then 5 then the mean of first three values # for the t.sill we have to use mean of gamma values when we less then 5 values, but if it is greater we have to use meanof the last 5 gamma values if(dit<=5) { t.nug<-vv$gamma[1] #print("Temporal nugget when we have small number of data") #print(t.nug) t.range<-max(vv$timelag[kojit])/3 #print("Temporal range when we have small number of data") #print(t.range) t.sill<-mean(vv$gamma[kojit]) #print("Temporal sill when we have small number of data") #print(t.sill) } else { t.nug<-mean(vv$gamma[kojit[1:3]]) #print("Temporal nugget when we have more then 5 data") #print(t.nug) t.range<-max(vv$timelag[kojit])/3 #print("Temporal range when we have small number of data") #print(t.range) t.sill<-mean(vv$gamma[kojit[(dit-4):dit]]) #print("Temporal sill when we have more then 5 data at spacelag=0") #print(t.sill) } # We need to order in increasing order avgDist and to compute mean gamma of the last five data #print("Searching for the best sill") ord.sill<-vv[order(vv$avgDist),] ks<-dim(ord.sill)[1] #print("ks") #ks # SILL in common is the the max(variogram sample gamma) #SILL<-max(vv$gamma) print("ord.sill") ord.sill ks SILL<-mean(ord.sill$gamma[(ks-round(ks/100*p)):ks]) print("###############ESTIMATED GLOBAL SILL:") print(SILL) SILLMAX<-max(vv$gamma) print("###############MAX SILL:") print(SILLMAX) # Possible models order for Space and for Time m1<-"Exp" m2<-"Sph" m3<-"Mat" m4<-"Gau" m5<-"Lin" models<-c(m1, m2, m3, m4, m5) #print(models) nrmod<-length(models) ############################################################################## # SEARCH FOR THE BEST PARAMETER MODEL IN SEPARABLE MODEL # Each row corisponds to spatial part of models 1. Exp, 2. Sph, 3. Mat, 4. Gau, 5. Lin # Each column corisponds to temporal part of models 1. Exp, 2. Sph, 3. Mat, 4. Gau 5. Lin # When the model parameters don’t produce fit, mse are set to e+25 mse<-array(1e+39,c(nrmod, nrmod)) print(paste("fold ")) for (SS in 1:nrmod) { #print(SS) for (TT in 1:nrmod) { #separableModel<-vgmST("separable",space=vgm(s.sill,models[SS],s.range,s.nug),time=vgm(t.sill, models[TT], t.range, t.nug), sill=SILL) print(paste("separable with SILL:", SILL)) separableModel<-(tryCatch(vgmST("separable",space=vgm(s.sill,models[SS],s.range,s.nug),time=vgm(t.sill, models[TT], t.range, t.nug), sill=SILL), error=function(e) NULL)) extractPar(separableModel) plot(vv,separableModel,map=F) rs<-(tryCatch(fit.StVariogram(vv, separableModel, method = "L-BFGS-B", fit.method = 8), error=function(e) NULL)) #print(rs) if(is.null(rs) ==TRUE) {#filter out the configuration } else { #print("This model FIT is NOT null") # Important to use method method = "L-BFGS-B" and fit.method=8 #AA changed #model1<-fit.StVariogram(vv, separableModel, method = "L-BFGS-B", fit.method = 8) model1<-rs plot(vv,separableModel,map=F) #print(" SPATIAL MODEL**************************************************") #print(models[SS]) #print(" TEMPORAL MODEL**************************************************") #print(models[TT]) mse[SS,TT]<-attr(model1,"MSE") #print(" MSE IS**************************************************") #print(mse[SS,TT]) } } } #print(" MSE ERRORS ******************************") #print(mse) msen<-as.vector(mse) #msen duzina<-length(which(msen<1e+39)) #duzina if(duzina==0) { # try with SILLMAX for (SS in 1:nrmod) { #print(SS) for (TT in 1:nrmod) { #separableModel<-vgmST("separable",space=vgm(s.sill,models[SS],s.range,s.nug),time=vgm(t.sill, models[TT], t.range, t.nug), sill=SILL) print(paste("separable with SILLMAX:", SILLMAX)) separableModel<-(tryCatch(vgmST("separable",space=vgm(s.sill,models[SS],s.range,s.nug),time=vgm(t.sill, models[TT], t.range, t.nug), sill=SILLMAX), error=function(e) NULL)) extractPar(separableModel) plot(vv,separableModel,map=F) rs<-(tryCatch(fit.StVariogram(vv, separableModel, method = "L-BFGS-B", fit.method = 8), error=function(e) NULL)) #print(rs) if(is.null(rs) ==TRUE) {#filter out the configuration } else { #print("This model FIT is NOT null") # Important to use method method = "L-BFGS-B" and fit.method=8 #AA changed #model1<-fit.StVariogram(vv, separableModel, method = "L-BFGS-B", fit.method = 8) model1<-rs plot(vv,separableModel,map=F) #print(" SPATIAL MODEL**************************************************") #print(models[SS]) #print(" TEMPORAL MODEL**************************************************") #print(models[TT]) mse[SS,TT]<-attr(model1,"MSE") #print(" MSE IS**************************************************") #print(mse[SS,TT]) } } } #print(" MSE ERRORS ******************************") #print(mse) msen<-as.vector(mse) #msen duzina<-length(which(msen<1e+39)) #duzina } if(duzina==0){ stop("Failed in the construction of the separate model with all kinds of estimated SILL") } msen<-sort(msen,index=TRUE)$ix #msen modelsS<-rep(models, times=nrmod) #modelsS modelsT<-rep(models, each=nrmod) #modelsT modelsS<-modelsS[msen] #print(modelsS) modelsT<-modelsT[msen] #print(modelsT) ############## BEST MODELS FOR SPACE AND TIME SELECTED ################## # if we have more then 1 minimal values of MSE we select first by row M<-1 repeat { modelBestSpace<-modelsS[M] modelBestTime<-modelsT[M] #print(modelBestSpace) #print(modelBestTime) plot(vv,map=F, col=c(vv$timelag)) ## BestSTmodels are the best model selected per Space and Time bestSTmodels[,1]<-modelBestSpace bestSTmodels[,2]<-modelBestTime #print(" BEST MODEL SELECTED FOR THIS FOLD FOR SPACE AND FOR TIME") #print(bestSTmodels[ii,]) ## BEST FIT - WITH THE BEST SEPARATED MODEL separableBestModel<-(tryCatch(vgmST("separable",space=vgm(s.sill, modelBestSpace, s.range, s.nug), time=vgm(t.sill, modelBestTime, t.range, t.nug), sill=SILL), error=function(e) NULL)) #print(separableBestModel) ## BestSTpar.models are the best parameters of model selected and fitted in this order ## range.s nugget.s range.t nugget.t sill bestSTpar.models[,1:nrmod]<-extractPar(separableBestModel) #print(bestSTpar.models[ii,]) plot(vv,separableBestModel,map=F) best.fit.modelST<-(tryCatch(fit.StVariogram(vv, separableBestModel, method = "L-BFGS-B", fit.method = 8), error=function(e) NULL)) if(is.null(best.fit.modelST) ==TRUE) { print("best fit with max sill") ## only ARPA ACQUA falls in this case separableBestModel<-vgmST("separable",space=vgm(s.sill, modelBestSpace, s.range, s.nug), time=vgm(t.sill, modelBestTime, t.range, t.nug), sill=SILLMAX) bestSTpar.models[,1:nrmod]<-extractPar(separableBestModel) plot(vv,separableBestModel,map=F) print("best fit") best.fit.modelST<-fit.StVariogram(vv, separableBestModel, method = "L-BFGS-B", fit.method = 8) } #print(best.fit.modelST) # plot(vv,best.fit.modelST, wireframe=FALSE) # plot(vv,best.fit.modelST, wireframe=TRUE) ## MSE of this best FIT is the same get trying all possible 25 possibilities #print(" MSE OF THE BEST MODEL SELECTED IS **********") controlMSE[]<-attr(best.fit.modelST,"MSE") #print(" MSE.BEST IS") #print(controlMSE[ii]) t2<-proc.time()[3] timeTrST<-(t2-t1) ############################################################# t3<-proc.time()[3] rs2<-(tryCatch(krigeST(Values~1, data=dataall, newdata=DE_pred, modelList= best.fit.modelST, stAni = NULL, computeVar = TRUE, fullCovariance = FALSE, bufferNmax=2, progress=TRUE), error=function(e) NULL)) #print(rs2) if(is.null(rs2) ==TRUE) { #print("This PREDICTION IS NULL") #print(M) countK<-countK+1 # when the model fails producing the prediction } else { #print("This PREDICTION is not null") t3<-proc.time()[3] print(t3) ##AA changed DE_kriged<- krigeST(Values~1, data=dataall, newdata=DE_pred, modelList= best.fit.modelST, stAni = NULL, computeVar = TRUE, fullCovariance = FALSE, bufferNmax=2, progress=TRUE) DE_kriged<-rs2 #print(DE_kriged$var1.pred[test]) t4<-proc.time()[3] # Sometimes even the best fit doesn’t produce prediction S<-length(which(is.na(DE_kriged$var1.pred)==TRUE)) # Run time for training – finish here: if(S>0) { #print("You have to try other model set the error of this one to 1e+39") countST<-countST+1 } else { t2<-proc.time()[3] timeTrST<-t2-t1 # Run time for interpolation – start here: # Prediction for test data: #print("************Prediction done*************") bestSTmodels[,1]<-as.character(best.fit.modelST$space$model[2]) bestSTmodels[,2]<-as.character(best.fit.modelST$time$model[2]) #print(" BEST MODEL SELECTED FOR THIS FOLD FOR SPACE AND FOR TIME") #print(bestSTmodels[ii,]) # Run time for test – finish here: #print("************TEST DATA*************") #print(test) # Prediction for test data: predST<-matrix(DE_kriged$var1.pred,nrow=nst) predictedST[,1:quante]<-predST[,1:quante] #print(predictedST[test,]) M<-duzina timeTsST<-(t4-t3) print("Time Ts") print(timeTsST) } } M<-M+1 if(M>=duzina) {break} } ### repeat # } # end of each fold sseST<-(predictedST-observedST)^2 sseST<-sum(sseST) rsseST<-sqrt(sseST/(nst*quante)) #print ("*********** SAVE THE RESULTS FOR ST-KRIGING ********") print("******* TIME IN CROSS VALIDATION FOR TRAINING AND TEST****************") totalTrST<-sum(timeTrST) totalTsST<-sum(timeTsST) #write.csv (predictedST, file = "predictionST.csv") observedData<-inputdata #write.csv(observedData, file = "observedData.csv") #sseST aicST<-nst*quante*log(sseST/(nst*quante))+2*kST aiccST<-aicST+(2*kST*(kST+1))/(nst*quante-kST-1) bicST<-nst*quante*log(sseST/(nst*quante))+log(nst*quante)*kST mse.STbest<-cbind(mse.best, controlMSE) # write.csv (mse.STbest, file="mse.STbest.csv") #plot(vv,map=FALSE, col=c(vv$timelag)) #dev.copy(bmp, 'Sample variogram time lags ST.bmp') #dev.off() #plot(vv,map=TRUE, col=c(vv$timelag)) #dev.copy(bmp,'surface Samle Variogram model ST.bmp') #dev.off() #plot(vv,wireframe=TRUE) #dev.copy(bmp,'3D variogram ST.bmp') dev.off() #plot(vv,wireframe=FALSE) #dev.copy(bmp,'2D Variogram MAP.bmp') #dev.off() # write.csv(bestSTmodels, file = "bestSTmodels.csv") # write.csv(bestSTpar.models, file = "bestSTpar.models.csv") print ("*********** Count how many time fit with these models don’t produce FIT ********") print(countST) # Order of data in results: countST, sseST, rsseST, aicST, aiccST, bicST, totalTrK, totalTsK) results<-cbind(timeTrST, timeTsST) write.csv(results, file = "TIME-ST.csv") results getwd() } dir<-c("C:\\Kriging -Script\\time\\exp\\ARPA","C:\\Kriging -Script\\time\\exp\\ARPAACQUA","C:\\Kriging -Script\\time\\exp\\MESA","C:\\Kriging -Script\\time\\exp\\NCDCS","C:\\Kriging -Script\\time\\exp\\NCDCP","C:\\Kriging -Script\\time\\exp\\NCDCT","C:\\Kriging -Script\\time\\exp\\TCEQW","C:\\Kriging -Script\\time\\exp\\TCEQO","C:\\Kriging -Script\\time\\exp\\TCEQT","C:\\Kriging -Script\\time\\exp\\SAC","C:\\Kriging -Script\\time\\exp\\SR") file<-c("fondo.txt","acqua.txt","pol.txt","sol.txt","prec.txt","temp.txt","wind.txt","ozone.txt","temp.txt","temp.txt","dif.txt") quanteList<-c(8,8,48,48,48,48,48,48,48,12,12) dirI<-1 p=5 # percentage of sample values (top-ranked according to spatio-temporal distances) processed to estimate intial global SILL exp<-experiment(dir[dirI],file[dirI],quanteList[dirI],p) ################