# Load Library

library(forecast)
library(ggplot2)
library(TSstudio)
library(tseries)
library(tsutils)
library(funtimes)
library(flexmix)

# Read the turbine measurements. read.csv() already assumes a header row,
# comma-separated fields and "." as the decimal mark, so the defaults suffice.
WP <- read.csv(file = "updated_wtbdata_245days.csv")

# Result tables, one row per turbine (134 turbines).

# Selected model type followed by the names of the chosen regressors;
# unused slots keep the " " placeholder.
regressors <- matrix(
  " ",
  nrow = 134,
  ncol = 10,
  dimnames = list(NULL, c("Model", paste("Regressor", 1:9)))
)

# Validation metrics of the final model (stored as text, hence the " " fill).
metrics <- matrix(
  " ",
  nrow = 134,
  ncol = 3,
  dimnames = list(NULL, c("RMSE", "MAE", "Score"))
)

# ARIMA orders derived from the correlograms: (p, d, q) and seasonal (P, D, Q).
hyperparameters <- matrix(
  0,
  nrow = 134,
  ncol = 6,
  dimnames = list(NULL, c("p", "d", "q", "P", "D", "Q"))
)


# ---------------------------------------------------------------------------
# Helpers used by the per-turbine loop below
# ---------------------------------------------------------------------------

# Count how many consecutive correlogram entries, starting at index `first`
# and advancing by `step`, have an absolute value of at least `threshold`.
# The scan stops at the end of `values` (or at an NA) instead of indexing past
# it, which previously aborted the script with an NA `while` condition.
count_significant_lags <- function(values, first, step = 1, threshold = 0.025) {
  n_sig <- 0
  idx <- first
  while (idx <= length(values) &&
         !is.na(values[idx]) &&
         abs(values[idx]) >= threshold) {
    n_sig <- n_sig + 1
    idx <- idx + step
  }
  n_sig
}

# TRUE when a single record (one data-frame row, ID column already removed)
# breaks one of the plausibility rules applied by this script:
#   * column 12 negative while column 3 exceeds 2.5,
#   * any of columns 8-10 above 89,
#   * column 7 outside [-720, 720],
#   * column 4 outside [-180, 180].
# The caller must make sure columns 3-12 of `rec` hold no NA.
has_abnormal_reading <- function(rec) {
  (rec[[12]] < 0 && rec[[3]] > 2.5) ||
    rec[[8]] > 89 || rec[[9]] > 89 || rec[[10]] > 89 ||
    rec[[7]] > 720 || rec[[7]] < -720 ||
    rec[[4]] > 180 || rec[[4]] < -180
}

# Fit a regression with ARIMA errors on the training data, forecast over the
# validation regressors and return the fitted model, the forecast object and
# the accuracy() matrix (row 2 = test set; column 2 = RMSE, column 3 = MAE).
#   masked_idx: positions inside `valid_y` that must not contribute to the
#               error; their forecasts are overwritten with the observed value.
fit_and_score <- function(train_y, valid_y, train_x, valid_x,
                          arima_order, seasonal_order, masked_idx) {
  fit <- Arima(train_y,
               order = arima_order,
               seasonal = seasonal_order,
               xreg = train_x)

  # With `xreg` supplied, the horizon is the number of regressor rows; state
  # it explicitly rather than passing an `h` that is ignored.
  pred <- forecast(fit,
                   h = NROW(valid_x),
                   level = 0,
                   xreg = valid_x)

  # Negative fitted values and forecasts are clipped at zero.
  neg_fitted <- which(fit$fitted < 0)
  pred$fitted[which(pred$fitted < 0)] <- 0
  pred$mean[which(pred$mean < 0)] <- 0
  # Where the fitted value was clipped to 0, the residual is x - 0 = x.
  fit$residuals[neg_fitted] <- fit$x[neg_fitted]

  # Imputed observations are excluded from scoring by forcing a zero error.
  pred$mean[masked_idx] <- valid_y[masked_idx]

  list(model = fit,
       forecast = pred,
       accuracy = accuracy(pred, valid_y))
}

# ---------------------------------------------------------------------------
# Per-turbine pipeline: clean data -> derive orders -> select regressors ->
# fit the final model -> store metrics
# ---------------------------------------------------------------------------

fr <- 144          # observations per seasonal period (ts frequency)
train_end <- 205   # last time unit of the training window
valid_end <- 225   # last time unit of the validation window

for (v in 1:134) {

  # Preparation of Data

  t_s <- WP[WP$TurbID == v, ] # v-th wind turbine

  t_s <- t_s[-c(1)] # remove ID

  n_obs <- nrow(t_s)

  # Find the first record with no missing value in columns 3-12 and no
  # abnormal reading. Fixes: column 3 is now part of the NA check (the
  # original tested column 12 twice), and the search is bounded so it cannot
  # loop forever when no record qualifies.
  u <- 1
  while (u <= n_obs &&
         (any(is.na(t_s[u, 3:12])) || has_abnormal_reading(t_s[u, ]))) {
    u <- u + 1
  }
  if (u > n_obs) {
    stop("No usable record found for wind turbine ", v, call. = FALSE)
  }

  # Seed the first row from that record. As in the original, column 3 of the
  # first row is kept; it is only filled from the seed record when missing,
  # so the forward fill below never propagates an NA.
  # NOTE(review): this also overwrites columns 1-2 of the first row - confirm
  # that is intended.
  t_s[1, -c(3)] <- t_s[u, -c(3)]
  if (is.na(t_s[1, 3])) {
    t_s[1, 3] <- t_s[u, 3]
  }
  if (t_s[1, 12] < 0) {
    t_s[1, 12] <- 0
  }

  # Rows whose column-12 value was replaced by the previous one.
  imputed <- logical(n_obs)

  for (i in seq_len(n_obs)[-1]) {
    # set the NA value equal to the previous one
    for (col in 12:3) {
      if (is.na(t_s[i, col])) {
        t_s[i, col] <- t_s[i - 1, col]
        if (col == 12) {
          imputed[i] <- TRUE
        }
      }
    }

    # set the unknown / abnormal value equal to the previous one
    if (has_abnormal_reading(t_s[i, ])) {
      t_s[i, 12] <- t_s[i - 1, 12]
      imputed[i] <- TRUE
    }

    # set negative value equal to 0
    if (t_s[i, 12] < 0) {
      t_s[i, 12] <- 0
    }
  }

  y <- which(imputed) # row indices of imputed target values

  # Wind Turbine Time Series

  WT <- ts(t_s$Patv,
           start = 1,
           frequency = fr)

  # Autocorrelation: q = number of leading lags (from lag 1; index 1 of the
  # ACF is lag 0) whose autocorrelation stays at or above the threshold.

  ac <- acf(WT,
            lag.max = length(WT),
            plot = FALSE)

  ma <- count_significant_lags(ac$acf, first = 2)

  hyperparameters[v, 3] <- ma

  ac <- acf(WT,
            plot = FALSE,
            lag.max = ma + fr)

  # Check Trend/Seasonality

  n_desc <- 0      # number of "descending" steps seen in the correlogram
  x <- integer(0)  # ACF indices of local peaks
  n_lags <- length(ac$acf)

  for (i in 2:n_lags) {
    prev_is_peak <- (i - 1) %in% x

    # A step counts as descending when it is below its predecessor, or below
    # the value before a peak when the predecessor itself was a peak.
    if (!prev_is_peak && ac$acf[i] < ac$acf[i - 1]) {
      n_desc <- n_desc + 1
    } else if (i > 2 && prev_is_peak && ac$acf[i] < ac$acf[i - 2]) {
      n_desc <- n_desc + 1
    }

    # Local peak: strictly above both neighbours (the last index has no
    # right-hand neighbour and is skipped).
    if (i != n_lags &&
        ac$acf[i] > ac$acf[i - 1] &&
        ac$acf[i] > ac$acf[i + 1]) {
      x <- c(x, i)
      # A peak lower than the previous peak also counts as a descending step.
      if (length(x) > 1 && ac$acf[x[length(x) - 1]] > ac$acf[i]) {
        n_desc <- n_desc + 1
      }
    }
  }

  # Classification. Scalar `&&`/`||` replace the elementwise operators: the
  # original conditions became zero-length or multi-element (and aborted)
  # whenever there was no trend and the peaks were absent or irregular.
  has_trend <- n_desc / (ma + fr) >= 0.5
  peak_gap <- unique(diff(x))
  regular_peaks <- length(peak_gap) == 1

  if (has_trend) {
    # Seasonality is rejected when the peaks are not evenly spaced, or when
    # the first peak sits more than one spacing after the start. (The
    # original's extra `%ni% x` term was always TRUE here and is dropped.)
    if (!regular_peaks || x[1] - peak_gap > 1) {
      verdict <- "The time series has trend but no seasonality"
      t <- 1
      s <- 0
    } else {
      verdict <- "The time series has trend and seasonality"
      t <- 1
      s <- 1
    }
  } else if (regular_peaks) {
    verdict <- "The time series has seasonality but no trend"
    t <- 0
    s <- 1
  } else {
    verdict <- "The time series has not trend and seasonality"
    t <- 0
    s <- 0
  }

  # The peak count is length(x); the original printed its next-slot counter,
  # which was one too high.
  print(paste("For the", v, "wind turbine there are", n_desc,
              "descendent lags and", length(x), "peacks.", verdict))

  hyperparameters[v, 2] <- t
  hyperparameters[v, 5] <- s

  # Partial Autocorrelation: p = number of leading significant lags
  # (index 1 of the PACF is lag 1).

  ac <- pacf(WT,
             lag.max = length(WT),
             plot = FALSE)

  ar <- count_significant_lags(ac$acf, first = 1)

  # NOTE(review): columns p and q always record the orders of the raw series,
  # even when the fitted model below uses the orders of the differenced
  # series (or zero) - confirm this is the intended report.
  hyperparameters[v, 1] <- ar

  if (s == 1) {
    # Seasonal Adjustment

    WT_des <- diff(WT,
                   lag = fr)

    # Autocorrelation TS Deseasonalized. Seasonal lag k*fr sits at ACF index
    # k*fr + 1 because index 1 is lag 0; the original started at index fr and
    # therefore inspected lag fr - 1.

    ac <- acf(WT_des,
              lag.max = length(WT),
              plot = FALSE)

    ma_des <- count_significant_lags(ac$acf, first = fr + 1, step = fr)

    hyperparameters[v, 6] <- ma_des

    # Partial Autocorrelation TS Deseasonalized (index fr is lag fr).

    ac <- pacf(WT_des,
               lag.max = length(WT),
               plot = FALSE)

    ar_des <- count_significant_lags(ac$acf, first = fr, step = fr)

    hyperparameters[v, 4] <- ar_des
  }

  if (t == 1) {
    # Trend Adjustment

    WT_det <- diff(WT,
                   lag = 1)

    # Autocorrelation TS Detrendalized

    ac <- acf(WT_det,
              lag.max = length(WT),
              plot = FALSE)

    ma_det <- count_significant_lags(ac$acf, first = 2)

    # Partial Autocorrelation TS Detrendalized

    ac <- pacf(WT_det,
               lag.max = length(WT),
               plot = FALSE)

    ar_det <- count_significant_lags(ac$acf, first = 1)
  }

  # Model specification, decided once per turbine and reused for every fit.
  # Fix: the trend-only model now takes its MA order from `ma_det`; the
  # original passed `ar_det` in both the AR and the MA slot.
  seasonal_order <- c(0, 0, 0)
  if (t == 1 && s == 0) {
    pmod <- "ARIMAX"
    arima_order <- c(ar_det, t, ma_det)
  } else if (t == 0 && s == 1) {
    pmod <- "SARMAX"
    arima_order <- c(0, t, 0)
    seasonal_order <- c(ar_des, s, ma_des)
  } else if (t == 1 && s == 1) {
    pmod <- "SARIMAX"
    arima_order <- c(ar_det, t, ma_det)
    seasonal_order <- c(ar_des, s, ma_des)
  } else {
    pmod <- "ARMAX"
    arima_order <- c(ar, t, ma)
  }

  # Train / validation split. Fix: validation starts one observation after
  # the end of training. The original started it on the last training
  # observation, so that point was in both sets and every forecast was paired
  # with the regressor row of the previous time step.
  valid_start <- c(train_end, 2)

  train.ts <- window(WT,
                     start = 1,
                     end = train_end)
  valid.ts <- window(WT,
                     start = valid_start,
                     end = valid_end)

  # Positions inside valid.ts of imputed values, which must not be used for
  # evaluating models. Fix: the original computed the distance from the end
  # of the window instead of the position from its start.
  n_train <- length(train.ts)
  valid.idx <- y[y > n_train & y <= n_train + length(valid.ts)] - n_train

  # Feature Selection: greedy forward selection over columns 3-11, keeping a
  # candidate only if it lowers the best RMSE + MAE seen so far.

  score <- 999999

  var <- 3:11

  z <- integer(0) # selected regressor columns, in selection order

  for (j in 1:9) {
    # Reset per step so a stale winner (from a previous step or a previous
    # turbine) can never be mistaken for an improvement.
    pvar <- NA

    for (i in var) {
      Ex <- ts(t_s[, c(z, i)],
               start = 1,
               frequency = fr)

      train.Ex <- window(Ex,
                         start = 1,
                         end = train_end)
      valid.Ex <- window(Ex,
                         start = valid_start,
                         end = valid_end)

      candidate <- fit_and_score(train.ts, valid.ts, train.Ex, valid.Ex,
                                 arima_order, seasonal_order, valid.idx)

      # Scoring Model: test-set RMSE + MAE
      candidate_score <- candidate$accuracy[2, 2] + candidate$accuracy[2, 3]

      if (candidate_score < score) {
        score <- candidate_score
        pvar <- i
      }
    }

    if (is.na(pvar)) {
      break # no remaining regressor improves the score
    }

    var <- var[var != pvar] # delete the regressor found from the list
    z <- append(z, pvar)
  }

  if (length(z) == 0) {
    stop("No regressor improved the score for wind turbine ", v, call. = FALSE)
  }

  regressors[v, 1:(length(z) + 1)] <- c(pmod, colnames(t_s)[z])

  # Final model on the selected regressors

  Ex <- ts(t_s[, z],
           start = 1,
           frequency = fr)

  train.Ex <- window(Ex,
                     start = 1,
                     end = train_end)

  valid.Ex <- window(Ex,
                     start = valid_start,
                     end = valid_end)

  final <- fit_and_score(train.ts, valid.ts, train.Ex, valid.Ex,
                         arima_order, seasonal_order, valid.idx)

  # Keep the objects of the last fit available under their original names.
  arimax <- final$model
  arimax.pred <- final$forecast
  ac <- final$accuracy

  # Scoring Model: RMSE, MAE and their mean on the test set

  metrics[v, ] <- c(round(ac[2, 2], 3),
                    round(ac[2, 3], 3),
                    round(mean(c(ac[2, 2], ac[2, 3])), 3))

}

# Show the collected tables in order: ARIMA orders, selected regressors,
# validation metrics.
invisible(lapply(list(hyperparameters, regressors, metrics), print))

