#=   oe
Output error (OE) model estimation with the prediction error method (PEM).

Starting from the initial parameter vector X0 the mean squared output error
returned by calcDerivativesOE is minimised with damped Gauss-Newton steps

    theta <- theta - mu * (V_h \ V_g)

Arguments
  iddata           data object; its fields y (output), u (input) and Ts are read
  X0               initial parameters: X0[1:nf] are the coefficients applied to
                   past predictions, X0[nf+1:nf+nb] the coefficients applied to
                   the delayed input
  nf, nb           number of coefficients of each kind
  nk               input delay in samples
  stabilityFix     if true, the first nf entries of the final parameter vector
                   are passed through checkStability before the model is built
  stepSizeControl  if true, a step that increases the loss (or makes it NaN) is
                   rejected and retried from the last accepted point with a
                   step size reduced by a factor of 10

The iteration stops when the loss changes by less than exitThreshold between
two accepted steps, when maxIterations accepted steps have been evaluated, or
when maxStepReductions consecutive step size reductions did not help. In the
last case the last accepted parameters are returned, so the search always
terminates, even if the loss stays NaN.

Returns whatever createModelOutput builds from the final parameters.
Throws ArgumentError if the data record has fewer samples than the model needs.

Author : Lars Lindemann @2015
                                                                    =#
function oe(iddata::iddataObject,X0::Array{Float64},nf::Int64,nb::Int64,nk::Int64;stabilityFix::Bool=false,stepSizeControl::Bool=true)

  maxIterations     = 30;
  maxStepReductions = 30;
  exitThreshold     = 0.0001;
  y                 = iddata.y;
  u                 = iddata.u;

  # find time horizon (largest value going back in history)
  timeHorizon       = max(nb+nk-1,nf) + 1;
  N                 = length(y)-timeHorizon+1;
  if N < 1
    throw(ArgumentError("oe: data record too short, need at least $timeHorizon samples but got $(length(y))"));
  end

  theta             = [X0[1:nf] ; X0[nf+1:nf+nb]];
  thetaPrev         = theta;          # last accepted parameter vector
  stepPrev          = zeros(nf+nb);   # Gauss-Newton direction computed at thetaPrev
  V                 = zeros(maxIterations);
  mu                = 1.0;
  reductions        = 0;              # consecutive rejected steps
  V_out             = 0.0;

  result  = false;
  counter = 1;

  while !result

    # loss and its first and (approximate) second order derivatives at theta
    V_g,V_h,V[counter] = calcDerivativesOE(y,u,theta[1:nf],theta[nf+1:nf+nb],timeHorizon,nf,nb,nk);

    # the very first evaluation has nothing to be compared with
    gotWorse = counter > 1 && ( V[counter]>V[counter-1] || isnan(V[counter]) );

    if gotWorse && stepSizeControl
      # reject the step: V[counter] (after the decrement) is the loss at thetaPrev again
      counter    -= 1;
      reductions += 1;
      if reductions > maxStepReductions
        # shrinking the step does not help (e.g. the loss stays NaN): give up
        # and keep the last accepted parameters instead of looping forever
        theta  = copy(thetaPrev);
        result = true;
        V_out  = V[counter]*N;
      else
        # retry from the last accepted point with a smaller step; the direction
        # at thetaPrev is unchanged, so it is reused instead of recomputed
        mu    *= 0.1;
        theta  = thetaPrev - mu*stepPrev;
      end
    else
      # step accepted: reset the step size control
      mu         = 1.0;
      reductions = 0;

      # exit criterion after a successful step
      if counter > 1 && abs(V[counter]-V[counter-1])<exitThreshold
        result = true;
        V_out  = V[counter]*N;
      elseif counter == maxIterations
        result = true;
        V_out  = V[counter]*N;
      else
        # take a full Gauss-Newton step from the accepted point
        thetaPrev = theta;
        stepPrev  = V_h\V_g;
        theta     = thetaPrev - mu*stepPrev;
      end
    end

    counter += 1;
  end

  # check for stability and inverse roots if necessary
  if stabilityFix
    theta[1:nf]  = checkStability(theta[1:nf]);
  end

  # build idModel as output
  Model = createModelOutput(theta,"oe",nf,nb,0,0,nk,iddata.Ts,V_out,N)

  return Model;
end

#=   oe_func
Calculates loss function for PEM2 approach: the mean of the squared
differences between y and the model prediction over the samples
timeHorizon..length(y).

x is the parameter vector: x[1:nf] weight the stored past predictions (with
negative sign), x[nf+1:nf+nb] weight the delayed input samples.

NOTE: timeHorizon, y, u, nf, nb and nk are not arguments. They are read from
the enclosing (global) scope and have to be defined before this function is
called. The result is returned and also written to the global variable VV.

Author : Lars Lindemann @2015
                                                                    =#
function oe_func(x)
  # VV is a global; it is reset here and overwritten with the final loss below
  global VV     = 0;
  # predictions for the samples before timeHorizon are taken as zero
  y_predStorage = zeros(timeHorizon-1);
  V             = 0;

  for i = timeHorizon:length(y)
    # 1. calculate the one step ahead predictions
    y_prediction = 0;

    # contribution of the nf most recent stored predictions
    for ia = 1:nf
      # If a stored prediction is one of the ForwardDiff number types only a
      # nested field (.v or .d.v) is used. Presumably this is the plain value
      # part, i.e. derivative information of past predictions is dropped.
      # TODO confirm against the ForwardDiff version in use.
      if typeof(y_predStorage[i-ia])==ForwardDiff.GraDual{Float64,nf+nb}
        y_prediction   += -x[ia]*((y_predStorage[i-ia]).v);
      elseif typeof(y_predStorage[i-ia])==ForwardDiff.FADHessian{Float64,nf+nb}
        y_prediction   += -x[ia]*(((y_predStorage[i-ia]).d).v);
      else
        y_prediction   += -x[ia]*y_predStorage[i-ia];
      end
    end
    # contribution of the nb input samples, delayed by nk
    for ib = 1:nb
      y_prediction     += x[nf+ib]*u[i-ib-nk+1];
    end

    # 2. update y_predStorage
    # (appended by concatenation, so entry i of the storage is the prediction for sample i)
    y_predStorage = [y_predStorage ; y_prediction];

    # 3. calculate actual V and residual
    V                += (y[i]-y_prediction)^2;

  end

  # normalize the summed squared error by the number of samples used
  VV = V/(length(y)-timeHorizon+1);

  return VV;
end

#=   calcDerivativesOE
Calculates the loss function of an OE model together with its gradient and
the Gauss-Newton approximation of its Hessian for PEM.

The prediction for sample i is

    yhat[i] = -sum_k A[k]*yhat[i-k] + sum_k B[k]*u[i-nk-k+1]

with yhat taken as zero before timeHorizon. psi[i,p] is the derivative of
yhat[i] with respect to parameter p (A first, then B).

Arguments
  y, u          output and input samples
  A, B          nf coefficients applied to past predictions and nb
                coefficients applied to the delayed input
  timeHorizon   first sample that is predicted; it must be larger than nf and
                at least nb+nk so that every index used below is valid
  nf, nb, nk    model orders and input delay

Returns (V_g, V_h, VV), all averaged over the samples timeHorizon..length(y):
  V_g   vector of length nf+nb, mean of -psi*residual
  V_h   (nf+nb)x(nf+nb) matrix, mean of psi*psi'
  VV    mean squared residual

Throws ArgumentError if length(y) < timeHorizon, because the averages would
otherwise be computed over an empty range.

Author : Lars Lindemann @2015
                                                                    =#
function calcDerivativesOE(y::Array{Float64},u::Array{Float64},A::Array{Float64},B::Array{Float64},
                              timeHorizon::Int64,nf::Int64,nb::Int64,nk::Int64)
  nSamples = length(y);
  nParams  = nf+nb;
  nUsed    = nSamples-timeHorizon+1;
  if nUsed < 1
    throw(ArgumentError("calcDerivativesOE: need at least $timeHorizon samples but got $nSamples"));
  end

  y_prediction = zeros(nSamples);
  psi          = zeros(nSamples,nParams);
  V_g          = zeros(nParams);
  V_h          = zeros(nParams,nParams);
  e            = 0.0;

  # Explicit scalar loops are used on purpose: they do not depend on how a
  # given Julia version shapes row slices or transposes of vectors.
  for i = timeHorizon:nSamples
    # 1. calculate the prediction for sample i
    pred = 0.0;
    for k = 1:nf
      pred -= A[k]*y_prediction[i-k];
    end
    for k = 1:nb
      pred += B[k]*u[i-nk-k+1];
    end
    y_prediction[i] = pred;

    # 2. calculate the gradient of the prediction with respect to each parameter
    for p = 1:nParams
      # direct dependence: a past prediction for A, a delayed input for B
      s = p <= nf ? -y_prediction[i-p] : u[i-nk-(p-nf)+1];
      # indirect dependence through the past predictions
      for k = 1:nf
        s -= A[k]*psi[i-k,p];
      end
      psi[i,p] = s;
    end

    # 3. accumulate gradient and approximate Hessian of the quadratic criterion
    res = y[i]-pred;
    for p = 1:nParams
      V_g[p] -= psi[i,p]*res;
      for q = 1:nParams
        V_h[q,p] += psi[i,q]*psi[i,p];
      end
    end

    # accumulate the summed squared error
    e += res^2;
  end

  # normalize
  return V_g/nUsed,V_h/nUsed,e/nUsed;
end
