! See copyright notice in the COPYRIGHT file.
! ****************************************************************************** !
!> author: Jiaxing Qi
!! Routines and parameter definitions for the standard D2Q9 model
!!
?? include 'header/lbm_macros.inc'
?? include 'header/lbm_interfaceMacros.inc'
?? include 'header/lbm_d2q9Macros.inc'
module mus_d2q9_module
  use iso_c_binding, only: c_f_pointer

  ! include treelm modules
  use env_module,            only: rk
  use tem_varSys_module,     only: tem_varSys_type, tem_varSys_op_type
  use tem_param_module,      only: cs2inv, t2cs2inv, t2cs4inv, div1_12, &
    &                              div1_4, div1_9, div1_6, div1_18, div1_36, &
    &                              div1_2, div4_9, div2_9, div1_3, div2_3, rho0
  use tem_aux_module,        only: tem_abort

  ! include musubi modules
  use mus_field_prop_module,    only: mus_field_prop_type
  use mus_scheme_layout_module, only: mus_scheme_layout_type
  use mus_scheme_type_module,   only: mus_scheme_type
  use mus_param_module,         only: mus_param_type
  use mus_varSys_module,        only: mus_varSys_data_type

  implicit none

  private

  public :: bgk_d2q9
  public :: mrt_d2q9
  public :: mrt_d2q9_incomp
  public :: bgk_d2q9_incomp
  public :: MMIvD2Q9
  public :: bgk_d2q9_nNwtn_pl

  ! =============================================================================
  ! D2Q9 flow model
  ! =============================================================================
  !> Definition of the discrete velocity set
  integer, parameter :: QQ   = 9   !< number of pdf directions

  integer, parameter :: q__W = 1     !< west             x-
  integer, parameter :: q__S = 2     !< south            y-
  integer, parameter :: q__E = 3     !< east             x+
  integer, parameter :: q__N = 4     !< north            y+
  integer, parameter :: q_SW = 5     !<                  y-,x-
  integer, parameter :: q_NW = 6     !<                  y+,x-
  integer, parameter :: q_SE = 7     !<                  y-,x+
  integer, parameter :: q_NE = 8     !<                  y+,x+
  integer, parameter :: q__0 = 9     !< rest density is last

  ! General direction index
  integer,parameter :: qN0 = 1  !< west             x-
  integer,parameter :: q0N = 2  !< south            y-
  integer,parameter :: q10 = 3  !< east             x+
  integer,parameter :: q01 = 4  !< north            y+
  integer,parameter :: qNN = 5  !<                  y-,x-
  integer,parameter :: qN1 = 6  !<                  y+,x-
  integer,parameter :: q1N = 7  !<                  y-,x+
  integer,parameter :: q11 = 8  !<                  y+,x+
  integer,parameter :: q00 = 9  !< rest

  ! D2Q9 MRT pdf -> moment transformation matrix
  ! How to use:
  ! do iDir = 1, QQ
  !   moment(iDir) = sum( PDF(:) * MMtrD2Q9(:,iDir) )
  ! end do
  !     W          S          E          N         SW         NW         SE         NE          0
  real(kind=rk), dimension(9,9),parameter  :: MMtrD2Q9 = &
  reshape((/ &
    1._rk,     1._rk,     1._rk,     1._rk,     1._rk,     1._rk,     1._rk,     1._rk,     1._rk,& !
   -1._rk,    -1._rk,    -1._rk,    -1._rk,     2._rk,     2._rk,     2._rk,     2._rk,    -4._rk,& !mom2
   -2._rk,    -2._rk,    -2._rk,    -2._rk,     1._rk,     1._rk,     1._rk,     1._rk,     4._rk,& !mom3
   -1._rk,     0._rk,     1._rk,     0._rk,    -1._rk,    -1._rk,     1._rk,     1._rk,     0._rk,&
    2._rk,     0._rk,    -2._rk,     0._rk,    -1._rk,    -1._rk,     1._rk,     1._rk,     0._rk,& !mom5
    0._rk,    -1._rk,     0._rk,     1._rk,    -1._rk,     1._rk,    -1._rk,     1._rk,     0._rk,&
    0._rk,     2._rk,     0._rk,    -2._rk,    -1._rk,     1._rk,    -1._rk,     1._rk,     0._rk,& !mom7
    1._rk,    -1._rk,     1._rk,    -1._rk,     0._rk,     0._rk,     0._rk,     0._rk,     0._rk,& !mom8
    0._rk,     0._rk,     0._rk,     0._rk,     1._rk,    -1._rk,    -1._rk,     1._rk,     0._rk & !mom9
   /),(/9,9/))

  ! D2Q9 MRT moment -> pdf transformation matrix
  ! How to use:
  ! do iDir = 1, QQ
  !   pdf(iDir) = sum( moment(:) * MMIvD2Q9(iDir,:) )
  ! end do
  real(kind=rk), dimension(9,9), parameter  :: MMIvD2Q9 =  &
            reshape((/ &
!     1         2         3        4         5         6         7        8        9
 div1_9, -div1_36, -div1_18, -div1_6,   div1_6,    0._rk,    0._rk,  div1_4,   0._rk,  & ! W
 div1_9, -div1_36, -div1_18,   0._rk,    0._rk,  -div1_6,   div1_6, -div1_4,   0._rk,  & ! S
 div1_9, -div1_36, -div1_18,  div1_6,  -div1_6,    0._rk,    0._rk,  div1_4,   0._rk,  & ! E
 div1_9, -div1_36, -div1_18,   0._rk,    0._rk,   div1_6,  -div1_6, -div1_4,   0._rk,  & ! N
 div1_9,  div1_18,  div1_36, -div1_6, -div1_12,  -div1_6, -div1_12,   0._rk,  div1_4,  & ! SW
 div1_9,  div1_18,  div1_36, -div1_6, -div1_12,   div1_6,  div1_12,   0._rk, -div1_4,  & ! NW
 div1_9,  div1_18,  div1_36,  div1_6,  div1_12,  -div1_6, -div1_12,   0._rk, -div1_4,  & ! SE
 div1_9,  div1_18,  div1_36,  div1_6,  div1_12,   div1_6,  div1_12,   0._rk,  div1_4,  & ! NE
 div1_9,  -div1_9,   div1_9,   0._rk,    0._rk,    0._rk,    0._rk,   0._rk,   0._rk   & ! 0
   /),(/9,9/), order=(/2,1/))


contains

! ****************************************************************************** !
  !> Improved BGK model (with Galilean correction term)
  !! taken from Martin Geier cumulent paper 2015
?? copy :: compute_routineHeader( bgk_d2q9 )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer       :: iElem
    real(kind=rk) :: f(-1:1,-1:1)
    real(kind=rk) :: u,v,u2,v2
    real(kind=rk) :: rho, rho_omg
    real(kind=rk) :: inv_rho
    real(kind=rk) :: omega, cmpl_o, fac
    real(kind=rk) :: m20, m02
    real(kind=rk) :: sumX1, sumXN, sumY1, sumYN
    real(kind=rk) :: Gx, Gy
    real(kind=rk) :: x0, x1, xn, y0, y1, yn
    ! ---------------------------------------------------------------------------

    omega  = fieldProp(1)%fluid%omLvl( level )
    cmpl_o = 1._rk - omega
    fac    = 4.5_rk - 2.25_rk * omega

!$omp do schedule(static)
    !NEC$ ivdep
    !DIR$ NOVECTOR
    nodeloop: do iElem = 1, nSolve
      f(-1, 0) = inState( ?FETCH?( qN0, 1, iElem, QQ, QQ, nElems,neigh))
      f( 0,-1) = inState( ?FETCH?( q0N, 1, iElem, QQ, QQ, nElems,neigh))
      f( 1, 0) = inState( ?FETCH?( q10, 1, iElem, QQ, QQ, nElems,neigh))
      f( 0, 1) = inState( ?FETCH?( q01, 1, iElem, QQ, QQ, nElems,neigh))
      f(-1,-1) = inState( ?FETCH?( qNN, 1, iElem, QQ, QQ, nElems,neigh))
      f(-1, 1) = inState( ?FETCH?( qN1, 1, iElem, QQ, QQ, nElems,neigh))
      f( 1,-1) = inState( ?FETCH?( q1N, 1, iElem, QQ, QQ, nElems,neigh))
      f( 1, 1) = inState( ?FETCH?( q11, 1, iElem, QQ, QQ, nElems,neigh))
      f( 0, 0) = inState( ?FETCH?( q00, 1, iElem, QQ, QQ, nElems,neigh))

      sumX1 = sum(f( 1,:))
      sumXN = sum(f(-1,:))
      rho = sumX1 + sumXN + sum(f(0,:))
      inv_rho = 1.0_rk / rho

      sumY1 = sum(f(:, 1))
      sumYN = sum(f(:,-1))
      u = ( sumX1 - sumXN ) * inv_rho
      v = ( sumY1 - sumYN ) * inv_rho

      ! u v square
      u2 = u*u
      v2 = v*v

      ! second moments, by equation A.7 and A.8
      m20 = ( sumX1 + sumXN ) * inv_rho
      m02 = ( sumY1 + sumYN ) * inv_rho

      ! calculate Galilean correction term, by equation A.13 and A.14
      Gx = fac * u2 * ( m20 - div1_3 - u2 )
      Gy = fac * v2 * ( m02 - div1_3 - v2 )

      ! X Y components of eq
      ! by equation A.19 - A.21
      X0 = -div2_3 + u2 + Gx
      X1 = - ( X0 + 1.0d0 + u ) * 0.5d0
      XN = X1 + u

      Y0 = -div2_3 + v2 + Gy
      Y1 = - ( Y0 + 1.0d0 + v ) * 0.5d0
      YN = Y1 + v

      rho_omg = rho * omega
      X0 = X0 * rho_omg
      X1 = X1 * rho_omg
      XN = XN * rho_omg

      outState( ?SAVE?( q00,1,iElem,QQ,QQ,nElems,neigh )) = cmpl_o*f(0,0) + X0*Y0
      outState( ?SAVE?( qN0,1,iElem,QQ,QQ,nElems,neigh )) = cmpl_o*f(-1,0) + XN*Y0
      outState( ?SAVE?( q10,1,iElem,QQ,QQ,nElems,neigh )) = cmpl_o*f(1,0) + X1*Y0
      outState( ?SAVE?( q0N,1,iElem,QQ,QQ,nElems,neigh )) = cmpl_o*f(0,-1) + X0*YN
      outState( ?SAVE?( q01,1,iElem,QQ,QQ,nElems,neigh )) = cmpl_o*f(0,1) + X0*Y1
      outState( ?SAVE?( qNN,1,iElem,QQ,QQ,nElems,neigh )) = cmpl_o*f(-1,-1) + XN*YN
      outState( ?SAVE?( q11,1,iElem,QQ,QQ,nElems,neigh )) = cmpl_o*f(1,1) + X1*Y1
      outState( ?SAVE?( q1N,1,iElem,QQ,QQ,nElems,neigh )) = cmpl_o*f(1,-1) + X1*YN
      outState( ?SAVE?( qN1,1,iElem,QQ,QQ,nElems,neigh )) = cmpl_o*f(-1,1) + XN*Y1

    end do nodeloop
!$omp end do nowait

  end subroutine bgk_d2q9
! ****************************************************************************** !

! ****************************************************************************** !
  !> Advection relaxation routine for the D2Q9 MRT model
  !! f( x+c, t+1 ) = f(x,t) - M^(-1)S( m - meq )
?? copy :: compute_routineHeader( mrt_d2q9 )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer       :: iElem
?? copy :: var9( f )
?? copy :: var9( fNeq )
    real(kind=rk) :: mom2, mom3, mom5, mom7, mom8, mom9
    real(kind=rk) :: meq2, meq3, meq8, meq9
    real(kind=rk) :: mneq2, mneq3, mneq5, mneq7, mneq8, mneq9
    real(kind=rk) :: s_mrt2, s_mrt3, s_mrt5, s_mrt7, s_mrt8, s_mrt9
    real(kind=rk) :: rho, inv_rho ! local density
    real(kind=rk) :: ux, uy, u2, v2
    ! ---------------------------------------------------------------------------
    s_mrt2 = fieldProp(1)%fluid%mrt( level )%s_mrt(2)
    s_mrt3 = fieldProp(1)%fluid%mrt( level )%s_mrt(3)
    s_mrt5 = fieldProp(1)%fluid%mrt( level )%s_mrt(5)
    s_mrt7 = fieldProp(1)%fluid%mrt( level )%s_mrt(7)
    s_mrt8 = fieldProp(1)%fluid%mrt( level )%s_mrt(8)
    s_mrt9 = fieldProp(1)%fluid%mrt( level )%s_mrt(9)

!$omp do schedule(static)
    !NEC$ ivdep
    !DIR$ NOVECTOR
    nodeloop: do iElem = 1, nSolve
      ! First load all local values into temp array
      f1 = inState( ?FETCH?( 1, 1, iElem, QQ, QQ, nElems,neigh))
      f2 = inState( ?FETCH?( 2, 1, iElem, QQ, QQ, nElems,neigh))
      f3 = inState( ?FETCH?( 3, 1, iElem, QQ, QQ, nElems,neigh))
      f4 = inState( ?FETCH?( 4, 1, iElem, QQ, QQ, nElems,neigh))
      f5 = inState( ?FETCH?( 5, 1, iElem, QQ, QQ, nElems,neigh))
      f6 = inState( ?FETCH?( 6, 1, iElem, QQ, QQ, nElems,neigh))
      f7 = inState( ?FETCH?( 7, 1, iElem, QQ, QQ, nElems,neigh))
      f8 = inState( ?FETCH?( 8, 1, iElem, QQ, QQ, nElems,neigh))
      f9 = inState( ?FETCH?( 9, 1, iElem, QQ, QQ, nElems,neigh))

      ! local density
?? copy :: rho_d2q9_s( rho, f )
      inv_rho = 1.0_rk/rho

      ! local x-, y-momentum
?? copy :: ux_d2q9_s( ux, f, inv_rho )
?? copy :: uy_d2q9_s( uy, f, inv_rho )

      u2 = ux * ux
      v2 = uy * uy
      ! meq1 =  rho ! rho
      meq2 =  -2._rk*rho + 3._rk*rho*(u2+v2) ! e
      meq3 =  rho - 3._rk*rho*(u2+v2) ! eps
      ! meq4 =  ux ! jx
      ! meq5 = -ux ! qx
      ! meq6 =  uy ! jy
      ! meq7 = -uy ! qy
      meq8 =  rho*(u2 - v2) ! pxx
      meq9 =  rho*ux*uy ! pxy

      ! convert pdf into moment
      mom2 = - f1 - f2 - f3 - f4 + 2.0_rk * ( f5 + f6 + f7 + f8 ) - 4.0_rk * f9
      mom3 = -2.0_rk * (f1 + f2 + f3 + f4) + f5 + f6 + f7 + f8 + 4.0_rk * f9
      mom5 = 2.0_rk * ( f1 - f3 ) - f5 - f6 + f7 + f8
      mom7 = 2.0_rk * ( f2 - f4 ) - f5 + f6 - f7 + f8
      mom8 = f1 - f2 + f3 - f4
      mom9 = f5 - f6 - f7 + f8

      ! compute neq moment
      ! mneq1 = s_mrt1 * ( mom1 - meq1 ) = 0
      mneq2 = s_mrt2 * ( mom2 - meq2 )
      mneq3 = s_mrt3 * ( mom3 - meq3 )
      ! mneq4 = s_mrt4 * ( mom4 - meq4 ) = 0
      mneq5 = s_mrt5 * ( mom5 + rho*ux ) ! meq5 = -ux
      ! mneq6 = s_mrt6 * ( mom6 - meq6 ) = 0
      mneq7 = s_mrt7 * ( mom7 + rho*uy ) ! meq7 = -uy
      mneq8 = s_mrt8 * ( mom8 - meq8 )
      mneq9 = s_mrt9 * ( mom9 - meq9 )

      ! compute fNeq
      ! do iDir = 1, 9
      !   fneq(iDir) = sum( MMIvD2Q9(iDir,:) * mneq(:) )
      ! end do
      ! mneq1 = mneq4 = mneq6 = 0
      fNeq1 = - div1_36*mneq2 - div1_18*mneq3 + div1_6*mneq5 + div1_4*mneq8
      fNeq2 = - div1_36*mneq2 - div1_18*mneq3 + div1_6*mneq7 - div1_4*mneq8
      fNeq3 = - div1_36*mneq2 - div1_18*mneq3 - div1_6*mneq5 + div1_4*mneq8
      fNeq4 = - div1_36*mneq2 - div1_18*mneq3 - div1_6*mneq7 - div1_4*mneq8
      fNeq5 = + div1_18*mneq2 + div1_36*mneq3 - div1_12*(mneq5+mneq7) + div1_4*mneq9
      fNeq6 = + div1_18*mneq2 + div1_36*mneq3 - div1_12*(mneq5-mneq7) - div1_4*mneq9
      fNeq7 = + div1_18*mneq2 + div1_36*mneq3 + div1_12*(mneq5-mneq7) - div1_4*mneq9
      fNeq8 = + div1_18*mneq2 + div1_36*mneq3 + div1_12*(mneq5+mneq7) + div1_4*mneq9
      fNeq9 = div1_9*(-mneq2+mneq3)

      outState( ?SAVE?( 1,1, iElem, 9, 9,nElems,neigh )) = f1 - fNeq1
      outState( ?SAVE?( 2,1, iElem, 9, 9,nElems,neigh )) = f2 - fNeq2
      outState( ?SAVE?( 3,1, iElem, 9, 9,nElems,neigh )) = f3 - fNeq3
      outState( ?SAVE?( 4,1, iElem, 9, 9,nElems,neigh )) = f4 - fNeq4
      outState( ?SAVE?( 5,1, iElem, 9, 9,nElems,neigh )) = f5 - fNeq5
      outState( ?SAVE?( 6,1, iElem, 9, 9,nElems,neigh )) = f6 - fNeq6
      outState( ?SAVE?( 7,1, iElem, 9, 9,nElems,neigh )) = f7 - fNeq7
      outState( ?SAVE?( 8,1, iElem, 9, 9,nElems,neigh )) = f8 - fNeq8
      outState( ?SAVE?( 9,1, iElem, 9, 9,nElems,neigh )) = f9 - fNeq9

    enddo nodeloop
!$omp end do nowait

  end subroutine mrt_d2q9
! ****************************************************************************** !

! ****************************************************************************** !
  !> Advection relaxation routine for the D2Q9 MRT model
  !! f( x+c, t+1 ) = f(x,t) - M^(-1)S( m - meq )
?? copy :: compute_routineHeader( mrt_d2q9_incomp )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer       :: iElem
?? copy :: var9( f )
?? copy :: var9( fNeq )
    real(kind=rk) :: mom2, mom3, mom5, mom7, mom8, mom9
    real(kind=rk) :: meq2, meq3, meq8, meq9
    real(kind=rk) :: mneq2, mneq3, mneq5, mneq7, mneq8, mneq9
    real(kind=rk) :: s_mrt2, s_mrt3, s_mrt5, s_mrt7, s_mrt8, s_mrt9
    real(kind=rk) :: rho, inv_rho ! local density
    real(kind=rk) :: ux, uy, u2, v2
    ! ---------------------------------------------------------------------------
    inv_rho = 1.0_rk/rho0

    s_mrt2 = fieldProp(1)%fluid%mrt( level )%s_mrt(2)
    s_mrt3 = fieldProp(1)%fluid%mrt( level )%s_mrt(3)
    s_mrt5 = fieldProp(1)%fluid%mrt( level )%s_mrt(5)
    s_mrt7 = fieldProp(1)%fluid%mrt( level )%s_mrt(7)
    s_mrt8 = fieldProp(1)%fluid%mrt( level )%s_mrt(8)
    s_mrt9 = fieldProp(1)%fluid%mrt( level )%s_mrt(9)

!$omp do schedule(static)
    !NEC$ ivdep
    !DIR$ NOVECTOR
    nodeloop: do iElem = 1, nSolve
      ! First load all local values into temp array
      f1 = inState( ?FETCH?( 1, 1, iElem, QQ, QQ, nElems,neigh))
      f2 = inState( ?FETCH?( 2, 1, iElem, QQ, QQ, nElems,neigh))
      f3 = inState( ?FETCH?( 3, 1, iElem, QQ, QQ, nElems,neigh))
      f4 = inState( ?FETCH?( 4, 1, iElem, QQ, QQ, nElems,neigh))
      f5 = inState( ?FETCH?( 5, 1, iElem, QQ, QQ, nElems,neigh))
      f6 = inState( ?FETCH?( 6, 1, iElem, QQ, QQ, nElems,neigh))
      f7 = inState( ?FETCH?( 7, 1, iElem, QQ, QQ, nElems,neigh))
      f8 = inState( ?FETCH?( 8, 1, iElem, QQ, QQ, nElems,neigh))
      f9 = inState( ?FETCH?( 9, 1, iElem, QQ, QQ, nElems,neigh))

      ! local density
?? copy :: rho_d2q9_s( rho, f )

      ! local x-, y-momentum
?? copy :: ux_d2q9_s( ux, f, inv_rho )
?? copy :: uy_d2q9_s( uy, f, inv_rho )

      u2 = ux * ux
      v2 = uy * uy
      ! meq1 =  rho ! rho
      meq2 =  -2._rk*rho + 3._rk*(u2+v2) ! e
      meq3 =  rho - 3._rk*(u2+v2) ! eps
      ! meq4 =  ux ! jx
      ! meq5 = -ux ! qx
      ! meq6 =  uy ! jy
      ! meq7 = -uy ! qy
      meq8 =  (u2 - v2) ! pxx
      meq9 =  ux*uy ! pxy

      ! convert pdf into moment
      mom2 = - f1 - f2 - f3 - f4 + 2.0_rk * ( f5 + f6 + f7 + f8 ) - 4.0_rk * f9
      mom3 = -2.0_rk * (f1 + f2 + f3 + f4) + f5 + f6 + f7 + f8 + 4.0_rk * f9
      mom5 = 2.0_rk * ( f1 - f3 ) - f5 - f6 + f7 + f8
      mom7 = 2.0_rk * ( f2 - f4 ) - f5 + f6 - f7 + f8
      mom8 = f1 - f2 + f3 - f4
      mom9 = f5 - f6 - f7 + f8

      ! compute neq moment
      ! mneq1 = s_mrt1 * ( mom1 - meq1 ) = 0
      mneq2 = s_mrt2 * ( mom2 - meq2 )
      mneq3 = s_mrt3 * ( mom3 - meq3 )
      ! mneq4 = s_mrt4 * ( mom4 - meq4 ) = 0
      mneq5 = s_mrt5 * ( mom5 + ux ) ! meq5 = -ux
      ! mneq6 = s_mrt6 * ( mom6 - meq6 ) = 0
      mneq7 = s_mrt7 * ( mom7 + uy ) ! meq7 = -uy
      mneq8 = s_mrt8 * ( mom8 - meq8 )
      mneq9 = s_mrt9 * ( mom9 - meq9 )

      ! compute fNeq
      ! do iDir = 1, 9
      !   fneq(iDir) = sum( MMIvD2Q9(iDir,:) * mneq(:) )
      ! end do
      ! mneq1 = mneq4 = mneq6 = 0
      fNeq1 = - div1_36*mneq2 - div1_18*mneq3 + div1_6*mneq5 + div1_4*mneq8
      fNeq2 = - div1_36*mneq2 - div1_18*mneq3 + div1_6*mneq7 - div1_4*mneq8
      fNeq3 = - div1_36*mneq2 - div1_18*mneq3 - div1_6*mneq5 + div1_4*mneq8
      fNeq4 = - div1_36*mneq2 - div1_18*mneq3 - div1_6*mneq7 - div1_4*mneq8
      fNeq5 = + div1_18*mneq2 + div1_36*mneq3 - div1_12*(mneq5+mneq7) + div1_4*mneq9
      fNeq6 = + div1_18*mneq2 + div1_36*mneq3 - div1_12*(mneq5-mneq7) - div1_4*mneq9
      fNeq7 = + div1_18*mneq2 + div1_36*mneq3 + div1_12*(mneq5-mneq7) - div1_4*mneq9
      fNeq8 = + div1_18*mneq2 + div1_36*mneq3 + div1_12*(mneq5+mneq7) + div1_4*mneq9
      fNeq9 = div1_9*(-mneq2+mneq3)

      outState( ?SAVE?( 1,1, iElem, 9, 9,nElems,neigh )) = f1 - fNeq1
      outState( ?SAVE?( 2,1, iElem, 9, 9,nElems,neigh )) = f2 - fNeq2
      outState( ?SAVE?( 3,1, iElem, 9, 9,nElems,neigh )) = f3 - fNeq3
      outState( ?SAVE?( 4,1, iElem, 9, 9,nElems,neigh )) = f4 - fNeq4
      outState( ?SAVE?( 5,1, iElem, 9, 9,nElems,neigh )) = f5 - fNeq5
      outState( ?SAVE?( 6,1, iElem, 9, 9,nElems,neigh )) = f6 - fNeq6
      outState( ?SAVE?( 7,1, iElem, 9, 9,nElems,neigh )) = f7 - fNeq7
      outState( ?SAVE?( 8,1, iElem, 9, 9,nElems,neigh )) = f8 - fNeq8
      outState( ?SAVE?( 9,1, iElem, 9, 9,nElems,neigh )) = f9 - fNeq9

    enddo nodeloop
!$omp end do nowait

  end subroutine mrt_d2q9_incomp
! ****************************************************************************** !

! ****************************************************************************** !
?? copy :: compute_routineHeader( bgk_d2q9_incomp )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer :: iElem
    ! temporary distribution variables
?? copy :: var9( f )
?? copy :: var9( fEq )
    real(kind=rk) :: u_x, u_y
    real(kind=rk) :: rho, inv_rho, usq, ucx
    real(kind=rk) :: omega, cmpl_o
    real(kind=rk) :: c0, c1, c2, c3
    ! ---------------------------------------------------------------------------
    inv_rho = 1.0_rk/rho0

    omega = fieldProp(1)%fluid%omLvl( level )
    cmpl_o  = 1._rk - omega

!$omp do schedule(static)
    !NEC$ ivdep
    !DIR$ NOVECTOR
    nodeloop: do iElem = 1, nSolve

      f1 = inState( ?FETCH?( 1, 1, iElem, 9, 9, nElems,neigh))
      f2 = inState( ?FETCH?( 2, 1, iElem, 9, 9, nElems,neigh))
      f3 = inState( ?FETCH?( 3, 1, iElem, 9, 9, nElems,neigh))
      f4 = inState( ?FETCH?( 4, 1, iElem, 9, 9, nElems,neigh))
      f5 = inState( ?FETCH?( 5, 1, iElem, 9, 9, nElems,neigh))
      f6 = inState( ?FETCH?( 6, 1, iElem, 9, 9, nElems,neigh))
      f7 = inState( ?FETCH?( 7, 1, iElem, 9, 9, nElems,neigh))
      f8 = inState( ?FETCH?( 8, 1, iElem, 9, 9, nElems,neigh))
      f9 = inState( ?FETCH?( 9, 1, iElem, 9, 9, nElems,neigh))

?? copy :: rho_d2q9_s( rho, f )

?? copy :: ux_d2q9_s( u_x, f, inv_rho )
?? copy :: uy_d2q9_s( u_y, f, inv_rho )

      ! calculate fEq
?? copy :: fEq_d2q9_s(fEq, u_x, u_y, rho, 1.0_rk, usq, ucx, c0, c1, c2, c3)

      outState( ?SAVE?( 9,1, iElem, 9, 9,nElems,neigh )) = cmpl_o*f9 + omega*fEq9
      outState( ?SAVE?( 1,1, iElem, 9, 9,nElems,neigh )) = cmpl_o*f1 + omega*fEq1
      outState( ?SAVE?( 2,1, iElem, 9, 9,nElems,neigh )) = cmpl_o*f2 + omega*fEq2
      outState( ?SAVE?( 3,1, iElem, 9, 9,nElems,neigh )) = cmpl_o*f3 + omega*fEq3
      outState( ?SAVE?( 4,1, iElem, 9, 9,nElems,neigh )) = cmpl_o*f4 + omega*fEq4
      outState( ?SAVE?( 5,1, iElem, 9, 9,nElems,neigh )) = cmpl_o*f5 + omega*fEq5
      outState( ?SAVE?( 6,1, iElem, 9, 9,nElems,neigh )) = cmpl_o*f6 + omega*fEq6
      outState( ?SAVE?( 7,1, iElem, 9, 9,nElems,neigh )) = cmpl_o*f7 + omega*fEq7
      outState( ?SAVE?( 8,1, iElem, 9, 9,nElems,neigh )) = cmpl_o*f8 + omega*fEq8

    end do nodeloop
!$omp end do nowait

  end subroutine bgk_d2q9_incomp
! ****************************************************************************** !

! ****************************************************************************** !
  !> BGK with nonNewtonian Power Law model
  !!
?? copy :: compute_routineHeader( bgk_d2q9_nNwtn_pl )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer       :: iElem, ix,iy, omegaPos
    type(mus_varSys_data_type), pointer :: fPtr
    type(mus_scheme_type), pointer :: scheme
    real(kind=rk) :: f(-1:1,-1:1), fNeq(-1:1,-1:1)
    real(kind=rk) :: u,v,u2,v2,rho, inv_rho
    real(kind=rk) :: omega, fac
    real(kind=rk) :: m20, m02
    real(kind=rk) :: sumX1, sumXN, sumY1, sumYN
    real(kind=rk) :: Gx, Gy
    real(kind=rk) :: X(-1:1), Y(-1:1)
    real(kind=rk) :: cS, convSR, convVisc, k, n_1, s(3), shearRate, nuLB
    ! ---------------------------------------------------------------------------
    !call tem_abort('Error: bgk_d2q9_nNwtn_pl need to be tested. Refered to scheme(1) beforehand')

    ! access scheme via 1st variable method data which is a state variable
    call C_F_POINTER( varSys%method%val(1)%method_Data, fPtr )
    scheme => fPtr%solverData%scheme

    omegaPos = QQ + 1

    ! conversion factor for Shear Rate (or Strain Rate)
    convSR = params%physics%fac(level)%strainRate

    ! conversion factor for kinematic viscosity
    convVisc = params%physics%fac(level)%visc

    ! get nonNewtonian power law parameter
    k   = scheme%field(1)%fieldProp%fluid%nNwtn%PL%k &
      &   / params%physics%rho0 / convVisc
    n_1 = scheme%field(1)%fieldProp%fluid%nNwtn%PL%n_1

!$omp do schedule(static)
    !NEC$ ivdep
    !DIR$ NOVECTOR
    nodeloop: do iElem = 1, nSolve
      f(-1, 0) = inState( ?FETCH?( qN0, 1, iElem, 9, 10, nElems,neigh))
      f( 0,-1) = inState( ?FETCH?( q0N, 1, iElem, 9, 10, nElems,neigh))
      f( 1, 0) = inState( ?FETCH?( q10, 1, iElem, 9, 10, nElems,neigh))
      f( 0, 1) = inState( ?FETCH?( q01, 1, iElem, 9, 10, nElems,neigh))
      f(-1,-1) = inState( ?FETCH?( qNN, 1, iElem, 9, 10, nElems,neigh))
      f(-1, 1) = inState( ?FETCH?( qN1, 1, iElem, 9, 10, nElems,neigh))
      f( 1,-1) = inState( ?FETCH?( q1N, 1, iElem, 9, 10, nElems,neigh))
      f( 1, 1) = inState( ?FETCH?( q11, 1, iElem, 9, 10, nElems,neigh))
      f( 0, 0) = inState( ?FETCH?( q00, 1, iElem, 9, 10, nElems,neigh))

      omega  = inState( ?IDX?(omegaPos, iElem, 10, nElems) )
      fac    = 4.5_rk - 2.25_rk * omega

      sumX1 = sum(f( 1,:))
      sumXN = sum(f(-1,:))
      rho   = sumX1 + sumXN + sum(f(0,:))
      inv_rho = 1.0_rk / rho
      cS = -1.5_rk * omega * convSR * inv_rho

      sumY1 = sum(f(:, 1))
      sumYN = sum(f(:,-1))
      u = ( sumX1 - sumXN ) * inv_rho
      v = ( sumY1 - sumYN ) * inv_rho

      ! u v square
      u2 = u*u
      v2 = v*v

      ! second moments, by equation A.7 and A.8
      m20 = ( sumX1 + sumXN ) * inv_rho
      m02 = ( sumY1 + sumYN ) * inv_rho

      ! calculate Galilean correction term, by equation A.13 and A.14
      Gx = fac * u2 * ( m20 - div1_3 - u2 )
      Gy = fac * v2 * ( m02 - div1_3 - v2 )

      ! X Y components of eq
      ! by equation A.19 - A.21
      X( 0) = -div2_3 + u2 + Gx
      X( 1) = - ( X(0) + 1.0d0 + u ) * 0.5d0
      X(-1) = X(1) + u

      Y( 0) = -div2_3 + v2 + Gy
      Y( 1) = - ( Y(0) + 1.0d0 + v ) * 0.5d0
      Y(-1) = Y(1) + v

      !  fEq = rho * X * Y
      ! fNeq = f - fEq = f - rhoXY
      do iy = -1, 1
        do ix = -1, 1
          fNeq(ix,iy) = f(ix,iy) - rho * X(ix) * Y(iy)
        end do
      end do

      outState(?SAVE?(q00,1,iElem,9,10,nElems,neigh )) = f(0,0)   - omega*fNeq(0,0)
      outState(?SAVE?(qN0,1,iElem,9,10,nElems,neigh )) = f(-1,0)  - omega*fNeq(-1,0)
      outState(?SAVE?(q10,1,iElem,9,10,nElems,neigh )) = f(1,0)   - omega*fNeq(1,0)
      outState(?SAVE?(q0N,1,iElem,9,10,nElems,neigh )) = f(0,-1)  - omega*fNeq(0,-1)
      outState(?SAVE?(q01,1,iElem,9,10,nElems,neigh )) = f(0,1)   - omega*fNeq(0,1)
      outState(?SAVE?(qNN,1,iElem,9,10,nElems,neigh )) = f(-1,-1) - omega*fNeq(-1,-1)
      outState(?SAVE?(q11,1,iElem,9,10,nElems,neigh )) = f(1,1)   - omega*fNeq(1,1)
      outState(?SAVE?(q1N,1,iElem,9,10,nElems,neigh )) = f(1,-1)  - omega*fNeq(1,-1)
      outState(?SAVE?(qN1,1,iElem,9,10,nElems,neigh )) = f(-1,1)  - omega*fNeq(-1,1)

      s(1) = sum(fNeq(1,:)) + sum(fNeq(-1,:))
      s(2) = sum(fNeq(:,1)) + sum(fNeq(:,-1))
      s(3) = fNeq(1,1) + fNeq(-1,-1) - fNeq(1,-1) - fNeq(-1,1)
      s = s * cS
      shearRate = 2._rk * sqrt( s(1)*s(1) + s(2)*s(2) + s(3)*s(3) )
      nuLB = ( shearRate ** n_1 ) * k
outState( ?IDX?(omegaPos, iElem, 10, nElems) ) = 1._rk / ( 0.5_rk + 3._rk*nuLB )

    end do nodeloop
!$omp end do nowait

  end subroutine bgk_d2q9_nNwtn_pl
! ****************************************************************************** !

end module mus_d2q9_module
! ****************************************************************************** !
