 module sw_core_mod

 use fv_mp_mod,         only: ng, is,js,ie,je, isd,jsd,ied,jed,  &
                              mp_corner_comm, domain
 use fv_grid_tools_mod, only: npx=>npx_g,npy=>npy_g, cosa, sina,  &
                              rdxc, rdyc, dx,dy, dxc,dyc, dxa,dya,  &
                              rdxa, rdya, area, rarea, rarea_c, rdx, rdy
 use fv_grid_tools_mod, only: grid_type
 use tp_core_mod,       only: fv_tp_2d, pert_ppm, copy_corners
 use fv_grid_utils_mod, only: edge_vect_s,edge_vect_n,edge_vect_w,edge_vect_e,  &
                              sw_corner, se_corner, ne_corner, nw_corner,       &
                              cosa_u, cosa_v, cosa_s, sina_s, sina_u, sina_v,   &
                              rsin_u, rsin_v, rsin_v, rsina, ec1, ec2, ew, es,  &
                              big_number, da_min_c, fC, f0,   &
                              rsin2, Gnomonic_grid
!4TAF#ifdef SW_DYNAMICS
!4TAF use test_cases_mod,    only: test_case
!4TAF#endif
 implicit none

!4TAF Private parameters moved inside specific subroutines
#if defined(TAF_DOES_NOT_LIKE)
  real, parameter:: r3 =   1./3.
  real, parameter:: t11=27./28., t12=-13./28., t13=3./7., t14=6./7., t15=3./28.
  real, parameter:: s11=11./14., s13=-13./14., s14=4./7., s15=3./14.
! PPM volume mean form:
  real, parameter:: b1 =  7./12.     ! 0.58333333
  real, parameter:: b2 = -1./12.
! 4-pt Lagrange interpolation
  real, parameter:: a1 =  0.5625
  real, parameter:: a2 = -0.0625
!----------------------------------------------
! 3-pt off-center intp formular:
  real, parameter:: c1 = -0.125
  real, parameter:: c2 =  0.75
  real, parameter:: c3 =  0.375
!----------------------------------------------
#endif
      private
      public :: c_sw, d_sw, d2a2c_vect, divergence_corner

      contains

 
   subroutine c_sw(delpc, delp, ptc, pt, u,v, w, uc,vc, ua,va, wc,  &
                   ut, vt, dt2, hydrostatic, dord4)
      real, intent(INOUT), dimension(isd:ied,  jsd:jed+1):: u, vc
      real, intent(INOUT), dimension(isd:ied+1,jsd:jed  ):: v, uc
      real, intent(INOUT), dimension(isd:ied, jsd:jed):: delp,  pt,  ua, va, w
      real, intent(OUT  ), dimension(isd:ied, jsd:jed):: delpc, ptc, ut, vt, wc
      real,    intent(IN) :: dt2
      logical, intent(IN) :: hydrostatic
      logical, intent(IN) :: dord4
! Local:
      real, dimension(is-1:ie+1,js-1:je+1):: vort, ke
      real, dimension(is-1:ie+2,js-1:je+1):: fx, fx1, fx2
      real, dimension(is-1:ie+1,js-1:je+2):: fy, fy1, fy2
      real :: dt4
      integer :: i,j, is2, ie1
      integer iep1, jep1

!4TAF#ifdef FIX_C_BOUNDARY 
!4TAF      iep1 = ie;   jep1 = je
!4TAF#else
      iep1 = ie+1; jep1 = je+1
!4TAF#endif

      call d2a2c_vect(u, v, ua, va, uc, vc, ut, vt, dord4)  
!     call d2a2c_vect_v2(u, v, ua, va, uc, vc, ut, vt)

      do j=js-1,jep1
         do i=is-1,iep1+1
            ut(i,j) = dt2*ut(i,j)*dy(i,j)*sina_u(i,j)
         enddo
      enddo
      do j=js-1,jep1+1
         do i=is-1,iep1
            vt(i,j) = dt2*vt(i,j)*dx(i,j)*sina_v(i,j)
         enddo
      enddo

!----------------
! Transport delp:
!----------------
! Xdir:
      if (grid_type < 3) call fill2_4corners(delp, pt, 1)
      if ( hydrostatic ) then
!4TAF#ifdef SW_DYNAMICS
!4TAF           do j=js-1,jep1
!4TAF              do i=is-1,iep1+1      
!4TAF                 if ( ut(i,j) > 0. ) then
!4TAF                      fx1(i,j) = delp(i-1,j)
!4TAF                 else
!4TAF                      fx1(i,j) = delp(i,j)
!4TAF                 endif
!4TAF                 fx1(i,j) =  ut(i,j)*fx1(i,j)
!4TAF              enddo
!4TAF           enddo
!4TAF#else
           do j=js-1,jep1
              do i=is-1,iep1+1
                 if ( ut(i,j) > 0. ) then
                      fx1(i,j) = delp(i-1,j)
                       fx(i,j) =   pt(i-1,j)
                 else
                      fx1(i,j) = delp(i,j)
                       fx(i,j) =   pt(i,j)
                 endif
                 fx1(i,j) =  ut(i,j)*fx1(i,j)
                  fx(i,j) = fx1(i,j)* fx(i,j)
              enddo
           enddo
!4TAF#endif
      else
           if (grid_type < 3) call fill_4corners(w, 1)
           do j=js-1,je+1
              do i=is-1,ie+2      
                 if ( ut(i,j) > 0. ) then
                      fx1(i,j) = delp(i-1,j)
                       fx(i,j) =   pt(i-1,j)
                      fx2(i,j) =    w(i-1,j)
                 else
                      fx1(i,j) = delp(i,j)
                       fx(i,j) =   pt(i,j)
                      fx2(i,j) =    w(i,j)
                 endif
                 fx1(i,j) =  ut(i,j)*fx1(i,j)
                  fx(i,j) = fx1(i,j)* fx(i,j)
                 fx2(i,j) = fx1(i,j)*fx2(i,j)
              enddo
           enddo
      endif

! Ydir:
      if (grid_type < 3) call fill2_4corners(delp, pt, 2)
      if ( hydrostatic ) then
           do j=js-1,jep1+1
              do i=is-1,iep1      
                 if ( vt(i,j) > 0. ) then
                      fy1(i,j) = delp(i,j-1)
                       fy(i,j) =   pt(i,j-1)
                 else
                      fy1(i,j) = delp(i,j)
                       fy(i,j) =   pt(i,j)
                 endif
                 fy1(i,j) =  vt(i,j)*fy1(i,j)
                  fy(i,j) = fy1(i,j)* fy(i,j)
              enddo
           enddo
           do j=js-1,jep1
              do i=is-1,iep1    
                 delpc(i,j) = delp(i,j) + (fx1(i,j)-fx1(i+1,j)+fy1(i,j)-fy1(i,j+1))*rarea(i,j)
!4TAF#ifdef SW_DYNAMICS
!4TAF                   ptc(i,j) = pt(i,j)
!4TAF#else
                   ptc(i,j) = (pt(i,j)*delp(i,j) +   &
                              (fx(i,j)-fx(i+1,j)+fy(i,j)-fy(i,j+1))*rarea(i,j))/delpc(i,j)
!4TAF#endif
              enddo
           enddo
      else
           if (grid_type < 3) call fill_4corners(w, 2)
           do j=js-1,je+2
              do i=is-1,ie+1      
                 if ( vt(i,j) > 0. ) then
                      fy1(i,j) = delp(i,j-1)
                       fy(i,j) =   pt(i,j-1)
                      fy2(i,j) =    w(i,j-1)
                 else
                      fy1(i,j) = delp(i,j)
                       fy(i,j) =   pt(i,j)
                      fy2(i,j) =    w(i,j)
                 endif
                 fy1(i,j) =  vt(i,j)*fy1(i,j)
                  fy(i,j) = fy1(i,j)* fy(i,j)
                 fy2(i,j) = fy1(i,j)*fy2(i,j)
              enddo
           enddo
           do j=js-1,je+1
              do i=is-1,ie+1    
                 delpc(i,j) = delp(i,j) + (fx1(i,j)-fx1(i+1,j)+fy1(i,j)-fy1(i,j+1))*rarea(i,j)
                   ptc(i,j) = (pt(i,j)*delp(i,j) +   &
                              (fx(i,j)-fx(i+1,j)+fy(i,j)-fy(i,j+1))*rarea(i,j))/delpc(i,j)
                    wc(i,j) = (w(i,j)*delp(i,j) + (fx2(i,j)-fx2(i+1,j) +    &
                               fy2(i,j)-fy2(i,j+1))*rarea(i,j))/delpc(i,j)
              enddo
           enddo
      endif

!------------
! Compute KE:
!------------
      do j=js-1,jep1
         do i=is-1,iep1
            if ( ua(i,j) > 0. ) then
                 if ( i==1 ) then
                    ke(1,j) = uc(1,  j)*sina_u(1,  j)+v(1,  j)*cosa_u(1,  j)
                 elseif ( i==npx ) then
                    ke(i,j) = uc(npx,j)*sina_u(npx,j)-v(npx,j)*cosa_u(npx,j)
                 else
                    ke(i,j) = uc(i,j)
                 endif
            else
                 if ( i==0 ) then
                    ke(0,j) = uc(1,  j)*sina_u(1,  j)-v(1,  j)*cosa_u(1,  j)
                 elseif ( i==(npx-1) ) then
                    ke(i,j) = uc(npx,j)*sina_u(npx,j)+v(npx,j)*cosa_u(npx,j)
                 else
                    ke(i,j) = uc(i+1,j)
                 endif
            endif
         enddo
      enddo
      do j=js-1,jep1
         do i=is-1,iep1
            if ( va(i,j) > 0. ) then
               if ( j==1 ) then
                  vort(i,1) = vc(i,  1)*sina_v(i,  1)+u(i,  1)*cosa_v(i,  1)
               elseif ( j==npy ) then
                  vort(i,j) = vc(i,npy)*sina_v(i,npy)-u(i,npy)*cosa_v(i,npy)
               else
                  vort(i,j) = vc(i,j)
               endif
            else
               if ( j==0 ) then
                  vort(i,0) = vc(i,  1)*sina_v(i,  1)-u(i,  1)*cosa_v(i,  1)
               elseif ( j==(npy-1) ) then
                  vort(i,j) = vc(i,npy)*sina_v(i,npy)+u(i,npy)*cosa_v(i,npy)
               else
                  vort(i,j) = vc(i,j+1)
               endif
            endif
         enddo
      enddo

      dt4 = 0.5*dt2
      do j=js-1,jep1
         do i=is-1,iep1
            ke(i,j) = dt4*(ua(i,j)*ke(i,j) + va(i,j)*vort(i,j)) 
         enddo
      enddo

!------------------------------
! Compute circulation on C grid
!------------------------------
! To consider using true co-variant winds at face edges?
!4TAF#ifdef TEST_EDGE
!4TAF      do j=js-1,je+1
!4TAF         do i=is,ie+1
!4TAF            fx(i,j) = uc(i,j) * dxc(i,j)
!4TAF         enddo
!4TAF      enddo
!4TAF      do j=js,je+1
!4TAF         do i=is-1,ie+1
!4TAF            fy(i,j) = vc(i,j) * dyc(i,j)
!4TAF         enddo
!4TAF      enddo
!4TAF#else
      is2 = max(2,is); ie1 = min(npx-1,ie+1)
      do j=js-1,je+1
         do i=is2,ie1
            fx(i,j) = uc(i,j)*dxc(i,j)
         enddo
         if(  is   ==  1 ) fx(1,  j) = uc(1,  j)*sina_u(1,  j)*dxc(1,  j)
         if( (ie+1)==npx ) fx(npx,j) = uc(npx,j)*sina_u(npx,j)*dxc(npx,j)
      enddo

      do j=js,je+1
         if( j==1 .or. j==npy ) then
           do i=is-1,ie+1
              fy(i,j) = vc(i,j)*sina_v(i,j)*dyc(i,j)
           enddo
         else
           do i=is-1,ie+1
              fy(i,j) = vc(i,j)*dyc(i,j)
           enddo
         endif
      enddo
!4TAF#endif
      do j=js,je+1
         do i=is,ie+1
            vort(i,j) =  fx(i,j-1) - fx(i,j) - fy(i-1,j) + fy(i,j)
         enddo
      enddo

! Remove the extra term at the corners:
      if ( sw_corner ) vort(1,    1) = vort(1,    1) + fy(0,   1)
      if ( se_corner ) vort(npx  ,1) = vort(npx,  1) - fy(npx, 1)
      if ( ne_corner ) vort(npx,npy) = vort(npx,npy) - fy(npx,npy)
      if ( nw_corner ) vort(1,  npy) = vort(1,  npy) + fy(0,  npy)

!----------------------------
! Compute absolute vorticity
!----------------------------
      do j=js,je+1
         do i=is,ie+1
            vort(i,j) = fC(i,j) + rarea_c(i,j) * vort(i,j)
         enddo
      enddo

!----------------------------------
! Transport absolute vorticity:
!----------------------------------
      do j=js,je
         do i=is,iep1
            if ( i==1 .or. i==npx ) then
                 fy1(i,j) = dt2*v(i,j)*sina_u(i,j)
            else
                 fy1(i,j) = dt2*(v(i,j)-uc(i,j)*cosa_u(i,j))/sina_u(i,j)
            endif
            if ( fy1(i,j) > 0. ) then
                 fy(i,j) = vort(i,j)
            else
                 fy(i,j) = vort(i,j+1)
            endif
          enddo
      enddo

      do j=js,jep1
         if ( j==1 .or. j==npy ) then
            do i=is,ie
               fx1(i,j) = dt2*u(i,j)*sina_v(i,j)
               if ( fx1(i,j) > 0. ) then
                    fx(i,j) = vort(i,j)
               else
                    fx(i,j) = vort(i+1,j)
               endif
            enddo
         else
            do i=is,ie
               fx1(i,j) = dt2*(u(i,j)-vc(i,j)*cosa_v(i,j))/sina_v(i,j)
               if ( fx1(i,j) > 0. ) then
                    fx(i,j) = vort(i,j)
               else
                    fx(i,j) = vort(i+1,j)
               endif
            enddo
         endif
      enddo

! Update time-centered winds on the C-Grid
      do j=js,je
         do i=is,iep1
            uc(i,j) = uc(i,j) + fy1(i,j)*fy(i,j) + rdxc(i,j)*(ke(i-1,j)-ke(i,j))
         enddo
      enddo
      do j=js,jep1
         do i=is,ie
            vc(i,j) = vc(i,j) - fx1(i,j)*fx(i,j) + rdyc(i,j)*(ke(i,j-1)-ke(i,j))
         enddo
      enddo

   end subroutine c_sw



!-------------------------------------------------------------------------------
!
!     d_sw :: D-Grid Shallow Water Routine
!
   subroutine d_sw(delpc, delp,  ptc,   pt, u,  v, w, uc,vc, &
                   ua,va, divg_d, xflux, yflux, cx, cy,              &
                   crx_adv, cry_adv,  xfx_adv, yfx_adv,      &
                   dt, hord_mt, hord_vt, hord_tm,    &
                   dddmp, dddm4, hydrostatic, uniform_ppm)

      integer, intent(IN):: hord_mt, hord_vt, hord_tm
      real   , intent(IN):: dt, dddmp, dddm4
      real, intent(in):: divg_d(isd:ied+1,jsd:jed+1) ! divergence
      real, intent(INOUT), dimension(isd:ied,  jsd:jed):: delp, pt, ua, va, w
      real, intent(INOUT), dimension(isd:ied  ,jsd:jed+1):: u, vc
      real, intent(INOUT), dimension(isd:ied+1,jsd:jed  ):: v, uc
      real, intent(OUT),   dimension(isd:ied,  jsd:jed)  :: delpc, ptc
      real, intent(INOUT):: xflux(is:ie+1,js:je  )
      real, intent(INOUT):: yflux(is:ie  ,js:je+1)
      real, intent(INOUT)::    cx(is:ie+1,jsd:jed  )
      real, intent(INOUT)::    cy(isd:ied,js:je+1)
      logical, intent(IN):: uniform_ppm
      logical, intent(IN):: hydrostatic
      real, intent(OUT), dimension(is:ie+1,jsd:jed):: crx_adv, xfx_adv
      real, intent(OUT), dimension(isd:ied,js:je+1):: cry_adv, yfx_adv
! Local:
      real, dimension(is:ie+1,js:je+1):: ub, vb
      real :: ut(is-1:ie+2,jsd: jed  )
      real :: vt(isd: ied ,js-1:je+2)
      real :: wk(isd:ied+1,jsd:jed+1) !  work array
!     real :: ke(is:ie+1,js:je+1) ! Kinetic Energy
      real :: ke(isd:ied+1,jsd:jed+1) !  needs thsi fro corner_comm
      real :: vort(isd:ied,jsd:jed)     ! Vorticity
      real ::   fx(is:ie+1,js:je  )  ! 1-D X-direction Fluxes
      real ::   fy(is:ie  ,js:je+1)  ! 1-D Y-direction Fluxes
      real ::   gy(is:ie  ,js:je+1)  ! work Y-dir flux array
      real :: ra_x(is:ie,jsd:jed)
      real :: ra_y(isd:ied,js:je)

      real :: dt4, dt5, dt6
      real :: damp, damp4
      integer :: i,j, is2, ie1, js2, je1

#if !defined(TAF_DOES_NOT_LIKE)
  real, parameter:: r3 =   1./3.
  real, parameter:: t11=27./28., t12=-13./28., t13=3./7., t14=6./7., t15=3./28.
  real, parameter:: s11=11./14., s13=-13./14., s14=4./7., s15=3./14.
! PPM volume mean form:
  real, parameter:: b1 =  7./12.     ! 0.58333333
  real, parameter:: b2 = -1./12.
! 4-pt Lagrange interpolation
  real, parameter:: a1 =  0.5625
  real, parameter:: a2 = -0.0625
!----------------------------------------------
! 3-pt off-center intp formular:
  real, parameter:: c1 = -0.125
  real, parameter:: c2 =  0.75
  real, parameter:: c3 =  0.375
!----------------------------------------------
#endif

!4TAF#ifdef SW_DYNAMICS
!4TAF      if ( test_case == 1 ) then
!4TAF        do j=jsd,jed
!4TAF           do i=is,ie+1
!4TAF              xfx_adv(i,j) = dt * uc(i,j) / sina_u(i,j)
!4TAF              if (xfx_adv(i,j) > 0.) then
!4TAF                  crx_adv(i,j) = xfx_adv(i,j) * rdxa(i-1,j)
!4TAF              else
!4TAF                  crx_adv(i,j) = xfx_adv(i,j) * rdxa(i,j)
!4TAF              endif
!4TAF              xfx_adv(i,j) = dy(i,j)*xfx_adv(i,j)*sina_u(i,j)
!4TAF           enddo
!4TAF        enddo
!4TAF
!4TAF        do j=js,je+1
!4TAF           do i=isd,ied
!4TAF              yfx_adv(i,j) = dt * vc(i,j) / sina_v(i,j)
!4TAF              if (yfx_adv(i,j) > 0.) then
!4TAF                 cry_adv(i,j) = yfx_adv(i,j) * rdya(i,j-1)
!4TAF              else
!4TAF                 cry_adv(i,j) = yfx_adv(i,j) * rdya(i,j)
!4TAF              endif
!4TAF              yfx_adv(i,j) = dx(i,j)*yfx_adv(i,j)*sina_v(i,j)
!4TAF           enddo
!4TAF        enddo
!4TAF      else
!4TAF#endif


     if ( grid_type < 3 ) then
! Interior:
        do j=jsd,jed
           if(j/=0 .and. j/=1 .and. j/=(npy-1) .and. j/=npy) then
             do i=is-1,ie+2
                ut(i,j) = ( uc(i,j) - 0.25 * cosa_u(i,j) *     &
                    (vc(i-1,j)+vc(i,j)+vc(i-1,j+1)+vc(i,j+1)))*rsin_u(i,j)
             enddo
           endif
        enddo

        do j=js-1,je+2
           if( j/=1 .and. j/=npy ) then
              do i=isd,ied
                 vt(i,j) = ( vc(i,j) - 0.25 * cosa_v(i,j) *     &
                    (uc(i,j-1)+uc(i+1,j-1)+uc(i,j)+uc(i+1,j)))*rsin_v(i,j)
              enddo
           endif
        enddo

! West edge:
        if ( is==1 ) then
           do j=jsd,jed
              ut(1,j) = uc(1,j) * rsin_u(1,j)
           enddo
           do j=max(3,js), min(npy-2,je+1)
!             vt(0,j) = vc(0,j) - 0.25*cosa_v(0,j)*   &
              vt(0,j) = vc(0,j) + 0.25*cosa_v(1,j)*   &
                       (ut(0,j-1)+ut(1,j-1)+ut(0,j)+ut(1,j))
              vt(1,j) = vc(1,j) - 0.25*cosa_v(1,j)*   &
                       (ut(1,j-1)+ut(2,j-1)+ut(1,j)+ut(2,j))
           enddo
        endif

! East edge:
        if ( (ie+1)==npx ) then
           do j=jsd,jed
              ut(npx,j) = uc(npx,j) * rsin_u(npx,j)
           enddo
           do j=max(3,js), min(npy-2,je+1)
              vt(npx-1,j) = vc(npx-1,j) - 0.25*cosa_v(npx-1,j)*   &
                           (ut(npx-1,j-1)+ut(npx,j-1)+ut(npx-1,j)+ut(npx,j))
!             vt(npx,j) = vc(npx,j) - 0.25*cosa_v(npx,j)*   &
              vt(npx,j) = vc(npx,j) + 0.25*cosa_v(npx-1,j)*   &
                         (ut(npx,j-1)+ut(npx+1,j-1)+ut(npx,j)+ut(npx+1,j))
           enddo
        endif

! South (Bottom) edge:
        if ( js==1 ) then
           do i=isd,ied
              vt(i,1) = vc(i,1) * rsin_v(i,1)
           enddo
           do i=max(3,is),min(npx-2,ie+1)
!             ut(i,0) = uc(i,0) - 0.25*cosa_u(i,0)*   &
              ut(i,0) = uc(i,0) + 0.25*cosa_u(i,1)*   &
                       (vt(i-1,0)+vt(i,0)+vt(i-1,1)+vt(i,1))
              ut(i,1) = uc(i,1) - 0.25*cosa_u(i,1)*   &
                       (vt(i-1,1)+vt(i,1)+vt(i-1,2)+vt(i,2))
           enddo
        endif

! North edge:
        if ( (je+1)==npy ) then
           do i=isd,ied
              vt(i,npy) = vc(i,npy) * rsin_v(i,npy)
           enddo
           do i=max(3,is),min(npx-2,ie+1)
              ut(i,npy-1) = uc(i,npy-1) - 0.25*cosa_u(i,npy-1)*   &
                           (vt(i-1,npy-1)+vt(i,npy-1)+vt(i-1,npy)+vt(i,npy))
!             ut(i,npy) = uc(i,npy) - 0.25*cosa_u(i,npy)*   &
              ut(i,npy) = uc(i,npy) + 0.25*cosa_u(i,npy-1)*   &
                         (vt(i-1,npy)+vt(i,npy)+vt(i-1,npy+1)+vt(i,npy+1))
           enddo
        endif

        if( sw_corner ) then
            damp = 1. / (1.-0.0625*cosa_u(2,1)*cosa_v(1,2))
            ut(2,0) = (uc(2,0)-0.25*cosa_u(2,0)*(vt(1,1)+vt(2,1)+vt(2,0)+vc(1,0) -   &
                      0.25*cosa_v(1,0)*(ut(1,0)+ut(1,-1)+ut(2,-1))) ) * damp
            ut(2,1) = (uc(2,1)-0.25*cosa_u(2,1)*(vt(1,1)+vt(2,1)+vt(2,2)+vc(1,2) -   &
                      0.25*cosa_v(1,2)*(ut(1,1)+ut(1,2)+ut(2,2))) ) * damp
            vt(1,2) = (vc(1,2)-0.25*cosa_v(1,2)*(ut(1,1)+ut(1,2)+ut(2,2)+uc(2,1) -   &
                      0.25*cosa_u(2,1)*(vt(1,1)+vt(2,1)+vt(2,2))) ) * damp
            vt(0,2) = (vc(0,2)-0.25*cosa_v(0,2)*(ut(1,1)+ut(1,2)+ut(0,2)+uc(0,1) -   &
                      0.25*cosa_u(0,1)*(vt(0,1)+vt(-1,1)+vt(-1,2))) ) * damp
        endif

        if( se_corner ) then
            damp = 1. / (1. - 0.0625*cosa_u(npx-1,1)*cosa_v(npx-1,2))
            ut(npx-1,0) = ( uc(npx-1,0)+0.25*cosa_u(npx-1,1)*(   &
                            vt(npx-1,1)+vt(npx-2,1)+vt(npx-2,0)+vc(npx-1,0) +   &
                      0.25*cosa_v(npx-1,2)*(ut(npx,0)+ut(npx,-1)+ut(npx-1,-1))) ) * damp
            ut(npx-1,1) = ( uc(npx-1,1)-0.25*cosa_u(npx-1,1)*(  &
                            vt(npx-1,1)+vt(npx-2,1)+vt(npx-2,2)+vc(npx-1,2) -   &
                      0.25*cosa_v(npx-1,2)*(ut(npx,1)+ut(npx,2)+ut(npx-1,2))) ) * damp
            vt(npx-1,2) = ( vc(npx-1,2)-0.25*cosa_v(npx-1,2)*(  &
                            ut(npx,1)+ut(npx,2)+ut(npx-1,2)+uc(npx-1,1) -   &
                      0.25*cosa_u(npx-1,1)*(vt(npx-1,1)+vt(npx-2,1)+vt(npx-2,2))) ) * damp
            vt(npx,  2) = ( vc(npx,2)+0.25*cosa_v(npx-1,2)*(  &
                            ut(npx,1)+ut(npx,2)+ut(npx+1,2)+uc(npx+1,1) +   &
                      0.25*cosa_u(npx-1,1)*(vt(npx,1)+vt(npx+1,1)+vt(npx+1,2))) ) * damp
        endif

        if( ne_corner ) then
            damp = 1. / (1. - 0.0625*cosa_u(npx-1,npy-1)*cosa_v(npx-1,npy-1))
            ut(npx-1,npy) = ( uc(npx-1,npy)+0.25*cosa_u(npx-1,npy-1)*(   &
                              vt(npx-1,npy)+vt(npx-2,npy)+vt(npx-2,npy+1)+vc(npx-1,npy+1) +   &
                0.25*cosa_v(npx-1,npy-1)*(ut(npx,npy)+ut(npx,npy+1)+ut(npx-1,npy+1))) ) * damp
            ut(npx-1,npy-1) = ( uc(npx-1,npy-1)-0.25*cosa_u(npx-1,npy-1)*(  &
                                vt(npx-1,npy)+vt(npx-2,npy)+vt(npx-2,npy-1)+vc(npx-1,npy-1) -  &
                0.25*cosa_v(npx-1,npy-1)*(ut(npx,npy-1)+ut(npx,npy-2)+ut(npx-1,npy-2))) ) * damp
            vt(npx-1,npy-1) = ( vc(npx-1,npy-1)-0.25*cosa_v(npx-1,npy-1)*(  &
                                ut(npx,npy-1)+ut(npx,npy-2)+ut(npx-1,npy-2)+uc(npx-1,npy-1) -  &
                0.25*cosa_u(npx-1,npy-1)*(vt(npx-1,npy)+vt(npx-2,npy)+vt(npx-2,npy-1))) ) * damp
            vt(npx,  npy-1) = ( vc(npx,npy-1)+0.25*cosa_v(npx-1,npy-1)*(   &
                                ut(npx,npy-1)+ut(npx,npy-2)+ut(npx+1,npy-2)+uc(npx+1,npy-1) +   &
                0.25*cosa_u(npx-1,npy-1)*(vt(npx,npy)+vt(npx+1,npy)+vt(npx+1,npy-1))) ) * damp
        endif

        if( nw_corner ) then
            damp = 1. / (1. - 0.0625*cosa_u(2,npy-1)*cosa_v(1,npy-1))
            ut(2,npy) = ( uc(2,npy)+0.25*cosa_u(2,npy-1)*(   &
                          vt(1,npy)+vt(2,npy)+vt(2,npy+1)+vc(1,npy+1) +   &
                      0.25*cosa_v(1,npy-1)*(ut(1,npy)+ut(1,npy+1)+ut(2,npy+1))) ) * damp
            ut(2,npy-1) = ( uc(2,npy-1)-0.25*cosa_u(2,npy-1)*(  &
                            vt(1,npy)+vt(2,npy)+vt(2,npy-1)+vc(1,npy-1) -   &
                      0.25*cosa_v(1,npy-1)*(ut(1,npy-1)+ut(1,npy-2)+ut(2,npy-2))) ) * damp
            vt(1,npy-1) = ( vc(1,npy-1)-0.25*cosa_v(1,npy-1)*(  &
                            ut(1,npy-1)+ut(1,npy-2)+ut(2,npy-2)+uc(2,npy-1) -   &
                      0.25*cosa_u(2,npy-1)*(vt(1,npy)+vt(2,npy)+vt(2,npy-1))) ) * damp
            vt(0,npy-1) = ( vc(0,npy-1)+0.25*cosa_v(1,npy-1)*(  &
                            ut(1,npy-1)+ut(1,npy-2)+ut(0,npy-2)+uc(0,npy-1) +   &
                      0.25*cosa_u(2,npy-1)*(vt(0,npy)+vt(-1,npy)+vt(-1,npy-1))) ) * damp
        endif
 
     else
! grid_type >= 3
        do j=jsd,jed
           do i=is-1,ie+2
              ut(i,j) =  uc(i,j)
           enddo
        enddo
        
        do j=js-1,je+2
           do i=isd,ied
              vt(i,j) = vc(i,j) 
           enddo
        enddo
     endif      ! end grid_type choices

        do j=jsd,jed
           do i=is,ie+1
              xfx_adv(i,j) = dt*ut(i,j)
           enddo
        enddo

        do j=js,je+1
           do i=isd,ied
              yfx_adv(i,j) = dt*vt(i,j)
           enddo
        enddo

! Compute E-W CFL number:
        do j=jsd,jed
           do i=is,ie+1
              if (xfx_adv(i,j) > 0.) then
                  crx_adv(i,j) = xfx_adv(i,j) * rdxa(i-1,j)
              else
                  crx_adv(i,j) = xfx_adv(i,j) * rdxa(i,j)
              endif
           enddo
        enddo
        do j=jsd,jed
           do i=is,ie+1
              xfx_adv(i,j) = dy(i,j)*xfx_adv(i,j)*sina_u(i,j)
           enddo
        enddo


        do j=js,je+1
           do i=isd,ied
              if (yfx_adv(i,j) > 0.) then
                 cry_adv(i,j) = yfx_adv(i,j) * rdya(i,j-1)
              else
                 cry_adv(i,j) = yfx_adv(i,j) * rdya(i,j)
              endif
           enddo
        enddo
        do j=js,je+1
           do i=isd,ied
              yfx_adv(i,j) = dx(i,j)*yfx_adv(i,j)*sina_v(i,j)
           enddo
        enddo

!4TAF#ifdef SW_DYNAMICS
!4TAF      endif
!4TAF#endif

      do j=jsd,jed
         do i=is,ie
            ra_x(i,j) = area(i,j) + xfx_adv(i,j) - xfx_adv(i+1,j)
         enddo
      enddo
      do j=js,je
         do i=isd,ied
            ra_y(i,j) = area(i,j) + yfx_adv(i,j) - yfx_adv(i,j+1)
         enddo
      enddo


      call fv_tp_2d(delp, crx_adv, cry_adv, npx, npy, hord_tm, fx, fy,  &
                    xfx_adv, yfx_adv, area, ra_x, ra_y, uniform_ppm)

!4TAF#ifdef SW_DYNAMICS
!4TAF        do j=js,je
!4TAF           do i=is,ie
!4TAF              delp(i,j) = delp(i,j) +    &
!4TAF                         (fx(i,j)-fx(i+1,j)+fy(i,j)-fy(i,j+1))*rarea(i,j)
!4TAF              ptc(i,j) = pt(i,j)
!4TAF           enddo
!4TAF        enddo
!4TAF#else

! <<< Save the mass fluxes to the "Flux Capacitor" for tracer transport >>>
        do j=jsd,jed
            do i=is,ie+1
              cx(i,j) = cx(i,j) + crx_adv(i,j)
           enddo
        enddo       
        do j=js,je
           do i=is,ie+1
              xflux(i,j) = xflux(i,j) + fx(i,j)
           enddo
        enddo       

        do j=js,je+1
           do i=isd,ied
              cy(i,j) = cy(i,j) + cry_adv(i,j)
           enddo
           do i=is,ie
              yflux(i,j) = yflux(i,j) + fy(i,j)
           enddo
        enddo 

        if ( .not. hydrostatic ) then
            call fv_tp_2d(w, crx_adv,cry_adv, npx, npy, hord_vt, ub, gy, &
                          xfx_adv,yfx_adv, area, ra_x, ra_y, uniform_ppm, mfx=fx, mfy=fy)
            do j=js,je
               do i=is,ie
                  w(i,j) = w(i,j)*delp(i,j) +             &
                           (ub(i,j)-ub(i+1,j)+gy(i,j)-gy(i,j+1))*rarea(i,j)
               enddo
            enddo
        endif

        call fv_tp_2d(pt, crx_adv,cry_adv, npx, npy, hord_tm, ub, gy,  &
                      xfx_adv,yfx_adv, area, ra_x, ra_y, uniform_ppm, mfx=fx, mfy=fy)
        do j=js,je
           do i=is,ie
              pt(i,j) = pt(i,j)*delp(i,j) +               &
                       (ub(i,j)-ub(i+1,j)+gy(i,j)-gy(i,j+1))*rarea(i,j)
              delp(i,j) = delp(i,j) +                     &
                         (fx(i,j)-fx(i+1,j)+fy(i,j)-fy(i,j+1))*rarea(i,j)
              pt(i,j) = pt(i,j) / delp(i,j)
           enddo
        enddo

        if ( .not. hydrostatic ) then
            do j=js,je
               do i=is,ie
                  w(i,j) = w(i,j) / delp(i,j)
               enddo
            enddo
        endif
!4TAF#endif

!4TAF#ifdef SW_DYNAMICS
!4TAF      if (test_case > 1) then
!4TAF#endif

!----------------------
! Kinetic Energy Fluxes
!----------------------
! Compute B grid contra-variant components for KE:

      dt5 = 0.5 *dt
      dt4 = 0.25*dt

      is2 = max(2,is); ie1 = min(npx-1,ie+1)
      js2 = max(2,js); je1 = min(npy-1,je+1)

      if (grid_type < 3) then

         if ( js==1 ) then
            do i=is,ie+1
               vb(i,1) = dt5*(vt(i-1,1)+vt(i,1))       ! corner values are incorrect
            enddo
         endif
         
         do j=js2,je1
            do i=is2,ie1
               vb(i,j) = dt5*(vc(i-1,j)+vc(i,j)-(uc(i,j-1)+uc(i,j))*cosa(i,j))*rsina(i,j)
            enddo
            if ( is==1 ) then
!              vb(1,j) = dt5*(vt(0,j)+vt(1,j)) 
! 2-pt extrapolation from both sides:
               vb(1,j) = dt4*(-vt(-1,j) + 3.*(vt(0,j)+vt(1,j)) - vt(2,j))
            endif
            if ( (ie+1)==npx ) then
!              vb(npx,j) = dt5*(vt(npx-1,j)+vt(npx,j))
! 2-pt extrapolation from both sides:
               vb(npx,j) = dt4*(-vt(npx-2,j) + 3.*(vt(npx-1,j)+vt(npx,j)) - vt(npx+1,j))
            endif
         enddo

         if ( (je+1)==npy ) then
            do i=is,ie+1
               vb(i,npy) = dt5*(vt(i-1,npy)+vt(i,npy)) ! corner values are incorrect
            enddo
         endif
         
      else
         do j=js,je+1
            do i=is,ie+1
               vb(i,j) = dt5*(vc(i-1,j)+vc(i,j))
            enddo
         enddo
      endif

      call ytp_v(vb, u, v, ub, hord_mt)

      do j=js,je+1
         do i=is,ie+1
            ke(i,j) = vb(i,j)*ub(i,j)
         enddo
      enddo

      if (grid_type < 3) then
         if ( is==1 ) then
            do j=js,je+1
               ub(1,j) = dt5*(ut(1,j-1)+ut(1,j))       ! corner values are incorrect
            enddo
         endif
         
         do j=js,je+1
            if ( j==1 .or. j==npy ) then
               do i=is2,ie1
!                 ub(i,j) = dt5*(ut(i,j-1)+ut(i,j))
! 2-pt extrapolation from both sides:
                  ub(i,j) = dt4*(-ut(i,j-2) + 3.*(ut(i,j-1)+ut(i,j)) - ut(i,j+1))
               enddo
            else
               do i=is2,ie1
                  ub(i,j) = dt5*(uc(i,j-1)+uc(i,j)-(vc(i-1,j)+vc(i,j))*cosa(i,j))*rsina(i,j)
               enddo
            endif
         enddo
         
         if ( (ie+1)==npx ) then
            do j=js,je+1
               ub(npx,j) = dt5*(ut(npx,j-1)+ut(npx,j))       ! corner values are incorrect
            enddo
         endif
         
      else
         do j=js,je+1
            do i=is,ie+1
               ub(i,j) = dt5*(uc(i,j-1)+uc(i,j))
            enddo
         enddo
      endif

      call xtp_u(ub, u, v, vb, hord_mt)

      do j=js,je+1
         do i=is,ie+1
            ke(i,j) = 0.5*(ke(i,j) + ub(i,j)*vb(i,j))
         enddo
      enddo

!-----------------------------------------
! Fix KE at the 4 corners of the face:
!-----------------------------------------
   if ( Gnomonic_grid ) then
      dt6 = dt / 6.
      if ( sw_corner ) then
           ke(1,1) = dt6*( (ut(1,1) + ut(1,0)) * u(1,1) +  &
                           (vt(1,1) + vt(0,1)) * v(1,1) +  &
                           (ut(1,1) + vt(1,1)) * u(0,1) )
      endif
      if ( se_corner ) then
           i = npx
           ke(i,1) = dt6*( (ut(i,1) + ut(i,  0)) * u(i-1,1) + &
                           (vt(i,1) + vt(i-1,1)) * v(i,  1) + &
                           (ut(i,1) - vt(i-1,1)) * u(i,  1) )
      endif
      if ( ne_corner ) then
           i = npx;      j = npy
           ke(i,j) = dt6*( (ut(i,j  ) + ut(i,j-1)) * u(i-1,j) +  &
                           (vt(i,j  ) + vt(i-1,j)) * v(i,j-1) +  &
                           (ut(i,j-1) + vt(i-1,j)) * u(i,j  )  )
      endif
      if ( nw_corner ) then
           j = npy
           ke(1,j) = dt6*( (ut(1,  j) + ut(1,j-1)) * u(1,j  ) +  &
                           (vt(1,  j) + vt(0,  j)) * v(1,j-1) +  &
                           (ut(1,j-1) - vt(1,  j)) * u(0,j  )  )
      endif
   elseif (grid_type < 3) then
      call mp_corner_comm(ke, npx, npy) 
      if (sw_corner) ke(1,    1) = r3*(ke(2,      1)+ke(1,      2)+ke(0,      1))
      if (se_corner) ke(npx,  1) = r3*(ke(npx+1,  1)+ke(npx,    2)+ke(npx-1,  1))
      if (ne_corner) ke(npx,npy) = r3*(ke(npx+1,npy)+ke(npx,npy-1)+ke(npx-1,npy))
      if (nw_corner) ke(1,  npy) = r3*(ke(2,    npy)+ke(1,  npy-1)+ke(0,    npy))
   endif

!-----------------------------
! Compute divergence damping
!-----------------------------
   damp  = dddmp*da_min_c

   if ( dddm4 > 0. ) then

        do j=js-1,je+2
           do i=is-1,ie+2
              delpc(i,j) = divg_d(i,j)
                 wk(i,j) = divg_d(i,j)
           enddo
        enddo

        do j=js,je+1
           do i=is-1,ie+1
              ptc(i,j) = sina_v(i,j)*dyc(i,j)*(wk(i+1,j)-wk(i,j))*rdx(i,j)
           enddo
        enddo

        do j=js-1,je+1
           do i=is,ie+1
              vort(i,j) = sina_u(i,j)*dxc(i,j)*(wk(i,j+1)-wk(i,j))*rdy(i,j)
           enddo
        enddo

        do j=js,je+1
           do i=is,ie+1
              wk(i,j) = vort(i,j-1) - vort(i,j) + ptc(i-1,j) - ptc(i,j)
           enddo
        enddo

! Remove the extra term at the corners:
        if (sw_corner) wk(1,    1) = wk(1,    1) - vort(1,    0)
        if (se_corner) wk(npx,  1) = wk(npx,  1) - vort(npx,  0)
        if (ne_corner) wk(npx,npy) = wk(npx,npy) + vort(npx,npy)
        if (nw_corner) wk(1,  npy) = wk(1,  npy) + vort(1,  npy)

        damp4 = (dddm4*da_min_c)**2
        do j=js,je+1
           do i=is,ie+1
              ke(i,j) = ke(i,j) + damp*divg_d(i,j) + damp4*rarea_c(i,j)*wk(i,j)
           enddo
        enddo
   else                             ! 2nd order divergence damping
!         area ~ dxb*dyb*sin(alpha)
          do j=js,je+1
             if ( j==1 .or. j==npy ) then
                do i=is-1,ie+1
                   ptc(i,j) = u(i,j)*dyc(i,j)*sina_v(i,j)
                enddo
             else
                do i=is-1,ie+1
                   ptc(i,j) = (u(i,j)-0.5*(va(i,j-1)+va(i,j))*cosa_v(i,j))   &
                            *dyc(i,j)*sina_v(i,j)
                enddo
             endif
          enddo

          do j=js-1,je+1
             do i=is2,ie1
                vort(i,j) = (v(i,j) - 0.5*(ua(i-1,j)+ua(i,j))*cosa_u(i,j))  &
                            *dxc(i,j)*sina_u(i,j)
             enddo
             if (  is   ==  1 ) vort(1,  j) = v(1,  j)*dxc(1,  j)*sina_u(1,  j)
             if ( (ie+1)==npx ) vort(npx,j) = v(npx,j)*dxc(npx,j)*sina_u(npx,j)
          enddo

          do j=js,je+1
             do i=is,ie+1
                delpc(i,j) = vort(i,j-1) - vort(i,j) + ptc(i-1,j) - ptc(i,j)
             enddo
          enddo

! Remove the extra term at the corners:
          if (sw_corner) delpc(1,    1) = delpc(1,    1) - vort(1,    0)
          if (se_corner) delpc(npx,  1) = delpc(npx,  1) - vort(npx,  0)
          if (ne_corner) delpc(npx,npy) = delpc(npx,npy) + vort(npx,npy)
          if (nw_corner) delpc(1,  npy) = delpc(1,  npy) + vort(1,  npy)

          do j=js,je+1
             do i=is,ie+1
                delpc(i,j) = rarea_c(i,j)*delpc(i,j)
                   ke(i,j) = ke(i,j) + damp*delpc(i,j)
             enddo
          enddo
   endif

! Vorticity
! Convert winds to circulation elements:
       do j=jsd,jed+1
          do i=isd,ied
             u(i,j) = u(i,j)*dx(i,j)
          enddo
       enddo
       do j=jsd,jed
          do i=isd,ied+1
             v(i,j) = v(i,j)*dy(i,j)
          enddo
       enddo

       do j=jsd,jed
          do i=isd,ied
             vort(i,j) = f0(i,j) + rarea(i,j)*(u(i,j)-u(i,j+1)-v(i,j)+v(i+1,j))
          enddo
       enddo

       call fv_tp_2d(vort, crx_adv, cry_adv, npx, npy, hord_vt, fx, fy, &
                     xfx_adv,yfx_adv, area, ra_x, ra_y, uniform_ppm)

       do j=js,je+1
          do i=is,ie
             u(i,j) = u(i,j) + ke(i,j) - ke(i+1,j) + fy(i,j)
          enddo
       enddo
       do j=js,je
          do i=is,ie+1
             v(i,j) = v(i,j) + ke(i,j) - ke(i,j+1) - fx(i,j)
          enddo
       enddo

!4TAF#ifdef SW_DYNAMICS
!4TAF      endif ! test_case
!4TAF#endif

 end subroutine d_sw


 subroutine divergence_corner(u, v, ua, va, divg_d, km)
 integer, intent(in):: km
 real, intent(in),  dimension(isd:ied,  jsd:jed+1,km):: u
 real, intent(in),  dimension(isd:ied+1,jsd:jed  ,km):: v
 real, intent(in),  dimension(isd:ied,jsd:jed,km):: ua, va
 real, intent(out), dimension(isd:ied+1,jsd:jed+1,km):: divg_d
! local
 real uf(is-2:ie+2,js-1:je+2)
 real vf(is-1:ie+2,js-2:je+2)
 integer i,j,k
 integer is2, ie1

 is2 = max(2,is); ie1 = min(npx-1,ie+1)

 do k=1,km
    if (grid_type==4) then
        do j=js-1,je+2
           do i=is-2,ie+2
              uf(i,j) = u(i,j,k)*dyc(i,j)
           enddo
        enddo
        do j=js-2,je+2
           do i=is-1,ie+2
              vf(i,j) = v(i,j,k)*dxc(i,j)
           enddo
        enddo
        do j=js-1,je+2
           do i=is-1,ie+2
              divg_d(i,j,k) = rarea_c(i,j)*(vf(i,j-1)-vf(i,j)+uf(i-1,j)-uf(i,j))
           enddo
        enddo
    else

    do j=js,je+1
       if ( j==1 .or. j==npy ) then
            do i=is-1,ie+1
               uf(i,j) = u(i,j,k)*dyc(i,j)*sina_v(i,j)
            enddo
       else
            do i=is-1,ie+1
               uf(i,j) = (u(i,j,k)-0.5*(va(i,j-1,k)+va(i,j,k))*cosa_v(i,j))   &
                        *dyc(i,j)*sina_v(i,j)
            enddo
       endif
    enddo

    do j=js-1,je+1
       do i=is2,ie1
          vf(i,j) = (v(i,j,k) - 0.5*(ua(i-1,j,k)+ua(i,j,k))*cosa_u(i,j))  &
                    *dxc(i,j)*sina_u(i,j)
       enddo
       if (  is   ==  1 ) vf(1,  j) = v(1,  j,k)*dxc(1,  j)*sina_u(1,  j)
       if ( (ie+1)==npx ) vf(npx,j) = v(npx,j,k)*dxc(npx,j)*sina_u(npx,j)
    enddo

    do j=js,je+1
       do i=is,ie+1
          divg_d(i,j,k) = vf(i,j-1) - vf(i,j) + uf(i-1,j) - uf(i,j)
       enddo
    enddo

! Remove the extra term at the corners:
    if (sw_corner) divg_d(1,    1,k) = divg_d(1,    1,k) - vf(1,    0)
    if (se_corner) divg_d(npx,  1,k) = divg_d(npx,  1,k) - vf(npx,  0)
    if (ne_corner) divg_d(npx,npy,k) = divg_d(npx,npy,k) + vf(npx,npy)
    if (nw_corner) divg_d(1,  npy,k) = divg_d(1,  npy,k) + vf(1,  npy)

    do j=js,je+1
       do i=is,ie+1
          divg_d(i,j,k) = rarea_c(i,j)*divg_d(i,j,k)
       enddo
    enddo

    endif

 enddo

 end subroutine divergence_corner


 subroutine xtp_u(c, u, v, flux, iord)

 real, INTENT(IN)  ::   u(isd:ied,jsd:jed+1)
 real, INTENT(IN)  ::   v(isd:ied+1,jsd:jed) ! u-wind
 real, INTENT(IN)  ::   c(is:ie+1,js:je+1)   !  Courant   N (like FLUX)
 real, INTENT(out):: flux(is:ie+1,js:je+1)
 integer, INTENT(IN) :: iord
! Local
 real al(is-1:ie+2), dm(is-2:ie+2)
 real bl(is-1:ie+1)
 real br(is-1:ie+1)
 real dq(is-3:ie+2)
 real dl, dr, xt, pmp, lac, dqt, cfl
 integer i, j

#if !defined(TAF_DOES_NOT_LIKE)
  real, parameter:: r3 =   1./3.
  real, parameter:: t11=27./28., t12=-13./28., t13=3./7., t14=6./7., t15=3./28.
  real, parameter:: s11=11./14., s13=-13./14., s14=4./7., s15=3./14.
! PPM volume mean form:
  real, parameter:: b1 =  7./12.     ! 0.58333333
  real, parameter:: b2 = -1./12.
! 4-pt Lagrange interpolation
  real, parameter:: a1 =  0.5625
  real, parameter:: a2 = -0.0625
!----------------------------------------------
! 3-pt off-center intp formular:
  real, parameter:: c1 = -0.125
  real, parameter:: c2 =  0.75
  real, parameter:: c3 =  0.375
!----------------------------------------------
#endif

 if ( iord==1 ) then

     do j=js,je+1
        do i=is,ie+1
           if( c(i,j)>0. ) then
               flux(i,j) = u(i-1,j)
           else
               flux(i,j) = u(i,j)
           endif 
        enddo
     enddo

 elseif ( iord<=4 ) then
! For regional model (doubly per5iodic domain):
     do j=js,je+1

        do i=is-2,ie+2
           xt = 0.25*(u(i+1,j) - u(i-1,j))
           dm(i) = sign(min(abs(xt), max(u(i-1,j), u(i,j), u(i+1,j)) - u(i,j),  &
                            u(i,j) - min(u(i-1,j), u(i,j), u(i+1,j))), xt)
        enddo

        if ( j==1 .or. j==npy ) then      ! top & bottom edges
            if(  is   ==1 )    dm(1 ) = 0.
            if( (ie+1)==npx )  dm(ie) = 0.
        endif

        do i=is-1,ie+2
           al(i) = 0.5*(u(i-1,j)+u(i,j)) + r3*(dm(i-1) - dm(i))
        enddo

        do i=is,ie+1
          if( c(i,j)>0. ) then
             xt = 2.*dm(i-1)
             dl = sign(min(abs(xt), abs(al(i-1)-u(i-1,j))), xt)
             dr = sign(min(abs(xt), abs(al(i  )-u(i-1,j))), xt)
             cfl = c(i,j) * rdx(i-1,j)
             flux(i,j) = u(i-1,j) + (1.-cfl)*(dr + cfl*(dl-dr))
          else
             xt = 2.*dm(i)
             dl = sign(min(abs(xt), abs(al(i  )-u(i,j))), xt)
             dr = sign(min(abs(xt), abs(al(i+1)-u(i,j))), xt)
             cfl = c(i,j) * rdx(i,j)
             flux(i,j) = u(i,j) - (1.+cfl)*(dl + cfl*(dl-dr))
          endif 
        enddo
     enddo
 else
!*** The default scheme:

     do j=js,je+1
        do i=is-2,ie+2
              xt = 0.25*(u(i+1,j) - u(i-1,j))
           dm(i) = sign(min(abs(xt), max(u(i-1,j), u(i,j), u(i+1,j)) - u(i,j),  &
                            u(i,j) - min(u(i-1,j), u(i,j), u(i+1,j))), xt)
        enddo
        do i=is-3,ie+2
           dq(i) = u(i+1,j) - u(i,j)
        enddo

        if (grid_type < 3) then

           do i=max(3,is-1),min(npx-2,ie+2)
              al(i) = 0.5*(u(i-1,j)+u(i,j)) + r3*(dm(i-1) - dm(i))
           enddo

! Perturbation form:
         if( iord<10 ) then
           do i=max(3,is-1),min(npx-3,ie+1)
              pmp = 2.*dq(i-1)
              lac = pmp - 1.5*dq(i-2)
              br(i) = min(max(0., pmp, lac), max(al(i+1)-u(i,j), min(0.,pmp, lac)))
              pmp = -2.*dq(i)
              lac = pmp + 1.5*dq(i+1)
              bl(i) = min(max(0., pmp, lac), max(al(i  )-u(i,j), min(0.,pmp, lac)))
           enddo
         else
           do i=max(3,is-1),min(npx-3,ie+1)
              bl(i) = al(i  ) - u(i,j)
              br(i) = al(i+1) - u(i,j)
           enddo
         endif

!--------------
! fix the edges
!--------------
           if ( is==1 ) then
              br(2) = al(3) - u(2,j)
                 xt = s15*u(1,j) + s11*u(2,j) - s14*dm(2)
              bl(2) = xt - u(2,j)
              br(1) = xt - u(1,j)
              if( j==1 .or. j==npy ) then
                 bl(0) = 0.   ! out
                 br(0) = 0.   ! edge
                 bl(1) = 0.   ! edge
                 br(1) = 0.   ! in
              else
                 bl(0) = s14*dm(-1) - s11*dq(-1)
!---------------------------------------------------------------
!4TAF#ifdef ONE_SIDE
!4TAF                 xt = t14*u(0,j) + t12*u(-1,j) + t15*u(-2,j)
!4TAF                 br(0) = 2.*xt - u(0,j)
!4TAF                 xt = t14*u(1,j) + t12*u(2,j) + t15*u(3,j)
!4TAF                 bl(1) = 2.*xt - u(1,j)
!4TAF#else
!                xt = t14*(u(0,j)+u(1,j))+t12*(u(-1,j)+u(2,j))+t15*(u(-2,j)+u(3,j))
                 xt = 0.5*((2.*dx(1,j)+dx(2,j))*(u(0,j)+u(1,j))   &
                    - dx(1,j)*(u(-1,j)+u(2,j)))/(dx(1,j)+dx(2,j))
                 br(0) = xt - u(0,j)
                 bl(1) = xt - u(1,j)
!                br(0) = xt - 0.5*(v(1,j-1)+v(1,j))*cosa(1,j) - u(0,j)
!                bl(1) = xt + 0.5*(v(1,j-1)+v(1,j))*cosa(1,j) - u(1,j)
!4TAF#endif
!---------------------------------------------------------------
              endif
              if( iord<10 ) call pert_ppm(1, u(2,j), bl(2), br(2), -1)
           endif

           if ( (ie+1)==npx ) then
              bl(npx-2) = al(npx-2) - u(npx-2,j)
              xt = s15*u(npx-1,j) + s11*u(npx-2,j) + s14*dm(npx-2)
              br(npx-2) = xt - u(npx-2,j)
              bl(npx-1) = xt - u(npx-1,j)
              if( j==1 .or. j==npy ) then
                 bl(npx-1) = 0.  ! in
                 br(npx-1) = 0.  ! edge
                 bl(npx  ) = 0.  ! edge
                 br(npx  ) = 0.  ! out
              else
                br(npx) = s11*dq(npx) - s14*dm(npx+1)
!4TAF#ifdef ONE_SIDE
!4TAF                xt = t14*u(npx-1,j) + t12*u(npx-2,j) + t15*u(npx-3,j)
!4TAF                br(npx-1) = 2.*xt - u(npx-1,j)
!4TAF                xt = t14*u(npx,j) + t12*u(npx+1,j) + t15*u(npx+2,j)
!4TAF                bl(npx  ) = 2.*xt - u(npx  ,j)
!4TAF#else
!               xt = t14*(u(npx-1,j)+u(npx,j)) + t12*(u(npx-2,j)+u(npx+1,j))   &
!                  + t15*(u(npx-3,j)+u(npx+2,j))
                xt = 0.5*( (2.*dx(npx-1,j)+dx(npx-2,j))*(u(npx-1,j)+u(npx,j))  &
                   - dx(npx-1,j)*(u(npx-2,j)+u(npx+1,j)))/(dx(npx-1,j)+dx(npx-2,j))
                br(npx-1) = xt - u(npx-1,j)
                bl(npx  ) = xt - u(npx  ,j)
!               br(npx-1) = xt + 0.5*(v(npx,j-1)+v(npx,j))*cosa(npx,j) - u(npx-1,j)
!               bl(npx  ) = xt - 0.5*(v(npx,j-1)+v(npx,j))*cosa(npx,j) - u(npx  ,j)
!4TAF#endif
              endif
              if( iord<10 ) call pert_ppm(1, u(npx-2,j), bl(npx-2), br(npx-2), -1)
           endif
       else
          
          do i=is-1,ie+2
             al(i) = 0.5*(u(i-1,j)+u(i,j)) + r3*(dm(i-1) - dm(i))
          enddo

          do i=is-1,ie+1
             pmp = -2.*dq(i)
             lac = pmp + 1.5*dq(i+1)
             bl(i) = min(max(0., pmp, lac), max(al(i  )-u(i,j), min(0.,pmp, lac)))
             pmp = 2.*dq(i-1)
             lac = pmp - 1.5*dq(i-2)
             br(i) = min(max(0., pmp, lac), max(al(i+1)-u(i,j), min(0.,pmp, lac)))
          enddo
       endif
       
       do i=is,ie+1
          if( c(i,j)>0. ) then
             cfl = c(i,j)*rdx(i-1,j)
             flux(i,j) = u(i-1,j) + (1.-cfl)*(br(i-1)-cfl*(bl(i-1)+br(i-1)))
          else
             cfl = c(i,j)*rdx(i,j)
             flux(i,j) = u(i,  j) + (1.+cfl)*(bl(i  )+cfl*(bl(i  )+br(i  )))
          endif
       enddo
    enddo
 endif

 end subroutine xtp_u


 subroutine ytp_v(c, u, v, flux, jord)
 integer, intent(IN):: jord
 real, INTENT(IN)  ::   u(isd:ied,jsd:jed+1)
 real, INTENT(IN)  ::   v(isd:ied+1,jsd:jed)
 real, INTENT(IN) ::    c(is:ie+1,js:je+1)   !  Courant   N (like FLUX)
 real, INTENT(OUT):: flux(is:ie+1,js:je+1)
! Local:
 real dm(is:ie+1,js-2:je+2)
 real al(is:ie+1,js-1:je+2)
 real bl(is:ie+1,js-1:je+1)
 real br(is:ie+1,js-1:je+1)
 real dq(is:ie+1,js-3:je+2)
 real xt, dl, dr, pmp, lac, dqt, cfl
 integer i, j

#if !defined(TAF_DOES_NOT_LIKE)
  real, parameter:: r3 =   1./3.
  real, parameter:: t11=27./28., t12=-13./28., t13=3./7., t14=6./7., t15=3./28.
  real, parameter:: s11=11./14., s13=-13./14., s14=4./7., s15=3./14.
! PPM volume mean form:
  real, parameter:: b1 =  7./12.     ! 0.58333333
  real, parameter:: b2 = -1./12.
! 4-pt Lagrange interpolation
  real, parameter:: a1 =  0.5625
  real, parameter:: a2 = -0.0625
!----------------------------------------------
! 3-pt off-center intp formular:
  real, parameter:: c1 = -0.125
  real, parameter:: c2 =  0.75
  real, parameter:: c3 =  0.375
!----------------------------------------------
#endif

 if ( jord==1 ) then

      do j=js,je+1
         do i=is,ie+1
            if( c(i,j)>0. ) then
               flux(i,j) = v(i,j-1)
            else
               flux(i,j) = v(i,j)
            endif
         enddo
      enddo

 elseif ( jord<=4 ) then
! For regional model (doubly per5iodic domain):

     do j=js-2,je+2
        do i=is,ie+1
                xt = 0.25*(v(i,j+1) - v(i,j-1))
           dm(i,j) = sign(min(abs(xt), max(v(i,j-1), v(i,j), v(i,j+1)) - v(i,j),   &
                              v(i,j) - min(v(i,j-1), v(i,j), v(i,j+1))), xt)
        enddo
     enddo

     if ( is==1 ) then
         if (  js   ==1 )   dm(1, 1) = 0.
         if ( (je+1)==npy ) dm(1,je) = 0.
     endif

     if ( (ie+1)==npx ) then
         if (  js   ==1   )  dm(npx, 1) = 0.
         if ( (je+1)==npy )  dm(npx,je) = 0.
     endif


   do j=js-1,je+2
      do i=is,ie+1
         al(i,j) = 0.5*(v(i,j-1)+v(i,j)) + r3*(dm(i,j-1) - dm(i,j))
      enddo
   enddo

   do j=js,je+1
      do i=is,ie+1
         if(c(i,j)>0.) then
            xt = 2.*dm(i,j-1)
            dl = sign(min(abs(xt), abs(al(i,j-1)-v(i,j-1))), xt)
            dr = sign(min(abs(xt), abs(al(i,j)-v(i,j-1))),   xt)
            cfl = c(i,j)*rdy(i,j-1)
            flux(i,j) = v(i,j-1) + (1.-cfl)*(dr + cfl*(dl-dr))
         else
            xt = 2.*dm(i,j)
            dl = sign(min(abs(xt), abs(al(i,j)-v(i,j))),   xt)
            dr = sign(min(abs(xt), abs(al(i,j+1)-v(i,j))), xt)
            cfl = c(i,j)*rdy(i,j)
            flux(i,j) = v(i,j) - (1.+cfl)*(dl + cfl*(dl-dr))
         endif
      enddo
   enddo
 else
   do j=js-2,je+2
      do i=is,ie+1
              xt = 0.25*(v(i,j+1) - v(i,j-1))
         dm(i,j) = sign(min(abs(xt), max(v(i,j-1), v(i,j), v(i,j+1)) - v(i,j),   &
                            v(i,j) - min(v(i,j-1), v(i,j), v(i,j+1))), xt)
      enddo
   enddo

   do j=js-3,je+2
      do i=is,ie+1
         dq(i,j) = v(i,j+1) - v(i,j)
      enddo
   enddo

   if (grid_type < 3) then
      do j=max(3,js-1),min(npy-2,je+2)
         do i=is,ie+1
            al(i,j) = 0.5*(v(i,j-1)+v(i,j)) + r3*(dm(i,j-1)-dm(i,j))
         enddo
      enddo
      
    if (jord<10 ) then
      do j=max(3,js-1),min(npy-3,je+1)
         do i=is,ie+1
            pmp = 2.*dq(i,j-1)
            lac = pmp - 1.5*dq(i,j-2)
            br(i,j) = min(max(0.,pmp,lac), max(al(i,j+1)-v(i,j), min(0.,pmp,lac)))
            pmp = -2.*dq(i,j) 
            lac = pmp + 1.5*dq(i,j+1)
            bl(i,j) = min(max(0.,pmp,lac), max(al(i,j)-v(i,j), min(0.,pmp,lac)))
         enddo
      enddo
    else
      do j=max(3,js-1),min(npy-3,je+1)
         do i=is,ie+1
            bl(i,j) = al(i,j  ) - v(i,j)
            br(i,j) = al(i,j+1) - v(i,j)
         enddo
      enddo
    endif
      
!--------------
! fix the edges
!--------------
      if( js==1 ) then
         do i=is,ie+1
            br(i,2) = al(i,3) - v(i,2)
                 xt = s15*v(i,1) + s11*v(i,2) - s14*dm(i,2)
            br(i,1) = xt - v(i,1)
            bl(i,2) = xt - v(i,2)

            bl(i,0) = s14*dm(i,-1) - s11*dq(i,-1)

!4TAF#ifdef ONE_SIDE
!4TAF            xt = t14*v(i,1) + t12*v(i,2) + t15*v(i,3)
!4TAF            bl(i,1) = 2.*xt - v(i,1)
!4TAF            xt = t14*v(i,0) + t12*v(i,-1) + t15*v(i,-2)
!4TAF            br(i,0) = 2.*xt - v(i,0)
!4TAF#else
!           xt =  t14*(v(i,0)+v(i,1)) +  t12*(v(i,-1)+v(i,2)) + t15*(v(i,-2)+v(i,3))
            xt = 0.5*( (2.*dy(i,1)+dy(i,2))*(v(i,0)+v(i,1))   &
               - dy(i,1)*(v(i,-1)+v(i,2)))/(dy(i,1)+dy(i,2))
            bl(i,1) = xt - v(i,1)
            br(i,0) = xt - v(i,0)
!           br(i,0) = xt - 0.5*(u(i-1,1)+u(i,1))*cosa(i,1) - v(i,0)
!           bl(i,1) = xt + 0.5*(u(i-1,1)+u(i,1))*cosa(i,1) - v(i,1)
!4TAF#endif
         enddo
         if ( is==1 ) then
               bl(1,0) = 0.  ! out
               br(1,0) = 0.  ! edge
               bl(1,1) = 0.  ! edge
               br(1,1) = 0.  ! in
         endif
         if ( (ie+1)==npx ) then
               bl(npx,0) = 0.   ! out
               br(npx,0) = 0.   ! edge
               bl(npx,1) = 0.   ! edge
               br(npx,1) = 0.   ! in
         endif
         j=2
         if( jord<10 ) call pert_ppm(ie-is+2, v(is,j), bl(is,j), br(is,j), -1)
      endif

      if( (je+1)==npy ) then
         do i=is,ie+1
            bl(i,npy-2) = al(i,npy-2) - v(i,npy-2)
            xt = s15*v(i,npy-1) + s11*v(i,npy-2) + s14*dm(i,npy-2)
            br(i,npy-2) = xt - v(i,npy-2)
            bl(i,npy-1) = xt - v(i,npy-1)
            br(i,npy) = s11*dq(i,npy) - s14*dm(i,npy+1)
!4TAF#ifdef ONE_SIDE
!4TAF            xt = t14*v(i,npy-1) + t12*v(i,npy-2) + t15*v(i,npy-3)
!4TAF            br(i,npy-1) = 2.*xt - v(i,npy-1)
!4TAF            xt = t14*v(i,npy) + t12*v(i,npy+1) + t15*v(i,npy+2)
!4TAF            bl(i,npy  ) = 2.*xt - v(i,npy)
!4TAF#else
!           xt = t14*(v(i,npy-1)+v(i,npy)) + t12*(v(i,npy-2)+v(i,npy+1))  &
!              + t15*(v(i,npy-3)+v(i,npy+2))
            xt = 0.5*((2.*dy(i,npy-1)+dy(i,npy-2))*(v(i,npy-1)+v(i,npy)) -  &
                 dy(i,npy-1)*(v(i,npy-2)+v(i,npy+1)))/(dy(i,npy-1)+dy(i,npy-2))
            br(i,npy-1) = xt - v(i,npy-1)
            bl(i,npy  ) = xt - v(i,npy)
!           br(i,npy-1) = xt + 0.5*(u(i-1,npy)+u(i,npy))*cosa(i,npy) - v(i,npy-1)
!           bl(i,npy  ) = xt - 0.5*(u(i-1,npy)+u(i,npy))*cosa(i,npy) - v(i,npy)
!4TAF#endif
         enddo
         if ( is==1 ) then
              bl(1,npy-1) = 0.  ! in
              br(1,npy-1) = 0.  ! edge
              bl(1,npy  ) = 0.  ! edge
              br(1,npy  ) = 0.  ! out
         endif
         if ( (ie+1)==npx ) then
              bl(npx,npy-1) = 0.  ! in
              br(npx,npy-1) = 0.  ! edge
              bl(npx,npy  ) = 0.  ! edge
              br(npx,npy  ) = 0.  ! out
         endif
         j=npy-2
         if( jord<10 ) call pert_ppm(ie-is+2, v(is,j), bl(is,j), br(is,j), -1)
      endif

   else

      do j=js-1,je+2
         do i=is,ie+1
            al(i,j) = 0.5*(v(i,j-1)+v(i,j)) + r3*(dm(i,j-1)-dm(i,j))
         enddo
      enddo
      
      do j=js-1,je+1
         do i=is,ie+1
            pmp = 2.*dq(i,j-1)
            lac = pmp - 1.5*dq(i,j-2)
            br(i,j) = min(max(0.,pmp,lac), max(al(i,j+1)-v(i,j), min(0.,pmp,lac)))
            pmp = -2.*dq(i,j) 
            lac = pmp + 1.5*dq(i,j+1)
            bl(i,j) = min(max(0.,pmp,lac), max(al(i,j)-v(i,j), min(0.,pmp,lac)))
         enddo
      enddo
      
   endif

   do j=js,je+1
      do i=is,ie+1
         if(c(i,j)>0.) then
            cfl = c(i,j)*rdy(i,j-1)
            flux(i,j) = v(i,j-1) + (1.-cfl)*(br(i,j-1)-cfl*(bl(i,j-1)+br(i,j-1)))
         else
            cfl = c(i,j)*rdy(i,j)
            flux(i,j) = v(i,j  ) + (1.+cfl)*(bl(i,j  )+cfl*(bl(i,j  )+br(i,j  )))
         endif
      enddo
   enddo

endif

end subroutine ytp_v



 subroutine d2a2c_vect(u, v, ua, va, uc, vc, ut, vt, dord4)
  real, intent(in) ::  u(isd:ied,jsd:jed+1)
  real, intent(in) ::  v(isd:ied+1,jsd:jed)
  logical, intent(in):: dord4
  real, intent(out), dimension(isd:ied+1,jsd:jed  ):: uc
  real, intent(out), dimension(isd:ied  ,jsd:jed+1):: vc
  real, intent(out), dimension(isd:ied  ,jsd:jed  ):: ua, va, ut, vt
! Local 
  real, dimension(isd:ied,jsd:jed):: utmp, vtmp
  integer npt, i, j, ifirst, ilast, id

#if !defined(TAF_DOES_NOT_LIKE)
  real, parameter:: r3 =   1./3.
  real, parameter:: t11=27./28., t12=-13./28., t13=3./7., t14=6./7., t15=3./28.
  real, parameter:: s11=11./14., s13=-13./14., s14=4./7., s15=3./14.
! PPM volume mean form:
  real, parameter:: b1 =  7./12.     ! 0.58333333
  real, parameter:: b2 = -1./12.
! 4-pt Lagrange interpolation
  real, parameter:: a1 =  0.5625
  real, parameter:: a2 = -0.0625
!----------------------------------------------
! 3-pt off-center intp formular:
  real, parameter:: c1 = -0.125
  real, parameter:: c2 =  0.75
  real, parameter:: c3 =  0.375
!----------------------------------------------
#endif

  if ( dord4 ) then
       id = 1
  else
       id = 0
  endif


  if (grid_type < 3) then
     npt = 4
  else
     npt = -2
  endif

!----------
! Interior:
!----------
  do j=max(npt,js-1),min(npy-npt,je+1)
     do i=max(npt,isd),min(npx-npt,ied)
        utmp(i,j) = a2*(u(i,j-1)+u(i,j+2)) + a1*(u(i,j)+u(i,j+1))
     enddo
  enddo
  do j=max(npt,jsd),min(npy-npt,jed)
     do i=max(npt,is-1),min(npx-npt,ie+1)
        vtmp(i,j) = a2*(v(i-1,j)+v(i+2,j)) + a1*(v(i,j)+v(i+1,j))
     enddo
  enddo

!----------
! edges:
!----------
  if (grid_type < 3) then

  if ( js==1 .or. jsd<npt) then
!4TAF#ifdef CONSV_VT
!4TAF      do j=jsd,npt-1
!4TAF         do i=isd,ied+1
!4TAF            uc(i,j) = v(i,j)*dy(i,j)
!4TAF         enddo
!4TAF      enddo
!4TAF      do j=jsd,npt
!4TAF         do i=isd,ied
!4TAF            vc(i,j) = u(i,j)*dx(i,j)
!4TAF         enddo
!4TAF      enddo
!4TAF#endif
      do j=jsd,npt-1
         do i=isd,ied
!4TAF#ifdef CONSV_VT
!4TAF            utmp(i,j) = 0.5*(vc(i,j) + vc(i,j+1)) * rdxa(i,j)
!4TAF            vtmp(i,j) = 0.5*(uc(i,j) + uc(i+1,j)) * rdya(i,j)
!4TAF#else
            utmp(i,j) = 0.5*(u(i,j) + u(i,j+1))
            vtmp(i,j) = 0.5*(v(i,j) + v(i+1,j))
!4TAF#endif
         enddo
      enddo
  endif
  if ( (je+1)==npy .or. jed>=(npy-npt)) then
!4TAF#ifdef CONSV_VT
!4TAF      do j=npy-npt+1,jed
!4TAF         do i=isd,ied+1
!4TAF            uc(i,j) = v(i,j)*dy(i,j)
!4TAF         enddo
!4TAF      enddo
!4TAF      do j=npy-npt+1,jed+1
!4TAF         do i=isd,ied
!4TAF            vc(i,j) = u(i,j)*dx(i,j)
!4TAF         enddo
!4TAF      enddo
!4TAF#endif
      do j=npy-npt+1,jed
         do i=isd,ied
!4TAF#ifdef CONSV_VT
!4TAF            utmp(i,j) = 0.5*(vc(i,j) + vc(i,j+1)) * rdxa(i,j)
!4TAF            vtmp(i,j) = 0.5*(uc(i,j) + uc(i+1,j)) * rdya(i,j)
!4TAF#else
            utmp(i,j) = 0.5*(u(i,j) + u(i,j+1))
            vtmp(i,j) = 0.5*(v(i,j) + v(i+1,j))
!4TAF#endif
         enddo
      enddo
  endif
  if ( is==1 .or. isd<npt ) then
!4TAF#ifdef CONSV_VT
!4TAF      do j=max(npt,jsd),min(npy-npt,jed)
!4TAF         do i=isd,npt
!4TAF            uc(i,j) = v(i,j)*dy(i,j)
!4TAF         enddo
!4TAF      enddo
!4TAF      do j=max(npt,jsd),min(npy-npt+1,jed+1)
!4TAF         do i=isd,npt-1
!4TAF            vc(i,j) = u(i,j)*dx(i,j)
!4TAF         enddo
!4TAF      enddo
!4TAF#endif
      do j=max(npt,jsd),min(npy-npt,jed)
         do i=isd,npt-1
!4TAF#ifdef CONSV_VT
!4TAF            utmp(i,j) = 0.5*(vc(i,j) + vc(i,j+1)) * rdxa(i,j)
!4TAF            vtmp(i,j) = 0.5*(uc(i,j) + uc(i+1,j)) * rdya(i,j)
!4TAF#else
            utmp(i,j) = 0.5*(u(i,j) + u(i,j+1))
            vtmp(i,j) = 0.5*(v(i,j) + v(i+1,j))
!4TAF#endif
         enddo
      enddo
  endif
  if ( (ie+1)==npx .or. ied>=(npx-npt)) then
!4TAF#ifdef CONSV_VT
!4TAF      do j=max(npt,jsd),min(npy-npt,jed)
!4TAF         do i=npx-npt+1,ied+1
!4TAF            uc(i,j) = v(i,j)*dy(i,j)
!4TAF         enddo
!4TAF      enddo
!4TAF      do j=max(npt,jsd),min(npy-npt+1,jed+1)
!4TAF         do i=npx-npt+1,ied
!4TAF            vc(i,j) = u(i,j)*dx(i,j)
!4TAF         enddo
!4TAF      enddo
!4TAF#endif
      do j=max(npt,jsd),min(npy-npt,jed)
         do i=npx-npt+1,ied
!4TAF#ifdef CONSV_VT
!4TAF            utmp(i,j) = 0.5*(vc(i,j) + vc(i,j+1)) * rdxa(i,j)
!4TAF            vtmp(i,j) = 0.5*(uc(i,j) + uc(i+1,j)) * rdya(i,j)
!4TAF#else
            utmp(i,j) = 0.5*(u(i,j) + u(i,j+1))
            vtmp(i,j) = 0.5*(v(i,j) + v(i+1,j))
!4TAF#endif
         enddo
      enddo
  endif

  endif

  do j=js-1-id,je+1+id
     do i=is-1-id,ie+1+id
        ua(i,j) = (utmp(i,j)-vtmp(i,j)*cosa_s(i,j)) * rsin2(i,j)
        va(i,j) = (vtmp(i,j)-utmp(i,j)*cosa_s(i,j)) * rsin2(i,j)
     enddo
  enddo

! A -> C
!--------------
! Fix the edges
!--------------
! Xdir:
     if( sw_corner ) then
         do i=-2,0
            utmp(i,0) = -vtmp(0,1-i)
         enddo
     endif
     if( se_corner ) then
         do i=0,2
            utmp(npx+i,0) = vtmp(npx,i+1)
         enddo
     endif
     if( ne_corner ) then
         do i=0,2
            utmp(npx+i,npy) = -vtmp(npx,je-i)
         enddo
     endif
     if( nw_corner ) then
         do i=-2,0
            utmp(i,npy) = vtmp(0,je+i)
         enddo
     endif

  if (grid_type < 3) then
     ifirst = max(3,    is-1)
     ilast  = min(npx-2,ie+2)
  else
     ifirst = is-1
     ilast  = ie+2
  endif
!---------------------------------------------
! 4th order interpolation for interior points:
!---------------------------------------------
     do j=js-1,je+1
        do i=ifirst,ilast
           uc(i,j) = a1*(utmp(i-1,j)+utmp(i,j))+a2*(utmp(i-2,j)+utmp(i+1,j))
           ut(i,j) = (uc(i,j) - v(i,j)*cosa_u(i,j))*rsin_u(i,j)
        enddo
     enddo

     if (grid_type < 3) then

     if( is==1 ) then
        do j=js-1,je+1
           uc(0,j) = c1*utmp(-2,j) + c2*utmp(-1,j) + c3*utmp(0,j) 
! 3-pt extrapolation --------------------------------------------------
           uc(1,j) = ( t14*(utmp( 0,j)+utmp(1,j))    &
                     + t12*(utmp(-1,j)+utmp(2,j))    &
                     + t15*(utmp(-2,j)+utmp(3,j)) )*rsin_u(1,j)
           uc(2,j) = c1*utmp(3,j) + c2*utmp(2,j) + c3*utmp(1,j)
           ut(0,j) = (uc(0,j) - v(0,j)*cosa_u(0,j))*rsin_u(0,j)
           ut(1,j) =  uc(1,j) * rsin_u(1,j)
           ut(2,j) = (uc(2,j) - v(2,j)*cosa_u(2,j))*rsin_u(2,j)
        enddo
     endif

     if( (ie+1)==npx ) then
        do j=js-1,je+1
           uc(npx-1,j) = c1*utmp(npx-3,j)+c2*utmp(npx-2,j)+c3*utmp(npx-1,j) 
! 3-pt extrapolation --------------------------------------------------------
           uc(npx,j) = (t14*(utmp(npx-1,j)+utmp(npx,j))+      &
                        t12*(utmp(npx-2,j)+utmp(npx+1,j))     &
                      + t15*(utmp(npx-3,j)+utmp(npx+2,j)))*rsin_u(npx,j)
           uc(npx+1,j) = c3*utmp(npx,j) + c2*utmp(npx+1,j) + c1*utmp(npx+2,j) 
           ut(npx-1,j) = (uc(npx-1,j)-v(npx-1,j)*cosa_u(npx-1,j))*rsin_u(npx-1,j)
           ut(npx,  j) =  uc(npx,j) * rsin_u(npx,j)
           ut(npx+1,j) = (uc(npx+1,j)-v(npx+1,j)*cosa_u(npx+1,j))*rsin_u(npx+1,j)
        enddo
     endif

     endif

!------
! Ydir:
!------
     if( sw_corner ) then
         do j=-2,0
            vtmp(0,j) = -utmp(1-j,0)
         enddo
     endif
     if( nw_corner ) then
         do j=0,2
            vtmp(0,npy+j) = utmp(j+1,npy)
         enddo
     endif
     if( se_corner ) then
         do j=-2,0
            vtmp(npx,j) = utmp(ie+j,0)
         enddo
     endif
     if( ne_corner ) then
         do j=0,2
            vtmp(npx,npy+j) = -utmp(ie-j,npy)
         enddo
     endif

     if (grid_type < 3) then

     do j=js-1,je+2
      if ( j==1 ) then
        do i=is-1,ie+1
! 3-pt extrapolation -----------------------------------------
           vc(i,1) = (t14*(vtmp(i, 0)+vtmp(i,1))    &
                    + t12*(vtmp(i,-1)+vtmp(i,2))    &
                    + t15*(vtmp(i,-2)+vtmp(i,3)))*rsin_v(i,1)
           vt(i,1) = vc(i,1) * rsin_v(i,1)
        enddo
      elseif ( j==0 .or. j==(npy-1) ) then
        do i=is-1,ie+1
           vc(i,j) = c1*vtmp(i,j-2) + c2*vtmp(i,j-1) + c3*vtmp(i,j)
           vt(i,j) = (vc(i,j) - u(i,j)*cosa_v(i,j))*rsin_v(i,j)
        enddo
      elseif ( j==2 .or. j==(npy+1) ) then
        do i=is-1,ie+1
           vc(i,j) = c1*vtmp(i,j+1) + c2*vtmp(i,j) + c3*vtmp(i,j-1)
           vt(i,j) = (vc(i,j) - u(i,j)*cosa_v(i,j))*rsin_v(i,j)
        enddo
      elseif ( j==npy ) then
        do i=is-1,ie+1
! 3-pt extrapolation --------------------------------------------------------
           vc(i,npy) = (t14*(vtmp(i,npy-1)+vtmp(i,npy))    &
                      + t12*(vtmp(i,npy-2)+vtmp(i,npy+1))  &
                      + t15*(vtmp(i,npy-3)+vtmp(i,npy+2)))*rsin_v(i,npy)
           vt(i,npy) = vc(i,npy) * rsin_v(i,npy)
        enddo
      else
! 4th order interpolation for interior points:
        do i=is-1,ie+1
           vc(i,j) = a2*(vtmp(i,j-2)+vtmp(i,j+1))+a1*(vtmp(i,j-1)+vtmp(i,j))
           vt(i,j) = (vc(i,j) - u(i,j)*cosa_v(i,j))*rsin_v(i,j)
        enddo
      endif
     enddo

    else
! 4th order interpolation:
       do j=js-1,je+2
          do i=is-1,ie+1
!            vc(i,j) = a2*(vtmp(i,j-2)+vtmp(i,j+1))+a1*(vtmp(i,j-1)+vtmp(i,j))
             vc(i,j) = b2*(vtmp(i,j-2)+vtmp(i,j+1))+b1*(vtmp(i,j-1)+vtmp(i,j))
             vt(i,j) = vc(i,j)
          enddo
       enddo
    endif

 end subroutine d2a2c_vect
 

 subroutine d2a2c_vect_v2( u, v, ua, va, uc, vc, ut, vt )
  real, intent(in)::  u(isd:ied,jsd:jed+1)
  real, intent(in)::  v(isd:ied+1,jsd:jed)
  real, intent(out), dimension(isd:ied+1,jsd:jed  ):: uc
  real, intent(out), dimension(isd:ied  ,jsd:jed+1):: vc
  real, intent(out), dimension(isd:ied  ,jsd:jed  ):: ua, va, ut, vt
! Local 
    real, dimension(is-2:ie+2,js-2:je+2):: wk
    real :: utmp, vtmp
    integer i, j

! needs only ut[is-1:ie+2,js-1:je+1], vt[is-1:ie+1,js-1:je+2]

     do j=js-2,je+2
        do i=is-2,ie+3
           uc(i,j) = v(i,j)*dy(i,j)
        enddo
     enddo
     do j=js-2,je+3
        do i=is-2,ie+2
           vc(i,j) = u(i,j)*dx(i,j)
        enddo
     enddo

! D --> A
! Co-variant to Co-variant "vorticity-conserving" interpolation
     do j=js-2,je+2
        do i=is-2,ie+2
           utmp = 0.5*(vc(i,j) + vc(i,j+1)) * rdxa(i,j)
           vtmp = 0.5*(uc(i,j) + uc(i+1,j)) * rdya(i,j)
           ua(i,j) = (utmp-vtmp*cosa_s(i,j))*rsin2(i,j)
           va(i,j) = (vtmp-utmp*cosa_s(i,j))*rsin2(i,j)
        enddo
     enddo

! Xdir:
     if( sw_corner ) then
         ua(-1,0) = -va(0,2)
         ua( 0,0) = -va(0,1) 
     endif
     if( se_corner ) then
         ua(npx,  0) = va(npx,1)
         ua(npx+1,0) = va(npx,2) 
     endif
     if( ne_corner ) then
         ua(npx,  npy) = -va(npx,npy-1)
         ua(npx+1,npy) = -va(npx,npy-2) 
     endif
     if( nw_corner ) then
         ua(-1,npy) = va(0,npy-2)
         ua( 0,npy) = va(0,npy-1) 
     endif

! A -> C
!--------------------------------------------
! Divergence conserving interp to cell walls
!--------------------------------------------
     do j=js-1,je+1
        do i=is-2,ie+2
           wk(i,j) = ua(i,j)*dya(i,j)*sina_s(i,j)
        enddo
     enddo
     do j=js-1,je+1
        do i=is-1,ie+2
           ut(i,j) = 0.5*(wk(i-1,j)+wk(i,j)) / (dy(i,j)*sina_u(i,j))
           uc(i,j) = ut(i,j) + 0.5*(va(i-1,j)*cosa_s(i-1,j)+va(i,j)*cosa_s(i,j))
        enddo
     enddo

     if (grid_type < 3) then
     if ( is==1 ) then
        i=1
        do j=js-1,je+1
!          ut(i,j) = 0.75*(ua(i-1,j)+ua(i,j))-0.25*(ua(i-2,j)+ua(i+1,j))
           ut(i,j) = 0.25*(-ua(-1,j) + 3.*(ua(0,j)+ua(1,j)) - ua(2,j))
           uc(i,j) = ut(i,j)*sina_u(i,j)
        enddo
     endif

     if ( (ie+1)==npx ) then
        i=npx
        do j=js-1,je+1
!          ut(i,j) = 0.75*(ua(i-1,j)+ua(i,j))-0.25*(ua(i-2,j)+ua(i+1,j))
           ut(i,j) = 0.25*(-ua(i-2,j) + 3.*(ua(i-1,j)+ua(i,j)) - ua(i+1,j))
           uc(i,j) = ut(i,j)*sina_u(i,j)
        enddo
     endif
     endif

! Ydir:
     if( sw_corner ) then
         va(0,-1) = -ua(2,0)
         va(0, 0) = -ua(1,0)
     endif
     if( se_corner ) then
         va(npx, 0) = ua(npx-1,0)
         va(npx,-1) = ua(npx-2,0)
     endif
     if( ne_corner ) then
         va(npx,npy  ) = -ua(npx-1,npy)
         va(npx,npy+1) = -ua(npx-2,npy)
     endif
     if( nw_corner ) then
         va(0,npy)   = ua(1,npy)
         va(0,npy+1) = ua(2,npy)
     endif

     do j=js-2,je+2
        do i=is-1,ie+1
           wk(i,j) = va(i,j)*dxa(i,j)*sina_s(i,j)
        enddo
     enddo

     if (grid_type < 3) then
     do j=js-1,je+2
        if ( j==1 .or. j==npy ) then
          do i=is-1,ie+1
             vt(i,j) = 0.25*(-va(i,j-2) + 3.*(va(i,j-1)+va(i,j)) - va(i,j+1))
             vc(i,j) = vt(i,j)*sina_v(i,j)
          enddo
        else
          do i=is-1,ie+1
             vt(i,j) = 0.5*(wk(i,j-1)+wk(i,j)) / (dx(i,j)*sina_v(i,j))
             vc(i,j) = vt(i,j) + 0.5*(ua(i,j-1)*cosa_s(i,j-1)+ua(i,j)*cosa_s(i,j))
          enddo
        endif
     enddo
     else
        do j=js-1,je+2
           do i=is-1,ie+1
              vt(i,j) = 0.5*(wk(i,j-1)+wk(i,j)) / (dx(i,j)*sina_v(i,j))
              vc(i,j) = vt(i,j) + 0.5*(ua(i,j-1)*cosa_s(i,j-1)+ua(i,j)*cosa_s(i,j))
           enddo
        enddo
     endif

 end subroutine d2a2c_vect_v2

 
      
 subroutine d2a2c_vect_v1( u,v, ua,va, uc,vc, ut,vt )
  real, intent(in) ::  u(isd:ied,jsd:jed+1)
  real, intent(in) ::  v(isd:ied+1,jsd:jed)
  real, intent(out), dimension(isd:ied+1,jsd:jed  ):: uc
  real, intent(out), dimension(isd:ied  ,jsd:jed+1):: vc
  real, intent(out), dimension(isd:ied  ,jsd:jed  ):: ua, va, ut, vt
! Local 
  real, dimension(isd:ied,jsd:jed):: v1, v2, v3
  real, dimension(isd:ied,jsd:jed):: utmp, vtmp
    real vw1, vw2, vw3
    real vs1, vs2, vs3
    real up, vp
    integer i, j

#if !defined(TAF_DOES_NOT_LIKE)
  real, parameter:: r3 =   1./3.
  real, parameter:: t11=27./28., t12=-13./28., t13=3./7., t14=6./7., t15=3./28.
  real, parameter:: s11=11./14., s13=-13./14., s14=4./7., s15=3./14.
! PPM volume mean form:
  real, parameter:: b1 =  7./12.     ! 0.58333333
  real, parameter:: b2 = -1./12.
! 4-pt Lagrange interpolation
  real, parameter:: a1 =  0.5625
  real, parameter:: a2 = -0.0625
!----------------------------------------------
! 3-pt off-center intp formular:
  real, parameter:: c1 = -0.125
  real, parameter:: c2 =  0.75
  real, parameter:: c3 =  0.375
!----------------------------------------------
#endif

! Needs ut[is-1:ie+2,js-1:je+1], vt[is-1:ie+1,js-1:je+2]

     do j=jsd,jed
        do i=isd,ied+1
           uc(i,j) = v(i,j)*dy(i,j)
        enddo
     enddo
     do j=jsd,jed+1
        do i=isd,ied
           vc(i,j) = u(i,j)*dx(i,j)
        enddo
     enddo

! D --> A
     do j=jsd,jed
        do i=isd,ied
           up = 0.5*(vc(i,j) + vc(i,j+1)) * rdxa(i,j)
           vp = 0.5*(uc(i,j) + uc(i+1,j)) * rdya(i,j)
           ua(i,j) = (up-vp*cosa_s(i,j)) * rsin2(i,j)
           va(i,j) = (vp-up*cosa_s(i,j)) * rsin2(i,j)
           v1(i,j) = ua(i,j)*ec1(1,i,j) + va(i,j)*ec2(1,i,j)
           v2(i,j) = ua(i,j)*ec1(2,i,j) + va(i,j)*ec2(2,i,j)
           v3(i,j) = ua(i,j)*ec1(3,i,j) + va(i,j)*ec2(3,i,j)
        enddo
     enddo

! A -> C (across face averaging taking place here):
! Xdir
     call fill3_4corners(v1, v2, v3, 1)
!    call copy_corners(v1, npx, npy, 1)
!    call copy_corners(v2, npx, npy, 1)
!    call copy_corners(v3, npx, npy, 1)

! 4th order interpolation:
     do j=js-1,je+1
        do i=max(3,is-1),min(npx-2,ie+2)
           vw1 = a2*(v1(i-2,j)+v1(i+1,j)) + a1*(v1(i-1,j)+v1(i,j))
           vw2 = a2*(v2(i-2,j)+v2(i+1,j)) + a1*(v2(i-1,j)+v2(i,j))
           vw3 = a2*(v3(i-2,j)+v3(i+1,j)) + a1*(v3(i-1,j)+v3(i,j))
           uc(i,j) = vw1*ew(1,i,j,1) + vw2*ew(2,i,j,1) + vw3*ew(3,i,j,1)
           ut(i,j) = (uc(i,j)-v(i,j)*cosa_u(i,j)) * rsin_u(i,j)
        enddo
     enddo

! Fix the edge:
     if ( is==1 ) then
        do j=js-1,je+1
        i=0
           vw1 = c1*v1(-2,j) + c2*v1(-1,j) + c3*v1(0,j) 
           vw2 = c1*v2(-2,j) + c2*v2(-1,j) + c3*v2(0,j) 
           vw3 = c1*v3(-2,j) + c2*v3(-1,j) + c3*v3(0,j) 
           uc(i,j) = vw1*ew(1,i,j,1) + vw2*ew(2,i,j,1) + vw3*ew(3,i,j,1)
           ut(i,j) = (uc(i,j)-v(i,j)*cosa_u(i,j)) * rsin_u(i,j)
        i=1
           vw1 = 3.*(v1(0,j)+v1(1,j)) - (v1(-1,j)+v1(2,j))
           vw2 = 3.*(v2(0,j)+v2(1,j)) - (v2(-1,j)+v2(2,j))
           vw3 = 3.*(v3(0,j)+v3(1,j)) - (v3(-1,j)+v3(2,j))
           uc(i,j) = 0.25*(vw1*ew(1,i,j,1) + vw2*ew(2,i,j,1) + vw3*ew(3,i,j,1))
           ut(i,j) = uc(i,j)*rsin_u(i,j)
        i=2
           vw1 = c3*v1(1,j) + c2*v1(2,j) + c1*v1(3,j)
           vw2 = c3*v2(1,j) + c2*v2(2,j) + c1*v2(3,j)
           vw3 = c3*v3(1,j) + c2*v3(2,j) + c1*v3(3,j)
           uc(i,j) = vw1*ew(1,i,j,1) + vw2*ew(2,i,j,1) + vw3*ew(3,i,j,1)
           ut(i,j) = (uc(i,j)-v(i,j)*cosa_u(i,j)) * rsin_u(i,j)
        enddo
     endif

     if ( (ie+1)==npx ) then
        do j=js-1,je+1
        i=npx-1
           vw1 = c1*v1(npx-3,j) + c2*v1(npx-2,j) + c3*v1(npx-1,j) 
           vw2 = c1*v2(npx-3,j) + c2*v2(npx-2,j) + c3*v2(npx-1,j) 
           vw3 = c1*v3(npx-3,j) + c2*v3(npx-2,j) + c3*v3(npx-1,j) 
           uc(i,j) = vw1*ew(1,i,j,1) + vw2*ew(2,i,j,1) + vw3*ew(3,i,j,1)
           ut(i,j) = (uc(i,j)-v(i,j)*cosa_u(i,j)) * rsin_u(i,j)
        i=npx
           vw1 = 3.*(v1(i-1,j)+v1(i,j)) - (v1(i-2,j)+v1(i+1,j))
           vw2 = 3.*(v2(i-1,j)+v2(i,j)) - (v2(i-2,j)+v2(i+1,j))
           vw3 = 3.*(v3(i-1,j)+v3(i,j)) - (v3(i-2,j)+v3(i+1,j))
           uc(i,j) = 0.25*(vw1*ew(1,i,j,1) + vw2*ew(2,i,j,1) + vw3*ew(3,i,j,1))
           ut(i,j) = uc(i,j)*rsin_u(i,j)
        i=npx+1
           vw1 = c3*v1(npx,j) + c2*v1(npx+1,j) + c1*v1(npx+2,j) 
           vw2 = c3*v2(npx,j) + c2*v2(npx+1,j) + c1*v2(npx+2,j) 
           vw3 = c3*v3(npx,j) + c2*v3(npx+1,j) + c1*v3(npx+2,j) 
           uc(i,j) = vw1*ew(1,i,j,1) + vw2*ew(2,i,j,1) + vw3*ew(3,i,j,1)
           ut(i,j) = (uc(i,j)-v(i,j)*cosa_u(i,j)) * rsin_u(i,j)
        enddo
     endif

! Ydir:
     call fill3_4corners(v1, v2, v3, 2)
!    call copy_corners(v1, npx, npy, 2)
!    call copy_corners(v2, npx, npy, 2)
!    call copy_corners(v3, npx, npy, 2)

     do j=js-1,je+2
        if( j==0 .or. j==(npy-1) ) then
          do i=is-1,ie+1
             vs1 = c1*v1(i,j-2) + c2*v1(i,j-1) + c3*v1(i,j)
             vs2 = c1*v2(i,j-2) + c2*v2(i,j-1) + c3*v2(i,j)
             vs3 = c1*v3(i,j-2) + c2*v3(i,j-1) + c3*v3(i,j)
             vc(i,j) = vs1*es(1,i,j,2) + vs2*es(2,i,j,2) + vs3*es(3,i,j,2)
             vt(i,j) = (vc(i,j)-u(i,j)*cosa_v(i,j)) * rsin_v(i,j)
          enddo
        elseif ( j==2 .or. j==(npy+1) ) then
          do i=is-1,ie+1
             vs1 = c3*v1(i,j-1) + c2*v1(i,j) + c1*v1(i,j+1)
             vs2 = c3*v2(i,j-1) + c2*v2(i,j) + c1*v2(i,j+1)
             vs3 = c3*v3(i,j-1) + c2*v3(i,j) + c1*v3(i,j+1)
             vc(i,j) = vs1*es(1,i,j,2) + vs2*es(2,i,j,2) + vs3*es(3,i,j,2)
             vt(i,j) = (vc(i,j)-u(i,j)*cosa_v(i,j)) * rsin_v(i,j)
          enddo
        elseif ( j==1 .or. j==npy ) then
          do i=is-1,ie+1
              vs1 = 3.*(v1(i,j-1)+v1(i,j)) - (v1(i,j-2)+v1(i,j+1))
              vs2 = 3.*(v2(i,j-1)+v2(i,j)) - (v2(i,j-2)+v2(i,j+1))
              vs3 = 3.*(v3(i,j-1)+v3(i,j)) - (v3(i,j-2)+v3(i,j+1))
              vc(i,j) = 0.25*(vs1*es(1,i,j,2) + vs2*es(2,i,j,2) + vs3*es(3,i,j,2))
              vt(i,j) = vc(i,j)*rsin_v(i,j)
          enddo
        else
! Interior: 4th order
          do i=is-1,ie+1
             vs1 = a2*(v1(i,j-2)+v1(i,j+1)) + a1*(v1(i,j-1)+v1(i,j))
             vs2 = a2*(v2(i,j-2)+v2(i,j+1)) + a1*(v2(i,j-1)+v2(i,j))
             vs3 = a2*(v3(i,j-2)+v3(i,j+1)) + a1*(v3(i,j-1)+v3(i,j))
             vc(i,j) = vs1*es(1,i,j,2) + vs2*es(2,i,j,2) + vs3*es(3,i,j,2)
             vt(i,j) = (vc(i,j)-u(i,j)*cosa_v(i,j)) * rsin_v(i,j)
          enddo
        endif
     enddo

 end subroutine d2a2c_vect_v1
      

 subroutine fill3_4corners(q1, q2, q3, dir)
! This routine fill the 4 corners of the scalar fileds only as needed by c_core
  integer, intent(in):: dir                ! 1: x-dir; 2: y-dir
  real, intent(inout):: q1(isd:ied,jsd:jed)
  real, intent(inout):: q2(isd:ied,jsd:jed)
  real, intent(inout):: q3(isd:ied,jsd:jed)
  integer i,j

  select case(dir)
  case(1)
      if ( sw_corner ) then
          q1(-1,0) = q1(0,2); q1(0,0) = q1(0,1); q1(0,-1) = q1(-1,1)
          q2(-1,0) = q2(0,2); q2(0,0) = q2(0,1); q2(0,-1) = q2(-1,1)
          q3(-1,0) = q3(0,2); q3(0,0) = q3(0,1); q3(0,-1) = q3(-1,1)
      endif
      if ( se_corner ) then
          q1(npx+1,0) = q1(npx,2); q1(npx,0) = q1(npx,1); q1(npx,-1) = q1(npx+1,1)
          q2(npx+1,0) = q2(npx,2); q2(npx,0) = q2(npx,1); q2(npx,-1) = q2(npx+1,1)
          q3(npx+1,0) = q3(npx,2); q3(npx,0) = q3(npx,1); q3(npx,-1) = q3(npx+1,1)
      endif
      if ( ne_corner ) then
          q1(npx,npy) = q1(npx,npy-1); q1(npx+1,npy) = q1(npx,npy-2); q1(npx,npy+1) = q1(npx+1,npy-1)
          q2(npx,npy) = q2(npx,npy-1); q2(npx+1,npy) = q2(npx,npy-2); q2(npx,npy+1) = q2(npx+1,npy-1)
          q3(npx,npy) = q3(npx,npy-1); q3(npx+1,npy) = q3(npx,npy-2); q3(npx,npy+1) = q3(npx+1,npy-1)
      endif
      if ( nw_corner ) then
          q1(0,npy) = q1(0,npy-1); q1(-1,npy) = q1(0,npy-2); q1(0,npy+1) = q1(-1,npy-1)
          q2(0,npy) = q2(0,npy-1); q2(-1,npy) = q2(0,npy-2); q2(0,npy+1) = q2(-1,npy-1)
          q3(0,npy) = q3(0,npy-1); q3(-1,npy) = q3(0,npy-2); q3(0,npy+1) = q3(-1,npy-1)
      endif

  case(2)
      if ( sw_corner ) then
          q1(0,0) = q1(1,0); q1(0,-1) = q1(2,0); q1(-1,0) = q1(1,-1)
          q2(0,0) = q2(1,0); q2(0,-1) = q2(2,0); q2(-1,0) = q2(1,-1)
          q3(0,0) = q3(1,0); q3(0,-1) = q3(2,0); q3(-1,0) = q3(1,-1)
      endif
      if ( se_corner ) then
          q1(npx,0) = q1(npx-1,0); q1(npx,-1) = q1(npx-2,0); q1(npx+1,0) = q1(npx-1,-1)
          q2(npx,0) = q2(npx-1,0); q2(npx,-1) = q2(npx-2,0); q2(npx+1,0) = q2(npx-1,-1)
          q3(npx,0) = q3(npx-1,0); q3(npx,-1) = q3(npx-2,0); q3(npx+1,0) = q3(npx-1,-1)
      endif
      if ( ne_corner ) then
          q1(npx,npy) = q1(npx-1,npy); q1(npx,npy+1) = q1(npx-2,npy); q1(npx+1,npy) = q1(npx-1,npy+1)
          q2(npx,npy) = q2(npx-1,npy); q2(npx,npy+1) = q2(npx-2,npy); q2(npx+1,npy) = q2(npx-1,npy+1)
          q3(npx,npy) = q3(npx-1,npy); q3(npx,npy+1) = q3(npx-2,npy); q3(npx+1,npy) = q3(npx-1,npy+1)
      endif
      if ( nw_corner ) then
          q1(0,npy) = q1(1,npy); q1(0,npy+1) = q1(2,npy); q1(-1,npy) = q1(1,npy+1)
          q2(0,npy) = q2(1,npy); q2(0,npy+1) = q2(2,npy); q2(-1,npy) = q2(1,npy+1)
          q3(0,npy) = q3(1,npy); q3(0,npy+1) = q3(2,npy); q3(-1,npy) = q3(1,npy+1)
      endif

  end select
 end subroutine fill3_4corners


 subroutine fill2_4corners(q1, q2, dir)
! This routine fill the 4 corners of the scalar fileds only as needed by c_core
  integer, intent(in):: dir                ! 1: x-dir; 2: y-dir
  real, intent(inout):: q1(isd:ied,jsd:jed)
  real, intent(inout):: q2(isd:ied,jsd:jed)

  select case(dir)
  case(1)
      if ( sw_corner ) then
          q1(-1,0) = q1(0,2);    q1(0,0) = q1(0,1)
          q2(-1,0) = q2(0,2);    q2(0,0) = q2(0,1)
      endif
      if ( se_corner ) then
          q1(npx+1,0) = q1(npx,2); q1(npx,0) = q1(npx,1)
          q2(npx+1,0) = q2(npx,2); q2(npx,0) = q2(npx,1)
      endif
      if ( nw_corner ) then
          q1(0,npy) = q1(0,npy-1); q1(-1,npy) = q1(0,npy-2)
          q2(0,npy) = q2(0,npy-1); q2(-1,npy) = q2(0,npy-2)
      endif
      if ( ne_corner ) then
          q1(npx,npy) = q1(npx,npy-1); q1(npx+1,npy) = q1(npx,npy-2)
          q2(npx,npy) = q2(npx,npy-1); q2(npx+1,npy) = q2(npx,npy-2)
      endif

  case(2)
      if ( sw_corner ) then
          q1(0,0) = q1(1,0); q1(0,-1) = q1(2,0)
          q2(0,0) = q2(1,0); q2(0,-1) = q2(2,0)
      endif
      if ( se_corner ) then
          q1(npx,0) = q1(npx-1,0); q1(npx,-1) = q1(npx-2,0)
          q2(npx,0) = q2(npx-1,0); q2(npx,-1) = q2(npx-2,0)
      endif
      if ( nw_corner ) then
          q1(0,npy) = q1(1,npy); q1(0,npy+1) = q1(2,npy)
          q2(0,npy) = q2(1,npy); q2(0,npy+1) = q2(2,npy)
      endif
      if ( ne_corner ) then
          q1(npx,npy) = q1(npx-1,npy); q1(npx,npy+1) = q1(npx-2,npy)
          q2(npx,npy) = q2(npx-1,npy); q2(npx,npy+1) = q2(npx-2,npy)
      endif

  end select

 end subroutine fill2_4corners

 subroutine fill_4corners(q, dir)
! This routine fill the 4 corners of the scalar fileds only as needed by c_core
  integer, intent(in):: dir                ! 1: x-dir; 2: y-dir
  real, intent(inout):: q(isd:ied,jsd:jed)

  select case(dir)
  case(1)
      if ( sw_corner ) then
          q(-1,0) = q(0,2)
          q( 0,0) = q(0,1)
      endif
      if ( se_corner ) then
          q(npx+1,0) = q(npx,2)
          q(npx,  0) = q(npx,1)
      endif
      if ( nw_corner ) then
          q( 0,npy) = q(0,npy-1)
          q(-1,npy) = q(0,npy-2)
      endif
      if ( ne_corner ) then
          q(npx,  npy) = q(npx,npy-1)
          q(npx+1,npy) = q(npx,npy-2)
      endif

  case(2)
      if ( sw_corner ) then
          q(0, 0) = q(1,0)
          q(0,-1) = q(2,0)
      endif
      if ( se_corner ) then
          q(npx, 0) = q(npx-1,0)
          q(npx,-1) = q(npx-2,0)
      endif
      if ( nw_corner ) then
          q(0,npy  ) = q(1,npy)
          q(0,npy+1) = q(2,npy)
      endif
      if ( ne_corner ) then
          q(npx,npy  ) = q(npx-1,npy)
          q(npx,npy+1) = q(npx-2,npy)
      endif

  end select

 end subroutine fill_4corners

 end module sw_core_mod

