; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefixes=GFX90A,GFX90A_ITERATIVE %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefixes=GFX90A,GFX90A_DPP %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefixes=GFX940,GFX940_ITERATIVE %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefixes=GFX940,GFX940_DPP %s

define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_intrinsic(ptr addrspace(1) %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_no_rtn_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   S_ENDPGM 0
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret void
}

define amdgpu_ps double @global_atomic_fadd_f64_rtn_intrinsic(ptr addrspace(1) %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_rtn_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %ret
}

define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_intrinsic(ptr addrspace(1) inreg %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   S_ENDPGM 0
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret void
}

define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_intrinsic(ptr addrspace(1) inreg %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_saddr_rtn_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %ret
}

define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_flat_intrinsic(ptr addrspace(1) %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_no_rtn_flat_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_flat_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   S_ENDPGM 0
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret void
}

define amdgpu_ps double @global_atomic_fadd_f64_rtn_flat_intrinsic(ptr addrspace(1) %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_rtn_flat_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_flat_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %ret
}

define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   S_ENDPGM 0
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret void
}

define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_saddr_rtn_flat_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_flat_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %ret
}

define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   S_ENDPGM 0
  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
  ret void
}

define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
  ret double %ret
}

define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 {
  ; GFX90A_ITERATIVE-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
  ; GFX90A_ITERATIVE: bb.1 (%ir-block.0):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.2(0x40000000), %bb.6(0x40000000)
  ; GFX90A_ITERATIVE-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_ITERATIVE-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX90A_ITERATIVE-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.2
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.2 (%ir-block.5):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.7(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.7
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.3 (%ir-block.7):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.4(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], %25, [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.4.Flow:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.6(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   SI_END_CF %35, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.6
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.5 (%ir-block.9):
  ; GFX90A_ITERATIVE-NEXT:   S_ENDPGM 0
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.6.Flow1:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.5(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.5
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.7.ComputeLoop:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.8(0x04000000), %bb.7(0x7c000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[PHI:%[0-9]+]]:vreg_64_align2 = PHI %17, %bb.7, [[S_MOV_B]], %bb.2
  ; GFX90A_ITERATIVE-NEXT:   [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI %22, %bb.7, [[COPY4]], %bb.2
  ; GFX90A_ITERATIVE-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY5]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY6]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_1]]
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY8]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY9]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY10]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[V_NOT_B32_e32_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[V_NOT_B32_e32_1]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE2]], [[COPY13]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   $vcc = COPY [[V_CMP_NE_U64_e64_]]
  ; GFX90A_ITERATIVE-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit $vcc
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.8
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.8.ComputeEnd:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI [[V_ADD_F64_e64_]], %bb.7
  ; GFX90A_ITERATIVE-NEXT:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY16]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE3]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
  ; GFX90A_ITERATIVE-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY18]], [[COPY19]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY20]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY21]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.3
  ;
  ; GFX90A_DPP-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
  ; GFX90A_DPP: bb.1 (%ir-block.0):
  ; GFX90A_DPP-NEXT:   successors: %bb.2(0x40000000), %bb.5(0x40000000)
  ; GFX90A_DPP-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_DPP-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_DPP-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX90A_DPP-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.2
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.2 (%ir-block.5):
  ; GFX90A_DPP-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX90A_DPP-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX90A_DPP-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
  ; GFX90A_DPP-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_DPP-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX90A_DPP-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_DPP-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY9]], [[COPY10]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX90A_DPP-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY11]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX90A_DPP-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_SET_INACTIVE_B64_:%[0-9]+]]:vreg_64_align2 = V_SET_INACTIVE_B64 [[REG_SEQUENCE1]], [[COPY12]], implicit-def dead $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY13]], [[V_SET_INACTIVE_B64_]], 273, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_SET_INACTIVE_B64_]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY14]], [[V_ADD_F64_e64_]], 274, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 0, [[V_MOV_B1]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY15]], [[V_ADD_F64_e64_1]], 276, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_1]], 0, [[V_MOV_B2]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY16]], [[V_ADD_F64_e64_2]], 280, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_3:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_2]], 0, [[V_MOV_B3]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY17:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY17]], [[V_ADD_F64_e64_3]], 322, 10, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_4:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_3]], 0, [[V_MOV_B4]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY18:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY18]], [[V_ADD_F64_e64_4]], 323, 12, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_5:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_4]], 0, [[V_MOV_B5]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
  ; GFX90A_DPP-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
  ; GFX90A_DPP-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY19]], [[S_MOV_B32_2]]
  ; GFX90A_DPP-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX90A_DPP-NEXT:   [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY21]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_DPP-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY22]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.3
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.3 (%ir-block.31):
  ; GFX90A_DPP-NEXT:   successors: %bb.4(0x80000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A_DPP-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.4.Flow:
  ; GFX90A_DPP-NEXT:   successors: %bb.5(0x80000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.5 (%ir-block.33):
  ; GFX90A_DPP-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   S_ENDPGM 0
  ;
  ; GFX940_ITERATIVE-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
  ; GFX940_ITERATIVE: bb.1 (%ir-block.0):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.2(0x40000000), %bb.6(0x40000000)
  ; GFX940_ITERATIVE-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940_ITERATIVE-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940_ITERATIVE-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940_ITERATIVE-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX940_ITERATIVE-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.2
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.2 (%ir-block.5):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.7(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.7
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.3 (%ir-block.7):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.4(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], %24, [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.4.Flow:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.6(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   SI_END_CF %34, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.6
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.5 (%ir-block.9):
  ; GFX940_ITERATIVE-NEXT:   S_ENDPGM 0
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.6.Flow1:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.5(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.5
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.7.ComputeLoop:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.8(0x04000000), %bb.7(0x7c000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[PHI:%[0-9]+]]:vreg_64_align2 = PHI %16, %bb.7, [[S_MOV_B]], %bb.2
  ; GFX940_ITERATIVE-NEXT:   [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI %21, %bb.7, [[COPY4]], %bb.2
  ; GFX940_ITERATIVE-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY5]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY6]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_]]
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_1]]
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
  ; GFX940_ITERATIVE-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY8]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY9]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY10]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[V_NOT_B32_e32_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[V_NOT_B32_e32_1]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
  ; GFX940_ITERATIVE-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
  ; GFX940_ITERATIVE-NEXT:   [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE2]], [[COPY13]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   $vcc = COPY [[V_CMP_NE_U64_e64_]]
  ; GFX940_ITERATIVE-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit $vcc
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.8
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.8.ComputeEnd:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI [[V_ADD_F64_e64_]], %bb.7
  ; GFX940_ITERATIVE-NEXT:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY16]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE3]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_ITERATIVE-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
  ; GFX940_ITERATIVE-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY18]], [[COPY19]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
  ; GFX940_ITERATIVE-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY20]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY21]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.3
  ;
  ; GFX940_DPP-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
  ; GFX940_DPP: bb.1 (%ir-block.0):
  ; GFX940_DPP-NEXT:   successors: %bb.2(0x40000000), %bb.5(0x40000000)
  ; GFX940_DPP-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940_DPP-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940_DPP-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX940_DPP-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.2
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.2 (%ir-block.5):
  ; GFX940_DPP-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX940_DPP-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_DPP-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_DPP-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX940_DPP-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
  ; GFX940_DPP-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_DPP-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX940_DPP-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_DPP-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY9]], [[COPY10]], implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX940_DPP-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY11]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX940_DPP-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX940_DPP-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_SET_INACTIVE_B64_:%[0-9]+]]:vreg_64_align2 = V_SET_INACTIVE_B64 [[REG_SEQUENCE1]], [[COPY12]], implicit-def dead $scc, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY13]], [[V_SET_INACTIVE_B64_]], 273, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_SET_INACTIVE_B64_]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY14]], [[V_ADD_F64_e64_]], 274, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 0, [[V_MOV_B1]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY15]], [[V_ADD_F64_e64_1]], 276, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_1]], 0, [[V_MOV_B2]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY16]], [[V_ADD_F64_e64_2]], 280, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_3:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_2]], 0, [[V_MOV_B3]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY17:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY17]], [[V_ADD_F64_e64_3]], 322, 10, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_4:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_3]], 0, [[V_MOV_B4]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY18:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY18]], [[V_ADD_F64_e64_4]], 323, 12, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_5:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_4]], 0, [[V_MOV_B5]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
  ; GFX940_DPP-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
  ; GFX940_DPP-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
  ; GFX940_DPP-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY19]], [[S_MOV_B32_2]]
  ; GFX940_DPP-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX940_DPP-NEXT:   [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY21]], implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_DPP-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY22]], implicit $exec
  ; GFX940_DPP-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.3
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.3 (%ir-block.31):
  ; GFX940_DPP-NEXT:   successors: %bb.4(0x80000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940_DPP-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.4.Flow:
  ; GFX940_DPP-NEXT:   successors: %bb.5(0x80000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.5 (%ir-block.33):
  ; GFX940_DPP-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   S_ENDPGM 0
  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
  ret void
}

define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 {
  ; GFX90A_ITERATIVE-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
  ; GFX90A_ITERATIVE: bb.1 (%ir-block.0):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.2(0x40000000), %bb.6(0x40000000)
  ; GFX90A_ITERATIVE-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_ITERATIVE-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
  ; GFX90A_ITERATIVE-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX90A_ITERATIVE-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.2
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.2 (%ir-block.5):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.7(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.7
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.3 (%ir-block.7):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.4(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], %28, [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.4 (%ir-block.9):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.6(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]], %bb.3, [[DEF]], %bb.8
  ; GFX90A_ITERATIVE-NEXT:   SI_END_CF %38, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY7]], 0, %27, 0, 0, implicit $mode, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A_ITERATIVE-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A_ITERATIVE-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY10]], 0, [[COPY8]], %36, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY11]], 0, [[COPY9]], %36, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_CNDMASK_B32_e64_]], %subreg.sub0, [[V_CNDMASK_B32_e64_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.6
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.5 (%ir-block.14):
  ; GFX90A_ITERATIVE-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY %44.sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY %44.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX90A_ITERATIVE-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.6.Flow:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.5(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE2]], %bb.4, [[DEF]], %bb.1
  ; GFX90A_ITERATIVE-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.5
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.7.ComputeLoop:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.8(0x04000000), %bb.7(0x7c000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI %19, %bb.7, [[S_MOV_B]], %bb.2
  ; GFX90A_ITERATIVE-NEXT:   [[PHI3:%[0-9]+]]:vreg_64_align2 = PHI %18, %bb.7, [[DEF]], %bb.2
  ; GFX90A_ITERATIVE-NEXT:   [[PHI4:%[0-9]+]]:vreg_64_align2 = PHI %24, %bb.7, [[COPY4]], %bb.2
  ; GFX90A_ITERATIVE-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY14]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY15]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_4]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_5]]
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[PHI3]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[PHI3]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   $m0 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READFIRSTLANE_B32_6]], $m0, [[COPY18]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   $m0 = COPY [[V_READFIRSTLANE_B32_9]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READFIRSTLANE_B32_8]], $m0, [[COPY19]]
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_WRITELANE_B32_]], %subreg.sub0, [[V_WRITELANE_B32_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY20:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI2]], 0, [[COPY20]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY21]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY22]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY23]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY24]], [[V_NOT_B32_e32_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY25]], [[V_NOT_B32_e32_1]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE5]], [[COPY26]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   $vcc = COPY [[V_CMP_NE_U64_e64_]]
  ; GFX90A_ITERATIVE-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit $vcc
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.8
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.8.ComputeEnd:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[PHI5:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE4]], %bb.7
  ; GFX90A_ITERATIVE-NEXT:   [[PHI6:%[0-9]+]]:vreg_64_align2 = PHI [[V_ADD_F64_e64_1]], %bb.7
  ; GFX90A_ITERATIVE-NEXT:   [[COPY27:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY28:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY29:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY29]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY30:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE6]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY27]]
  ; GFX90A_ITERATIVE-NEXT:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY31]], [[COPY32]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY30]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY33]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY34]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.3
  ;
  ; GFX90A_DPP-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
  ; GFX90A_DPP: bb.1 (%ir-block.0):
  ; GFX90A_DPP-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
  ; GFX90A_DPP-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_DPP-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_DPP-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
  ; GFX90A_DPP-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX90A_DPP-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.2
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.2 (%ir-block.5):
  ; GFX90A_DPP-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX90A_DPP-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX90A_DPP-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
  ; GFX90A_DPP-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_DPP-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX90A_DPP-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_DPP-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY9]], [[COPY10]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX90A_DPP-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY11]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX90A_DPP-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_SET_INACTIVE_B64_:%[0-9]+]]:vreg_64_align2 = V_SET_INACTIVE_B64 [[REG_SEQUENCE1]], [[COPY12]], implicit-def dead $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY13]], [[V_SET_INACTIVE_B64_]], 273, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_SET_INACTIVE_B64_]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY14]], [[V_ADD_F64_e64_]], 274, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 0, [[V_MOV_B1]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY15]], [[V_ADD_F64_e64_1]], 276, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_1]], 0, [[V_MOV_B2]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY16]], [[V_ADD_F64_e64_2]], 280, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_3:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_2]], 0, [[V_MOV_B3]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY17:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY17]], [[V_ADD_F64_e64_3]], 322, 10, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_4:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_3]], 0, [[V_MOV_B4]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY18:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY18]], [[V_ADD_F64_e64_4]], 323, 12, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_5:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_4]], 0, [[V_MOV_B5]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY19:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B6:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY19]], [[V_ADD_F64_e64_5]], 312, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
  ; GFX90A_DPP-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
  ; GFX90A_DPP-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
  ; GFX90A_DPP-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY21]], [[S_MOV_B32_2]]
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[COPY22:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX90A_DPP-NEXT:   [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY22]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_DPP-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY23]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.3
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.3 (%ir-block.32):
  ; GFX90A_DPP-NEXT:   successors: %bb.5(0x80000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.5
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.4.Flow:
  ; GFX90A_DPP-NEXT:   successors: %bb.6(0x80000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[PHI:%[0-9]+]]:vreg_64_align2 = PHI %45, %bb.5, [[DEF]], %bb.1
  ; GFX90A_DPP-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.6
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.5 (%ir-block.35):
  ; GFX90A_DPP-NEXT:   successors: %bb.4(0x80000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]], %bb.3, [[DEF]], %bb.2
  ; GFX90A_DPP-NEXT:   SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX90A_DPP-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[STRICT_WWM1:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[V_MOV_B6]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE4]]
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_6:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY26]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A_DPP-NEXT:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A_DPP-NEXT:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_6]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_6]].sub1
  ; GFX90A_DPP-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY29]], 0, [[COPY27]], [[V_CMP_EQ_U32_e64_]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY30]], 0, [[COPY28]], [[V_CMP_EQ_U32_e64_]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_CNDMASK_B32_e64_]], %subreg.sub0, [[V_CNDMASK_B32_e64_1]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.4
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.6 (%ir-block.41):
  ; GFX90A_DPP-NEXT:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
  ; GFX90A_DPP-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
  ; GFX90A_DPP-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX90A_DPP-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
  ; GFX90A_DPP-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX90A_DPP-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940_ITERATIVE-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
  ; GFX940_ITERATIVE: bb.1 (%ir-block.0):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.2(0x40000000), %bb.6(0x40000000)
  ; GFX940_ITERATIVE-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940_ITERATIVE-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940_ITERATIVE-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940_ITERATIVE-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
  ; GFX940_ITERATIVE-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX940_ITERATIVE-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.2
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.2 (%ir-block.5):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.7(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.7
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.3 (%ir-block.7):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.4(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], %27, [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.4 (%ir-block.9):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.6(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]], %bb.3, [[DEF]], %bb.8
  ; GFX940_ITERATIVE-NEXT:   SI_END_CF %37, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY7]], 0, %26, 0, 0, implicit $mode, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940_ITERATIVE-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940_ITERATIVE-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY10]], 0, [[COPY8]], %35, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY11]], 0, [[COPY9]], %35, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_CNDMASK_B32_e64_]], %subreg.sub0, [[V_CNDMASK_B32_e64_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.6
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.5 (%ir-block.14):
  ; GFX940_ITERATIVE-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY %43.sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY %43.sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX940_ITERATIVE-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.6.Flow:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.5(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE2]], %bb.4, [[DEF]], %bb.1
  ; GFX940_ITERATIVE-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.5
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.7.ComputeLoop:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.8(0x04000000), %bb.7(0x7c000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI %18, %bb.7, [[S_MOV_B]], %bb.2
  ; GFX940_ITERATIVE-NEXT:   [[PHI3:%[0-9]+]]:vreg_64_align2 = PHI %17, %bb.7, [[DEF]], %bb.2
  ; GFX940_ITERATIVE-NEXT:   [[PHI4:%[0-9]+]]:vreg_64_align2 = PHI %23, %bb.7, [[COPY4]], %bb.2
  ; GFX940_ITERATIVE-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY14]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY15]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_4]]
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_5]]
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[PHI3]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[PHI3]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   $m0 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX940_ITERATIVE-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READFIRSTLANE_B32_6]], $m0, [[COPY18]]
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   $m0 = COPY [[V_READFIRSTLANE_B32_9]]
  ; GFX940_ITERATIVE-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READFIRSTLANE_B32_8]], $m0, [[COPY19]]
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_WRITELANE_B32_]], %subreg.sub0, [[V_WRITELANE_B32_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY20:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX940_ITERATIVE-NEXT:   [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI2]], 0, [[COPY20]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
  ; GFX940_ITERATIVE-NEXT:   [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY21]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY22]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY23]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY24]], [[V_NOT_B32_e32_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY25]], [[V_NOT_B32_e32_1]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
  ; GFX940_ITERATIVE-NEXT:   [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
  ; GFX940_ITERATIVE-NEXT:   [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE5]], [[COPY26]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   $vcc = COPY [[V_CMP_NE_U64_e64_]]
  ; GFX940_ITERATIVE-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit $vcc
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.8
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.8.ComputeEnd:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[PHI5:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE4]], %bb.7
  ; GFX940_ITERATIVE-NEXT:   [[PHI6:%[0-9]+]]:vreg_64_align2 = PHI [[V_ADD_F64_e64_1]], %bb.7
  ; GFX940_ITERATIVE-NEXT:   [[COPY27:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY28:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY29:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY29]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY30:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE6]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_ITERATIVE-NEXT:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY27]]
  ; GFX940_ITERATIVE-NEXT:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY31]], [[COPY32]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY30]]
  ; GFX940_ITERATIVE-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY33]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY34]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.3
  ;
  ; GFX940_DPP-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
  ; GFX940_DPP: bb.1 (%ir-block.0):
  ; GFX940_DPP-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
  ; GFX940_DPP-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940_DPP-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940_DPP-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
  ; GFX940_DPP-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX940_DPP-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.2
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.2 (%ir-block.5):
  ; GFX940_DPP-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX940_DPP-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_DPP-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_DPP-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX940_DPP-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
  ; GFX940_DPP-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_DPP-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX940_DPP-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_DPP-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY9]], [[COPY10]], implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX940_DPP-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY11]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX940_DPP-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX940_DPP-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_SET_INACTIVE_B64_:%[0-9]+]]:vreg_64_align2 = V_SET_INACTIVE_B64 [[REG_SEQUENCE1]], [[COPY12]], implicit-def dead $scc, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY13]], [[V_SET_INACTIVE_B64_]], 273, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_SET_INACTIVE_B64_]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY14]], [[V_ADD_F64_e64_]], 274, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 0, [[V_MOV_B1]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY15]], [[V_ADD_F64_e64_1]], 276, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_1]], 0, [[V_MOV_B2]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY16]], [[V_ADD_F64_e64_2]], 280, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_3:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_2]], 0, [[V_MOV_B3]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY17:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY17]], [[V_ADD_F64_e64_3]], 322, 10, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_4:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_3]], 0, [[V_MOV_B4]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY18:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY18]], [[V_ADD_F64_e64_4]], 323, 12, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_5:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_4]], 0, [[V_MOV_B5]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY19:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B6:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY19]], [[V_ADD_F64_e64_5]], 312, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
  ; GFX940_DPP-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
  ; GFX940_DPP-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
  ; GFX940_DPP-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
  ; GFX940_DPP-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY21]], [[S_MOV_B32_2]]
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[COPY22:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX940_DPP-NEXT:   [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY22]], implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_DPP-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY23]], implicit $exec
  ; GFX940_DPP-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.3
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.3 (%ir-block.32):
  ; GFX940_DPP-NEXT:   successors: %bb.5(0x80000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.5
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.4.Flow:
  ; GFX940_DPP-NEXT:   successors: %bb.6(0x80000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[PHI:%[0-9]+]]:vreg_64_align2 = PHI %44, %bb.5, [[DEF]], %bb.1
  ; GFX940_DPP-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.6
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.5 (%ir-block.35):
  ; GFX940_DPP-NEXT:   successors: %bb.4(0x80000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]], %bb.3, [[DEF]], %bb.2
  ; GFX940_DPP-NEXT:   SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX940_DPP-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX940_DPP-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
  ; GFX940_DPP-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[STRICT_WWM1:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[V_MOV_B6]], implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE4]]
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_6:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY26]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940_DPP-NEXT:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940_DPP-NEXT:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_6]].sub0
  ; GFX940_DPP-NEXT:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_6]].sub1
  ; GFX940_DPP-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY29]], 0, [[COPY27]], [[V_CMP_EQ_U32_e64_]], implicit $exec
  ; GFX940_DPP-NEXT:   [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY30]], 0, [[COPY28]], [[V_CMP_EQ_U32_e64_]], implicit $exec
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_CNDMASK_B32_e64_]], %subreg.sub0, [[V_CNDMASK_B32_e64_1]], %subreg.sub1
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.4
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.6 (%ir-block.41):
  ; GFX940_DPP-NEXT:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
  ; GFX940_DPP-NEXT:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
  ; GFX940_DPP-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
  ; GFX940_DPP-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX940_DPP-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
  ; GFX940_DPP-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX940_DPP-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
  ret double %ret
}

declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)

attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
