; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march nvptx64 --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s --check-prefixes=SM30,CHECK
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %}
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=SM70,CHECK
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}

; TODO: these are system scope, but are compiled to gpu scope.
; TODO: these are seq_cst, but are compiled to relaxed.


; CHECK-LABEL: relaxed_sys_i8
; Byte-sized cmpxchg with monotonic success and failure orderings.
; The expected PTX expands the i8 operation into a compare-and-swap loop
; (%partword.cmpxchg.loop) on the enclosing 4-byte-aligned word: the address
; is masked with -4, the byte lane is selected by a shift derived from the low
; two address bits, and a 32-bit atom.cas is retried as long as the failure was
; caused by a change in the other bytes of the word.
; No fence is expected before or after the loop for this ordering.
; The cmpxchg result (%pairold) is unused; the function returns %new.
define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-LABEL: relaxed_sys_i8(
; SM30:       {
; SM30-NEXT:    .reg .pred %p<3>;
; SM30-NEXT:    .reg .b16 %rs<2>;
; SM30-NEXT:    .reg .b32 %r<21>;
; SM30-NEXT:    .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b8 %rs1, [relaxed_sys_i8_param_2];
; SM30-NEXT:    ld.param.b64 %rd2, [relaxed_sys_i8_param_0];
; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
; SM30-NEXT:    ld.param.b8 %r9, [relaxed_sys_i8_param_1];
; SM30-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM30-NEXT:    and.b32 %r11, %r10, 3;
; SM30-NEXT:    shl.b32 %r1, %r11, 3;
; SM30-NEXT:    mov.b32 %r12, 255;
; SM30-NEXT:    shl.b32 %r13, %r12, %r1;
; SM30-NEXT:    not.b32 %r2, %r13;
; SM30-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM30-NEXT:    and.b32 %r15, %r14, 255;
; SM30-NEXT:    shl.b32 %r3, %r15, %r1;
; SM30-NEXT:    shl.b32 %r4, %r9, %r1;
; SM30-NEXT:    ld.b32 %r16, [%rd1];
; SM30-NEXT:    and.b32 %r20, %r16, %r2;
; SM30-NEXT:  $L__BB0_1: // %partword.cmpxchg.loop
; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
; SM30-NEXT:    or.b32 %r17, %r20, %r3;
; SM30-NEXT:    or.b32 %r18, %r20, %r4;
; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r18;
; SM30-NEXT:    @%p1 bra $L__BB0_3;
; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT:    // in Loop: Header=BB0_1 Depth=1
; SM30-NEXT:    and.b32 %r8, %r7, %r2;
; SM30-NEXT:    setp.ne.b32 %p2, %r20, %r8;
; SM30-NEXT:    mov.b32 %r20, %r8;
; SM30-NEXT:    @%p2 bra $L__BB0_1;
; SM30-NEXT:  $L__BB0_3: // %partword.cmpxchg.end
; SM30-NEXT:    st.param.b32 [func_retval0], %r14;
; SM30-NEXT:    ret;
;
; SM70-LABEL: relaxed_sys_i8(
; SM70:       {
; SM70-NEXT:    .reg .pred %p<3>;
; SM70-NEXT:    .reg .b16 %rs<2>;
; SM70-NEXT:    .reg .b32 %r<21>;
; SM70-NEXT:    .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b8 %rs1, [relaxed_sys_i8_param_2];
; SM70-NEXT:    ld.param.b64 %rd2, [relaxed_sys_i8_param_0];
; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
; SM70-NEXT:    ld.param.b8 %r9, [relaxed_sys_i8_param_1];
; SM70-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM70-NEXT:    and.b32 %r11, %r10, 3;
; SM70-NEXT:    shl.b32 %r1, %r11, 3;
; SM70-NEXT:    mov.b32 %r12, 255;
; SM70-NEXT:    shl.b32 %r13, %r12, %r1;
; SM70-NEXT:    not.b32 %r2, %r13;
; SM70-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM70-NEXT:    and.b32 %r15, %r14, 255;
; SM70-NEXT:    shl.b32 %r3, %r15, %r1;
; SM70-NEXT:    shl.b32 %r4, %r9, %r1;
; SM70-NEXT:    ld.b32 %r16, [%rd1];
; SM70-NEXT:    and.b32 %r20, %r16, %r2;
; SM70-NEXT:  $L__BB0_1: // %partword.cmpxchg.loop
; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
; SM70-NEXT:    or.b32 %r17, %r20, %r3;
; SM70-NEXT:    or.b32 %r18, %r20, %r4;
; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT:    @%p1 bra $L__BB0_3;
; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT:    // in Loop: Header=BB0_1 Depth=1
; SM70-NEXT:    and.b32 %r8, %r7, %r2;
; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT:    mov.b32 %r20, %r8;
; SM70-NEXT:    @%p2 bra $L__BB0_1;
; SM70-NEXT:  $L__BB0_3: // %partword.cmpxchg.end
; SM70-NEXT:    st.param.b32 [func_retval0], %r14;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new monotonic monotonic
  ret i8 %new
}

; Byte-sized cmpxchg with acquire success and failure orderings.
; The expected PTX uses the same part-word compare-and-swap loop on the
; enclosing 4-byte-aligned word as the monotonic case, and additionally
; expects a fence after the loop exit (%partword.cmpxchg.end): membar.sys in
; the sm_32 run and fence.acq_rel.sys in the sm_70 run. No fence is expected
; before the loop.
; The cmpxchg result (%pairold) is unused; the function returns %new.
define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-LABEL: acquire_sys_i8(
; SM30:       {
; SM30-NEXT:    .reg .pred %p<3>;
; SM30-NEXT:    .reg .b16 %rs<2>;
; SM30-NEXT:    .reg .b32 %r<21>;
; SM30-NEXT:    .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b8 %rs1, [acquire_sys_i8_param_2];
; SM30-NEXT:    ld.param.b64 %rd2, [acquire_sys_i8_param_0];
; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
; SM30-NEXT:    ld.param.b8 %r9, [acquire_sys_i8_param_1];
; SM30-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM30-NEXT:    and.b32 %r11, %r10, 3;
; SM30-NEXT:    shl.b32 %r1, %r11, 3;
; SM30-NEXT:    mov.b32 %r12, 255;
; SM30-NEXT:    shl.b32 %r13, %r12, %r1;
; SM30-NEXT:    not.b32 %r2, %r13;
; SM30-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM30-NEXT:    and.b32 %r15, %r14, 255;
; SM30-NEXT:    shl.b32 %r3, %r15, %r1;
; SM30-NEXT:    shl.b32 %r4, %r9, %r1;
; SM30-NEXT:    ld.b32 %r16, [%rd1];
; SM30-NEXT:    and.b32 %r20, %r16, %r2;
; SM30-NEXT:  $L__BB1_1: // %partword.cmpxchg.loop
; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
; SM30-NEXT:    or.b32 %r17, %r20, %r3;
; SM30-NEXT:    or.b32 %r18, %r20, %r4;
; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r18;
; SM30-NEXT:    @%p1 bra $L__BB1_3;
; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT:    // in Loop: Header=BB1_1 Depth=1
; SM30-NEXT:    and.b32 %r8, %r7, %r2;
; SM30-NEXT:    setp.ne.b32 %p2, %r20, %r8;
; SM30-NEXT:    mov.b32 %r20, %r8;
; SM30-NEXT:    @%p2 bra $L__BB1_1;
; SM30-NEXT:  $L__BB1_3: // %partword.cmpxchg.end
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    st.param.b32 [func_retval0], %r14;
; SM30-NEXT:    ret;
;
; SM70-LABEL: acquire_sys_i8(
; SM70:       {
; SM70-NEXT:    .reg .pred %p<3>;
; SM70-NEXT:    .reg .b16 %rs<2>;
; SM70-NEXT:    .reg .b32 %r<21>;
; SM70-NEXT:    .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b8 %rs1, [acquire_sys_i8_param_2];
; SM70-NEXT:    ld.param.b64 %rd2, [acquire_sys_i8_param_0];
; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
; SM70-NEXT:    ld.param.b8 %r9, [acquire_sys_i8_param_1];
; SM70-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM70-NEXT:    and.b32 %r11, %r10, 3;
; SM70-NEXT:    shl.b32 %r1, %r11, 3;
; SM70-NEXT:    mov.b32 %r12, 255;
; SM70-NEXT:    shl.b32 %r13, %r12, %r1;
; SM70-NEXT:    not.b32 %r2, %r13;
; SM70-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM70-NEXT:    and.b32 %r15, %r14, 255;
; SM70-NEXT:    shl.b32 %r3, %r15, %r1;
; SM70-NEXT:    shl.b32 %r4, %r9, %r1;
; SM70-NEXT:    ld.b32 %r16, [%rd1];
; SM70-NEXT:    and.b32 %r20, %r16, %r2;
; SM70-NEXT:  $L__BB1_1: // %partword.cmpxchg.loop
; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
; SM70-NEXT:    or.b32 %r17, %r20, %r3;
; SM70-NEXT:    or.b32 %r18, %r20, %r4;
; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT:    @%p1 bra $L__BB1_3;
; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT:    // in Loop: Header=BB1_1 Depth=1
; SM70-NEXT:    and.b32 %r8, %r7, %r2;
; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT:    mov.b32 %r20, %r8;
; SM70-NEXT:    @%p2 bra $L__BB1_1;
; SM70-NEXT:  $L__BB1_3: // %partword.cmpxchg.end
; SM70-NEXT:    fence.acq_rel.sys;
; SM70-NEXT:    st.param.b32 [func_retval0], %r14;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new acquire acquire
  ret i8 %new
}

; Byte-sized cmpxchg with release success ordering and monotonic failure
; ordering.
; The expected PTX uses the part-word compare-and-swap loop on the enclosing
; 4-byte-aligned word, preceded by a fence in the entry block: membar.sys in
; the sm_32 run and fence.acq_rel.sys in the sm_70 run. No fence is expected
; after the loop exit.
; The cmpxchg result (%pairold) is unused; the function returns %new.
define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-LABEL: release_sys_i8(
; SM30:       {
; SM30-NEXT:    .reg .pred %p<3>;
; SM30-NEXT:    .reg .b16 %rs<2>;
; SM30-NEXT:    .reg .b32 %r<21>;
; SM30-NEXT:    .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b8 %rs1, [release_sys_i8_param_2];
; SM30-NEXT:    ld.param.b64 %rd2, [release_sys_i8_param_0];
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    ld.param.b8 %r9, [release_sys_i8_param_1];
; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
; SM30-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM30-NEXT:    and.b32 %r11, %r10, 3;
; SM30-NEXT:    shl.b32 %r1, %r11, 3;
; SM30-NEXT:    mov.b32 %r12, 255;
; SM30-NEXT:    shl.b32 %r13, %r12, %r1;
; SM30-NEXT:    not.b32 %r2, %r13;
; SM30-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM30-NEXT:    and.b32 %r15, %r14, 255;
; SM30-NEXT:    shl.b32 %r3, %r15, %r1;
; SM30-NEXT:    shl.b32 %r4, %r9, %r1;
; SM30-NEXT:    ld.b32 %r16, [%rd1];
; SM30-NEXT:    and.b32 %r20, %r16, %r2;
; SM30-NEXT:  $L__BB2_1: // %partword.cmpxchg.loop
; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
; SM30-NEXT:    or.b32 %r17, %r20, %r3;
; SM30-NEXT:    or.b32 %r18, %r20, %r4;
; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r18;
; SM30-NEXT:    @%p1 bra $L__BB2_3;
; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT:    // in Loop: Header=BB2_1 Depth=1
; SM30-NEXT:    and.b32 %r8, %r7, %r2;
; SM30-NEXT:    setp.ne.b32 %p2, %r20, %r8;
; SM30-NEXT:    mov.b32 %r20, %r8;
; SM30-NEXT:    @%p2 bra $L__BB2_1;
; SM30-NEXT:  $L__BB2_3: // %partword.cmpxchg.end
; SM30-NEXT:    st.param.b32 [func_retval0], %r14;
; SM30-NEXT:    ret;
;
; SM70-LABEL: release_sys_i8(
; SM70:       {
; SM70-NEXT:    .reg .pred %p<3>;
; SM70-NEXT:    .reg .b16 %rs<2>;
; SM70-NEXT:    .reg .b32 %r<21>;
; SM70-NEXT:    .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b8 %rs1, [release_sys_i8_param_2];
; SM70-NEXT:    ld.param.b64 %rd2, [release_sys_i8_param_0];
; SM70-NEXT:    fence.acq_rel.sys;
; SM70-NEXT:    ld.param.b8 %r9, [release_sys_i8_param_1];
; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
; SM70-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM70-NEXT:    and.b32 %r11, %r10, 3;
; SM70-NEXT:    shl.b32 %r1, %r11, 3;
; SM70-NEXT:    mov.b32 %r12, 255;
; SM70-NEXT:    shl.b32 %r13, %r12, %r1;
; SM70-NEXT:    not.b32 %r2, %r13;
; SM70-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM70-NEXT:    and.b32 %r15, %r14, 255;
; SM70-NEXT:    shl.b32 %r3, %r15, %r1;
; SM70-NEXT:    shl.b32 %r4, %r9, %r1;
; SM70-NEXT:    ld.b32 %r16, [%rd1];
; SM70-NEXT:    and.b32 %r20, %r16, %r2;
; SM70-NEXT:  $L__BB2_1: // %partword.cmpxchg.loop
; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
; SM70-NEXT:    or.b32 %r17, %r20, %r3;
; SM70-NEXT:    or.b32 %r18, %r20, %r4;
; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT:    @%p1 bra $L__BB2_3;
; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT:    // in Loop: Header=BB2_1 Depth=1
; SM70-NEXT:    and.b32 %r8, %r7, %r2;
; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT:    mov.b32 %r20, %r8;
; SM70-NEXT:    @%p2 bra $L__BB2_1;
; SM70-NEXT:  $L__BB2_3: // %partword.cmpxchg.end
; SM70-NEXT:    st.param.b32 [func_retval0], %r14;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new release monotonic
  ret i8 %new
}

; Byte-sized cmpxchg with acq_rel success ordering and acquire failure
; ordering.
; The expected PTX uses the part-word compare-and-swap loop on the enclosing
; 4-byte-aligned word, bracketed by fences: one in the entry block before the
; loop and one after the loop exit (%partword.cmpxchg.end). Both are
; membar.sys in the sm_32 run and fence.acq_rel.sys in the sm_70 run.
; The cmpxchg result (%pairold) is unused; the function returns %new.
define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-LABEL: acq_rel_sys_i8(
; SM30:       {
; SM30-NEXT:    .reg .pred %p<3>;
; SM30-NEXT:    .reg .b16 %rs<2>;
; SM30-NEXT:    .reg .b32 %r<21>;
; SM30-NEXT:    .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b8 %rs1, [acq_rel_sys_i8_param_2];
; SM30-NEXT:    ld.param.b64 %rd2, [acq_rel_sys_i8_param_0];
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    ld.param.b8 %r9, [acq_rel_sys_i8_param_1];
; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
; SM30-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM30-NEXT:    and.b32 %r11, %r10, 3;
; SM30-NEXT:    shl.b32 %r1, %r11, 3;
; SM30-NEXT:    mov.b32 %r12, 255;
; SM30-NEXT:    shl.b32 %r13, %r12, %r1;
; SM30-NEXT:    not.b32 %r2, %r13;
; SM30-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM30-NEXT:    and.b32 %r15, %r14, 255;
; SM30-NEXT:    shl.b32 %r3, %r15, %r1;
; SM30-NEXT:    shl.b32 %r4, %r9, %r1;
; SM30-NEXT:    ld.b32 %r16, [%rd1];
; SM30-NEXT:    and.b32 %r20, %r16, %r2;
; SM30-NEXT:  $L__BB3_1: // %partword.cmpxchg.loop
; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
; SM30-NEXT:    or.b32 %r17, %r20, %r3;
; SM30-NEXT:    or.b32 %r18, %r20, %r4;
; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r18;
; SM30-NEXT:    @%p1 bra $L__BB3_3;
; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT:    // in Loop: Header=BB3_1 Depth=1
; SM30-NEXT:    and.b32 %r8, %r7, %r2;
; SM30-NEXT:    setp.ne.b32 %p2, %r20, %r8;
; SM30-NEXT:    mov.b32 %r20, %r8;
; SM30-NEXT:    @%p2 bra $L__BB3_1;
; SM30-NEXT:  $L__BB3_3: // %partword.cmpxchg.end
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    st.param.b32 [func_retval0], %r14;
; SM30-NEXT:    ret;
;
; SM70-LABEL: acq_rel_sys_i8(
; SM70:       {
; SM70-NEXT:    .reg .pred %p<3>;
; SM70-NEXT:    .reg .b16 %rs<2>;
; SM70-NEXT:    .reg .b32 %r<21>;
; SM70-NEXT:    .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b8 %rs1, [acq_rel_sys_i8_param_2];
; SM70-NEXT:    ld.param.b64 %rd2, [acq_rel_sys_i8_param_0];
; SM70-NEXT:    fence.acq_rel.sys;
; SM70-NEXT:    ld.param.b8 %r9, [acq_rel_sys_i8_param_1];
; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
; SM70-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM70-NEXT:    and.b32 %r11, %r10, 3;
; SM70-NEXT:    shl.b32 %r1, %r11, 3;
; SM70-NEXT:    mov.b32 %r12, 255;
; SM70-NEXT:    shl.b32 %r13, %r12, %r1;
; SM70-NEXT:    not.b32 %r2, %r13;
; SM70-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM70-NEXT:    and.b32 %r15, %r14, 255;
; SM70-NEXT:    shl.b32 %r3, %r15, %r1;
; SM70-NEXT:    shl.b32 %r4, %r9, %r1;
; SM70-NEXT:    ld.b32 %r16, [%rd1];
; SM70-NEXT:    and.b32 %r20, %r16, %r2;
; SM70-NEXT:  $L__BB3_1: // %partword.cmpxchg.loop
; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
; SM70-NEXT:    or.b32 %r17, %r20, %r3;
; SM70-NEXT:    or.b32 %r18, %r20, %r4;
; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT:    @%p1 bra $L__BB3_3;
; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT:    // in Loop: Header=BB3_1 Depth=1
; SM70-NEXT:    and.b32 %r8, %r7, %r2;
; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT:    mov.b32 %r20, %r8;
; SM70-NEXT:    @%p2 bra $L__BB3_1;
; SM70-NEXT:  $L__BB3_3: // %partword.cmpxchg.end
; SM70-NEXT:    fence.acq_rel.sys;
; SM70-NEXT:    st.param.b32 [func_retval0], %r14;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new acq_rel acquire
  ret i8 %new
}

; Byte-sized cmpxchg with seq_cst success and failure orderings.
; The expected PTX uses the part-word compare-and-swap loop on the enclosing
; 4-byte-aligned word, bracketed by fences. The sm_32 run expects membar.sys
; both before the loop and after the loop exit; the sm_70 run expects
; fence.sc.sys before the loop and fence.acq_rel.sys after it, with the atomic
; itself emitted as atom.relaxed.cas.
; The cmpxchg result (%pairold) is unused; the function returns %new.
define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-LABEL: seq_cst_sys_i8(
; SM30:       {
; SM30-NEXT:    .reg .pred %p<3>;
; SM30-NEXT:    .reg .b16 %rs<2>;
; SM30-NEXT:    .reg .b32 %r<21>;
; SM30-NEXT:    .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b8 %rs1, [seq_cst_sys_i8_param_2];
; SM30-NEXT:    ld.param.b64 %rd2, [seq_cst_sys_i8_param_0];
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    ld.param.b8 %r9, [seq_cst_sys_i8_param_1];
; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
; SM30-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM30-NEXT:    and.b32 %r11, %r10, 3;
; SM30-NEXT:    shl.b32 %r1, %r11, 3;
; SM30-NEXT:    mov.b32 %r12, 255;
; SM30-NEXT:    shl.b32 %r13, %r12, %r1;
; SM30-NEXT:    not.b32 %r2, %r13;
; SM30-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM30-NEXT:    and.b32 %r15, %r14, 255;
; SM30-NEXT:    shl.b32 %r3, %r15, %r1;
; SM30-NEXT:    shl.b32 %r4, %r9, %r1;
; SM30-NEXT:    ld.b32 %r16, [%rd1];
; SM30-NEXT:    and.b32 %r20, %r16, %r2;
; SM30-NEXT:  $L__BB4_1: // %partword.cmpxchg.loop
; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
; SM30-NEXT:    or.b32 %r17, %r20, %r3;
; SM30-NEXT:    or.b32 %r18, %r20, %r4;
; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r18;
; SM30-NEXT:    @%p1 bra $L__BB4_3;
; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT:    // in Loop: Header=BB4_1 Depth=1
; SM30-NEXT:    and.b32 %r8, %r7, %r2;
; SM30-NEXT:    setp.ne.b32 %p2, %r20, %r8;
; SM30-NEXT:    mov.b32 %r20, %r8;
; SM30-NEXT:    @%p2 bra $L__BB4_1;
; SM30-NEXT:  $L__BB4_3: // %partword.cmpxchg.end
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    st.param.b32 [func_retval0], %r14;
; SM30-NEXT:    ret;
;
; SM70-LABEL: seq_cst_sys_i8(
; SM70:       {
; SM70-NEXT:    .reg .pred %p<3>;
; SM70-NEXT:    .reg .b16 %rs<2>;
; SM70-NEXT:    .reg .b32 %r<21>;
; SM70-NEXT:    .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b8 %rs1, [seq_cst_sys_i8_param_2];
; SM70-NEXT:    ld.param.b64 %rd2, [seq_cst_sys_i8_param_0];
; SM70-NEXT:    fence.sc.sys;
; SM70-NEXT:    ld.param.b8 %r9, [seq_cst_sys_i8_param_1];
; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
; SM70-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM70-NEXT:    and.b32 %r11, %r10, 3;
; SM70-NEXT:    shl.b32 %r1, %r11, 3;
; SM70-NEXT:    mov.b32 %r12, 255;
; SM70-NEXT:    shl.b32 %r13, %r12, %r1;
; SM70-NEXT:    not.b32 %r2, %r13;
; SM70-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM70-NEXT:    and.b32 %r15, %r14, 255;
; SM70-NEXT:    shl.b32 %r3, %r15, %r1;
; SM70-NEXT:    shl.b32 %r4, %r9, %r1;
; SM70-NEXT:    ld.b32 %r16, [%rd1];
; SM70-NEXT:    and.b32 %r20, %r16, %r2;
; SM70-NEXT:  $L__BB4_1: // %partword.cmpxchg.loop
; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
; SM70-NEXT:    or.b32 %r17, %r20, %r3;
; SM70-NEXT:    or.b32 %r18, %r20, %r4;
; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT:    @%p1 bra $L__BB4_3;
; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT:    // in Loop: Header=BB4_1 Depth=1
; SM70-NEXT:    and.b32 %r8, %r7, %r2;
; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT:    mov.b32 %r20, %r8;
; SM70-NEXT:    @%p2 bra $L__BB4_1;
; SM70-NEXT:  $L__BB4_3: // %partword.cmpxchg.end
; SM70-NEXT:    fence.acq_rel.sys;
; SM70-NEXT:    st.param.b32 [func_retval0], %r14;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new seq_cst seq_cst
  ret i8 %new
}

; CHECK-LABEL: relaxed_sys_i16
; Halfword-sized cmpxchg with monotonic success and failure orderings.
; The expected PTX expands the i16 operation into the part-word
; compare-and-swap loop on the enclosing 4-byte-aligned word, using a 65535
; lane mask shifted by an amount derived from the low two address bits.
; No fence is expected before or after the loop for this ordering.
; The cmpxchg result (%pairold) is unused; the function returns %new.
define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-LABEL: relaxed_sys_i16(
; SM30:       {
; SM30-NEXT:    .reg .pred %p<3>;
; SM30-NEXT:    .reg .b16 %rs<2>;
; SM30-NEXT:    .reg .b32 %r<20>;
; SM30-NEXT:    .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b16 %rs1, [relaxed_sys_i16_param_2];
; SM30-NEXT:    ld.param.b64 %rd2, [relaxed_sys_i16_param_0];
; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
; SM30-NEXT:    ld.param.b16 %r9, [relaxed_sys_i16_param_1];
; SM30-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM30-NEXT:    and.b32 %r11, %r10, 3;
; SM30-NEXT:    shl.b32 %r1, %r11, 3;
; SM30-NEXT:    mov.b32 %r12, 65535;
; SM30-NEXT:    shl.b32 %r13, %r12, %r1;
; SM30-NEXT:    not.b32 %r2, %r13;
; SM30-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM30-NEXT:    shl.b32 %r3, %r14, %r1;
; SM30-NEXT:    shl.b32 %r4, %r9, %r1;
; SM30-NEXT:    ld.b32 %r15, [%rd1];
; SM30-NEXT:    and.b32 %r19, %r15, %r2;
; SM30-NEXT:  $L__BB5_1: // %partword.cmpxchg.loop
; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
; SM30-NEXT:    or.b32 %r16, %r19, %r3;
; SM30-NEXT:    or.b32 %r17, %r19, %r4;
; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r17;
; SM30-NEXT:    @%p1 bra $L__BB5_3;
; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT:    // in Loop: Header=BB5_1 Depth=1
; SM30-NEXT:    and.b32 %r8, %r7, %r2;
; SM30-NEXT:    setp.ne.b32 %p2, %r19, %r8;
; SM30-NEXT:    mov.b32 %r19, %r8;
; SM30-NEXT:    @%p2 bra $L__BB5_1;
; SM30-NEXT:  $L__BB5_3: // %partword.cmpxchg.end
; SM30-NEXT:    st.param.b32 [func_retval0], %r14;
; SM30-NEXT:    ret;
;
; SM70-LABEL: relaxed_sys_i16(
; SM70:       {
; SM70-NEXT:    .reg .pred %p<3>;
; SM70-NEXT:    .reg .b16 %rs<2>;
; SM70-NEXT:    .reg .b32 %r<20>;
; SM70-NEXT:    .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b16 %rs1, [relaxed_sys_i16_param_2];
; SM70-NEXT:    ld.param.b64 %rd2, [relaxed_sys_i16_param_0];
; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
; SM70-NEXT:    ld.param.b16 %r9, [relaxed_sys_i16_param_1];
; SM70-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM70-NEXT:    and.b32 %r11, %r10, 3;
; SM70-NEXT:    shl.b32 %r1, %r11, 3;
; SM70-NEXT:    mov.b32 %r12, 65535;
; SM70-NEXT:    shl.b32 %r13, %r12, %r1;
; SM70-NEXT:    not.b32 %r2, %r13;
; SM70-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM70-NEXT:    shl.b32 %r3, %r14, %r1;
; SM70-NEXT:    shl.b32 %r4, %r9, %r1;
; SM70-NEXT:    ld.b32 %r15, [%rd1];
; SM70-NEXT:    and.b32 %r19, %r15, %r2;
; SM70-NEXT:  $L__BB5_1: // %partword.cmpxchg.loop
; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
; SM70-NEXT:    or.b32 %r16, %r19, %r3;
; SM70-NEXT:    or.b32 %r17, %r19, %r4;
; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT:    @%p1 bra $L__BB5_3;
; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT:    // in Loop: Header=BB5_1 Depth=1
; SM70-NEXT:    and.b32 %r8, %r7, %r2;
; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT:    mov.b32 %r19, %r8;
; SM70-NEXT:    @%p2 bra $L__BB5_1;
; SM70-NEXT:  $L__BB5_3: // %partword.cmpxchg.end
; SM70-NEXT:    st.param.b32 [func_retval0], %r14;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new monotonic monotonic
  ret i16 %new
}

; 16-bit cmpxchg, success ordering acquire / failure ordering acquire, no
; syncscope. The expected PTX emulates the operation with a masked 32-bit
; atom.cas retry loop on the 4-byte-aligned word containing the halfword and
; places a fence after the loop (membar.sys on sm_32, fence.acq_rel.sys on
; sm_70). The cmpxchg result is intentionally unused; the function returns %new.
define i16 @acquire_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-LABEL: acquire_sys_i16(
; SM30:       {
; SM30-NEXT:    .reg .pred %p<3>;
; SM30-NEXT:    .reg .b16 %rs<2>;
; SM30-NEXT:    .reg .b32 %r<20>;
; SM30-NEXT:    .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b16 %rs1, [acquire_sys_i16_param_2];
; SM30-NEXT:    ld.param.b64 %rd2, [acquire_sys_i16_param_0];
; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
; SM30-NEXT:    ld.param.b16 %r9, [acquire_sys_i16_param_1];
; SM30-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM30-NEXT:    and.b32 %r11, %r10, 3;
; SM30-NEXT:    shl.b32 %r1, %r11, 3;
; SM30-NEXT:    mov.b32 %r12, 65535;
; SM30-NEXT:    shl.b32 %r13, %r12, %r1;
; SM30-NEXT:    not.b32 %r2, %r13;
; SM30-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM30-NEXT:    shl.b32 %r3, %r14, %r1;
; SM30-NEXT:    shl.b32 %r4, %r9, %r1;
; SM30-NEXT:    ld.b32 %r15, [%rd1];
; SM30-NEXT:    and.b32 %r19, %r15, %r2;
; SM30-NEXT:  $L__BB6_1: // %partword.cmpxchg.loop
; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
; SM30-NEXT:    or.b32 %r16, %r19, %r3;
; SM30-NEXT:    or.b32 %r17, %r19, %r4;
; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r17;
; SM30-NEXT:    @%p1 bra $L__BB6_3;
; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT:    // in Loop: Header=BB6_1 Depth=1
; SM30-NEXT:    and.b32 %r8, %r7, %r2;
; SM30-NEXT:    setp.ne.b32 %p2, %r19, %r8;
; SM30-NEXT:    mov.b32 %r19, %r8;
; SM30-NEXT:    @%p2 bra $L__BB6_1;
; SM30-NEXT:  $L__BB6_3: // %partword.cmpxchg.end
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    st.param.b32 [func_retval0], %r14;
; SM30-NEXT:    ret;
;
; SM70-LABEL: acquire_sys_i16(
; SM70:       {
; SM70-NEXT:    .reg .pred %p<3>;
; SM70-NEXT:    .reg .b16 %rs<2>;
; SM70-NEXT:    .reg .b32 %r<20>;
; SM70-NEXT:    .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b16 %rs1, [acquire_sys_i16_param_2];
; SM70-NEXT:    ld.param.b64 %rd2, [acquire_sys_i16_param_0];
; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
; SM70-NEXT:    ld.param.b16 %r9, [acquire_sys_i16_param_1];
; SM70-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM70-NEXT:    and.b32 %r11, %r10, 3;
; SM70-NEXT:    shl.b32 %r1, %r11, 3;
; SM70-NEXT:    mov.b32 %r12, 65535;
; SM70-NEXT:    shl.b32 %r13, %r12, %r1;
; SM70-NEXT:    not.b32 %r2, %r13;
; SM70-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM70-NEXT:    shl.b32 %r3, %r14, %r1;
; SM70-NEXT:    shl.b32 %r4, %r9, %r1;
; SM70-NEXT:    ld.b32 %r15, [%rd1];
; SM70-NEXT:    and.b32 %r19, %r15, %r2;
; SM70-NEXT:  $L__BB6_1: // %partword.cmpxchg.loop
; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
; SM70-NEXT:    or.b32 %r16, %r19, %r3;
; SM70-NEXT:    or.b32 %r17, %r19, %r4;
; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT:    @%p1 bra $L__BB6_3;
; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT:    // in Loop: Header=BB6_1 Depth=1
; SM70-NEXT:    and.b32 %r8, %r7, %r2;
; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT:    mov.b32 %r19, %r8;
; SM70-NEXT:    @%p2 bra $L__BB6_1;
; SM70-NEXT:  $L__BB6_3: // %partword.cmpxchg.end
; SM70-NEXT:    fence.acq_rel.sys;
; SM70-NEXT:    st.param.b32 [func_retval0], %r14;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new acquire acquire
  ret i16 %new
}

; 16-bit cmpxchg, success ordering release / failure ordering monotonic, no
; syncscope. The expected PTX issues a leading fence (membar.sys on sm_32,
; fence.acq_rel.sys on sm_70) and then emulates the operation with a masked
; 32-bit atom.cas retry loop on the containing 4-byte-aligned word; no fence
; follows the loop. The cmpxchg result is intentionally unused; the function
; returns %new.
define i16 @release_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-LABEL: release_sys_i16(
; SM30:       {
; SM30-NEXT:    .reg .pred %p<3>;
; SM30-NEXT:    .reg .b16 %rs<2>;
; SM30-NEXT:    .reg .b32 %r<20>;
; SM30-NEXT:    .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b16 %rs1, [release_sys_i16_param_2];
; SM30-NEXT:    ld.param.b64 %rd2, [release_sys_i16_param_0];
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    ld.param.b16 %r9, [release_sys_i16_param_1];
; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
; SM30-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM30-NEXT:    and.b32 %r11, %r10, 3;
; SM30-NEXT:    shl.b32 %r1, %r11, 3;
; SM30-NEXT:    mov.b32 %r12, 65535;
; SM30-NEXT:    shl.b32 %r13, %r12, %r1;
; SM30-NEXT:    not.b32 %r2, %r13;
; SM30-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM30-NEXT:    shl.b32 %r3, %r14, %r1;
; SM30-NEXT:    shl.b32 %r4, %r9, %r1;
; SM30-NEXT:    ld.b32 %r15, [%rd1];
; SM30-NEXT:    and.b32 %r19, %r15, %r2;
; SM30-NEXT:  $L__BB7_1: // %partword.cmpxchg.loop
; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
; SM30-NEXT:    or.b32 %r16, %r19, %r3;
; SM30-NEXT:    or.b32 %r17, %r19, %r4;
; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r17;
; SM30-NEXT:    @%p1 bra $L__BB7_3;
; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT:    // in Loop: Header=BB7_1 Depth=1
; SM30-NEXT:    and.b32 %r8, %r7, %r2;
; SM30-NEXT:    setp.ne.b32 %p2, %r19, %r8;
; SM30-NEXT:    mov.b32 %r19, %r8;
; SM30-NEXT:    @%p2 bra $L__BB7_1;
; SM30-NEXT:  $L__BB7_3: // %partword.cmpxchg.end
; SM30-NEXT:    st.param.b32 [func_retval0], %r14;
; SM30-NEXT:    ret;
;
; SM70-LABEL: release_sys_i16(
; SM70:       {
; SM70-NEXT:    .reg .pred %p<3>;
; SM70-NEXT:    .reg .b16 %rs<2>;
; SM70-NEXT:    .reg .b32 %r<20>;
; SM70-NEXT:    .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b16 %rs1, [release_sys_i16_param_2];
; SM70-NEXT:    ld.param.b64 %rd2, [release_sys_i16_param_0];
; SM70-NEXT:    fence.acq_rel.sys;
; SM70-NEXT:    ld.param.b16 %r9, [release_sys_i16_param_1];
; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
; SM70-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM70-NEXT:    and.b32 %r11, %r10, 3;
; SM70-NEXT:    shl.b32 %r1, %r11, 3;
; SM70-NEXT:    mov.b32 %r12, 65535;
; SM70-NEXT:    shl.b32 %r13, %r12, %r1;
; SM70-NEXT:    not.b32 %r2, %r13;
; SM70-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM70-NEXT:    shl.b32 %r3, %r14, %r1;
; SM70-NEXT:    shl.b32 %r4, %r9, %r1;
; SM70-NEXT:    ld.b32 %r15, [%rd1];
; SM70-NEXT:    and.b32 %r19, %r15, %r2;
; SM70-NEXT:  $L__BB7_1: // %partword.cmpxchg.loop
; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
; SM70-NEXT:    or.b32 %r16, %r19, %r3;
; SM70-NEXT:    or.b32 %r17, %r19, %r4;
; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT:    @%p1 bra $L__BB7_3;
; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT:    // in Loop: Header=BB7_1 Depth=1
; SM70-NEXT:    and.b32 %r8, %r7, %r2;
; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT:    mov.b32 %r19, %r8;
; SM70-NEXT:    @%p2 bra $L__BB7_1;
; SM70-NEXT:  $L__BB7_3: // %partword.cmpxchg.end
; SM70-NEXT:    st.param.b32 [func_retval0], %r14;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new release monotonic
  ret i16 %new
}

; 16-bit cmpxchg, success ordering acq_rel / failure ordering acquire, no
; syncscope. The expected PTX brackets a masked 32-bit atom.cas retry loop
; (operating on the containing 4-byte-aligned word) with a fence before and a
; fence after: membar.sys on sm_32, fence.acq_rel.sys on sm_70. The cmpxchg
; result is intentionally unused; the function returns %new.
define i16 @acq_rel_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-LABEL: acq_rel_sys_i16(
; SM30:       {
; SM30-NEXT:    .reg .pred %p<3>;
; SM30-NEXT:    .reg .b16 %rs<2>;
; SM30-NEXT:    .reg .b32 %r<20>;
; SM30-NEXT:    .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b16 %rs1, [acq_rel_sys_i16_param_2];
; SM30-NEXT:    ld.param.b64 %rd2, [acq_rel_sys_i16_param_0];
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    ld.param.b16 %r9, [acq_rel_sys_i16_param_1];
; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
; SM30-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM30-NEXT:    and.b32 %r11, %r10, 3;
; SM30-NEXT:    shl.b32 %r1, %r11, 3;
; SM30-NEXT:    mov.b32 %r12, 65535;
; SM30-NEXT:    shl.b32 %r13, %r12, %r1;
; SM30-NEXT:    not.b32 %r2, %r13;
; SM30-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM30-NEXT:    shl.b32 %r3, %r14, %r1;
; SM30-NEXT:    shl.b32 %r4, %r9, %r1;
; SM30-NEXT:    ld.b32 %r15, [%rd1];
; SM30-NEXT:    and.b32 %r19, %r15, %r2;
; SM30-NEXT:  $L__BB8_1: // %partword.cmpxchg.loop
; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
; SM30-NEXT:    or.b32 %r16, %r19, %r3;
; SM30-NEXT:    or.b32 %r17, %r19, %r4;
; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r17;
; SM30-NEXT:    @%p1 bra $L__BB8_3;
; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT:    // in Loop: Header=BB8_1 Depth=1
; SM30-NEXT:    and.b32 %r8, %r7, %r2;
; SM30-NEXT:    setp.ne.b32 %p2, %r19, %r8;
; SM30-NEXT:    mov.b32 %r19, %r8;
; SM30-NEXT:    @%p2 bra $L__BB8_1;
; SM30-NEXT:  $L__BB8_3: // %partword.cmpxchg.end
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    st.param.b32 [func_retval0], %r14;
; SM30-NEXT:    ret;
;
; SM70-LABEL: acq_rel_sys_i16(
; SM70:       {
; SM70-NEXT:    .reg .pred %p<3>;
; SM70-NEXT:    .reg .b16 %rs<2>;
; SM70-NEXT:    .reg .b32 %r<20>;
; SM70-NEXT:    .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b16 %rs1, [acq_rel_sys_i16_param_2];
; SM70-NEXT:    ld.param.b64 %rd2, [acq_rel_sys_i16_param_0];
; SM70-NEXT:    fence.acq_rel.sys;
; SM70-NEXT:    ld.param.b16 %r9, [acq_rel_sys_i16_param_1];
; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
; SM70-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM70-NEXT:    and.b32 %r11, %r10, 3;
; SM70-NEXT:    shl.b32 %r1, %r11, 3;
; SM70-NEXT:    mov.b32 %r12, 65535;
; SM70-NEXT:    shl.b32 %r13, %r12, %r1;
; SM70-NEXT:    not.b32 %r2, %r13;
; SM70-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM70-NEXT:    shl.b32 %r3, %r14, %r1;
; SM70-NEXT:    shl.b32 %r4, %r9, %r1;
; SM70-NEXT:    ld.b32 %r15, [%rd1];
; SM70-NEXT:    and.b32 %r19, %r15, %r2;
; SM70-NEXT:  $L__BB8_1: // %partword.cmpxchg.loop
; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
; SM70-NEXT:    or.b32 %r16, %r19, %r3;
; SM70-NEXT:    or.b32 %r17, %r19, %r4;
; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT:    @%p1 bra $L__BB8_3;
; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT:    // in Loop: Header=BB8_1 Depth=1
; SM70-NEXT:    and.b32 %r8, %r7, %r2;
; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT:    mov.b32 %r19, %r8;
; SM70-NEXT:    @%p2 bra $L__BB8_1;
; SM70-NEXT:  $L__BB8_3: // %partword.cmpxchg.end
; SM70-NEXT:    fence.acq_rel.sys;
; SM70-NEXT:    st.param.b32 [func_retval0], %r14;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new acq_rel acquire
  ret i16 %new
}

; CHECK-LABEL: seq_cst_sys_i16
; 16-bit cmpxchg, success ordering seq_cst / failure ordering seq_cst, no
; syncscope. The expected PTX brackets a masked 32-bit atom.cas retry loop
; (operating on the containing 4-byte-aligned word) with fences: membar.sys
; before and after on sm_32; fence.sc.sys before and fence.acq_rel.sys after on
; sm_70. The cmpxchg result is intentionally unused; the function returns %new.
define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-LABEL: seq_cst_sys_i16(
; SM30:       {
; SM30-NEXT:    .reg .pred %p<3>;
; SM30-NEXT:    .reg .b16 %rs<2>;
; SM30-NEXT:    .reg .b32 %r<20>;
; SM30-NEXT:    .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b16 %rs1, [seq_cst_sys_i16_param_2];
; SM30-NEXT:    ld.param.b64 %rd2, [seq_cst_sys_i16_param_0];
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    ld.param.b16 %r9, [seq_cst_sys_i16_param_1];
; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
; SM30-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM30-NEXT:    and.b32 %r11, %r10, 3;
; SM30-NEXT:    shl.b32 %r1, %r11, 3;
; SM30-NEXT:    mov.b32 %r12, 65535;
; SM30-NEXT:    shl.b32 %r13, %r12, %r1;
; SM30-NEXT:    not.b32 %r2, %r13;
; SM30-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM30-NEXT:    shl.b32 %r3, %r14, %r1;
; SM30-NEXT:    shl.b32 %r4, %r9, %r1;
; SM30-NEXT:    ld.b32 %r15, [%rd1];
; SM30-NEXT:    and.b32 %r19, %r15, %r2;
; SM30-NEXT:  $L__BB9_1: // %partword.cmpxchg.loop
; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
; SM30-NEXT:    or.b32 %r16, %r19, %r3;
; SM30-NEXT:    or.b32 %r17, %r19, %r4;
; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r17;
; SM30-NEXT:    @%p1 bra $L__BB9_3;
; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT:    // in Loop: Header=BB9_1 Depth=1
; SM30-NEXT:    and.b32 %r8, %r7, %r2;
; SM30-NEXT:    setp.ne.b32 %p2, %r19, %r8;
; SM30-NEXT:    mov.b32 %r19, %r8;
; SM30-NEXT:    @%p2 bra $L__BB9_1;
; SM30-NEXT:  $L__BB9_3: // %partword.cmpxchg.end
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    st.param.b32 [func_retval0], %r14;
; SM30-NEXT:    ret;
;
; SM70-LABEL: seq_cst_sys_i16(
; SM70:       {
; SM70-NEXT:    .reg .pred %p<3>;
; SM70-NEXT:    .reg .b16 %rs<2>;
; SM70-NEXT:    .reg .b32 %r<20>;
; SM70-NEXT:    .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b16 %rs1, [seq_cst_sys_i16_param_2];
; SM70-NEXT:    ld.param.b64 %rd2, [seq_cst_sys_i16_param_0];
; SM70-NEXT:    fence.sc.sys;
; SM70-NEXT:    ld.param.b16 %r9, [seq_cst_sys_i16_param_1];
; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
; SM70-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM70-NEXT:    and.b32 %r11, %r10, 3;
; SM70-NEXT:    shl.b32 %r1, %r11, 3;
; SM70-NEXT:    mov.b32 %r12, 65535;
; SM70-NEXT:    shl.b32 %r13, %r12, %r1;
; SM70-NEXT:    not.b32 %r2, %r13;
; SM70-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM70-NEXT:    shl.b32 %r3, %r14, %r1;
; SM70-NEXT:    shl.b32 %r4, %r9, %r1;
; SM70-NEXT:    ld.b32 %r15, [%rd1];
; SM70-NEXT:    and.b32 %r19, %r15, %r2;
; SM70-NEXT:  $L__BB9_1: // %partword.cmpxchg.loop
; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
; SM70-NEXT:    or.b32 %r16, %r19, %r3;
; SM70-NEXT:    or.b32 %r17, %r19, %r4;
; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT:    @%p1 bra $L__BB9_3;
; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT:    // in Loop: Header=BB9_1 Depth=1
; SM70-NEXT:    and.b32 %r8, %r7, %r2;
; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT:    mov.b32 %r19, %r8;
; SM70-NEXT:    @%p2 bra $L__BB9_1;
; SM70-NEXT:  $L__BB9_3: // %partword.cmpxchg.end
; SM70-NEXT:    fence.acq_rel.sys;
; SM70-NEXT:    st.param.b32 [func_retval0], %r14;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new seq_cst seq_cst
  ret i16 %new
}

; CHECK-LABEL: relaxed_sys_i32
; 32-bit cmpxchg, success ordering monotonic / failure ordering monotonic, no
; syncscope. The expected PTX is a single native compare-and-swap with no
; fences: atom.cas.b32 on sm_32, atom.relaxed.cas.b32 on sm_70. The cmpxchg
; result is intentionally unused; the function returns %new.
define i32 @relaxed_sys_i32(ptr %addr, i32 %cmp, i32 %new) {
; SM30-LABEL: relaxed_sys_i32(
; SM30:       {
; SM30-NEXT:    .reg .b32 %r<4>;
; SM30-NEXT:    .reg .b64 %rd<2>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b64 %rd1, [relaxed_sys_i32_param_0];
; SM30-NEXT:    ld.param.b32 %r1, [relaxed_sys_i32_param_1];
; SM30-NEXT:    ld.param.b32 %r2, [relaxed_sys_i32_param_2];
; SM30-NEXT:    atom.cas.b32 %r3, [%rd1], %r1, %r2;
; SM30-NEXT:    st.param.b32 [func_retval0], %r2;
; SM30-NEXT:    ret;
;
; SM70-LABEL: relaxed_sys_i32(
; SM70:       {
; SM70-NEXT:    .reg .b32 %r<4>;
; SM70-NEXT:    .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b64 %rd1, [relaxed_sys_i32_param_0];
; SM70-NEXT:    ld.param.b32 %r1, [relaxed_sys_i32_param_1];
; SM70-NEXT:    ld.param.b32 %r2, [relaxed_sys_i32_param_2];
; SM70-NEXT:    atom.relaxed.cas.b32 %r3, [%rd1], %r1, %r2;
; SM70-NEXT:    st.param.b32 [func_retval0], %r2;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new monotonic monotonic
  ret i32 %new
}

; 32-bit cmpxchg, success ordering acq_rel / failure ordering acquire, no
; syncscope. The expected PTX is a single native compare-and-swap: a plain
; atom.cas.b32 on sm_32 and atom.acq_rel.cas.b32 on sm_70. The cmpxchg result
; is intentionally unused; the function returns %new.
define i32 @acq_rel_sys_i32(ptr %addr, i32 %cmp, i32 %new) {
; SM30-LABEL: acq_rel_sys_i32(
; SM30:       {
; SM30-NEXT:    .reg .b32 %r<4>;
; SM30-NEXT:    .reg .b64 %rd<2>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b64 %rd1, [acq_rel_sys_i32_param_0];
; SM30-NEXT:    ld.param.b32 %r1, [acq_rel_sys_i32_param_1];
; SM30-NEXT:    ld.param.b32 %r2, [acq_rel_sys_i32_param_2];
; SM30-NEXT:    atom.cas.b32 %r3, [%rd1], %r1, %r2;
; SM30-NEXT:    st.param.b32 [func_retval0], %r2;
; SM30-NEXT:    ret;
;
; SM70-LABEL: acq_rel_sys_i32(
; SM70:       {
; SM70-NEXT:    .reg .b32 %r<4>;
; SM70-NEXT:    .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b64 %rd1, [acq_rel_sys_i32_param_0];
; SM70-NEXT:    ld.param.b32 %r1, [acq_rel_sys_i32_param_1];
; SM70-NEXT:    ld.param.b32 %r2, [acq_rel_sys_i32_param_2];
; SM70-NEXT:    atom.acq_rel.cas.b32 %r3, [%rd1], %r1, %r2;
; SM70-NEXT:    st.param.b32 [func_retval0], %r2;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new acq_rel acquire
  ret i32 %new
}

; 32-bit cmpxchg, success ordering acquire / failure ordering acquire, no
; syncscope. The expected PTX is a single native compare-and-swap: a plain
; atom.cas.b32 on sm_32 and atom.acquire.cas.b32 on sm_70. The cmpxchg result
; is intentionally unused; the function returns %new.
define i32 @acquire_sys_i32(ptr %addr, i32 %cmp, i32 %new) {
; SM30-LABEL: acquire_sys_i32(
; SM30:       {
; SM30-NEXT:    .reg .b32 %r<4>;
; SM30-NEXT:    .reg .b64 %rd<2>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b64 %rd1, [acquire_sys_i32_param_0];
; SM30-NEXT:    ld.param.b32 %r1, [acquire_sys_i32_param_1];
; SM30-NEXT:    ld.param.b32 %r2, [acquire_sys_i32_param_2];
; SM30-NEXT:    atom.cas.b32 %r3, [%rd1], %r1, %r2;
; SM30-NEXT:    st.param.b32 [func_retval0], %r2;
; SM30-NEXT:    ret;
;
; SM70-LABEL: acquire_sys_i32(
; SM70:       {
; SM70-NEXT:    .reg .b32 %r<4>;
; SM70-NEXT:    .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b64 %rd1, [acquire_sys_i32_param_0];
; SM70-NEXT:    ld.param.b32 %r1, [acquire_sys_i32_param_1];
; SM70-NEXT:    ld.param.b32 %r2, [acquire_sys_i32_param_2];
; SM70-NEXT:    atom.acquire.cas.b32 %r3, [%rd1], %r1, %r2;
; SM70-NEXT:    st.param.b32 [func_retval0], %r2;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new acquire acquire
  ret i32 %new
}

; 32-bit cmpxchg, success ordering release / failure ordering monotonic, no
; syncscope. The expected PTX is a single native compare-and-swap: a plain
; atom.cas.b32 on sm_32 and atom.release.cas.b32 on sm_70. The cmpxchg result
; is intentionally unused; the function returns %new.
define i32 @release_sys_i32(ptr %addr, i32 %cmp, i32 %new) {
; SM30-LABEL: release_sys_i32(
; SM30:       {
; SM30-NEXT:    .reg .b32 %r<4>;
; SM30-NEXT:    .reg .b64 %rd<2>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b64 %rd1, [release_sys_i32_param_0];
; SM30-NEXT:    ld.param.b32 %r1, [release_sys_i32_param_1];
; SM30-NEXT:    ld.param.b32 %r2, [release_sys_i32_param_2];
; SM30-NEXT:    atom.cas.b32 %r3, [%rd1], %r1, %r2;
; SM30-NEXT:    st.param.b32 [func_retval0], %r2;
; SM30-NEXT:    ret;
;
; SM70-LABEL: release_sys_i32(
; SM70:       {
; SM70-NEXT:    .reg .b32 %r<4>;
; SM70-NEXT:    .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b64 %rd1, [release_sys_i32_param_0];
; SM70-NEXT:    ld.param.b32 %r1, [release_sys_i32_param_1];
; SM70-NEXT:    ld.param.b32 %r2, [release_sys_i32_param_2];
; SM70-NEXT:    atom.release.cas.b32 %r3, [%rd1], %r1, %r2;
; SM70-NEXT:    st.param.b32 [func_retval0], %r2;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new release monotonic
  ret i32 %new
}

; 32-bit cmpxchg, success ordering seq_cst / failure ordering seq_cst, no
; syncscope. The expected PTX is a leading fence followed by a single native
; compare-and-swap: membar.sys + atom.cas.b32 on sm_32, fence.sc.sys +
; atom.acquire.cas.b32 on sm_70. The cmpxchg result is intentionally unused;
; the function returns %new.
define i32 @seq_cst_sys_i32(ptr %addr, i32 %cmp, i32 %new) {
; SM30-LABEL: seq_cst_sys_i32(
; SM30:       {
; SM30-NEXT:    .reg .b32 %r<4>;
; SM30-NEXT:    .reg .b64 %rd<2>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b64 %rd1, [seq_cst_sys_i32_param_0];
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    ld.param.b32 %r1, [seq_cst_sys_i32_param_1];
; SM30-NEXT:    ld.param.b32 %r2, [seq_cst_sys_i32_param_2];
; SM30-NEXT:    atom.cas.b32 %r3, [%rd1], %r1, %r2;
; SM30-NEXT:    st.param.b32 [func_retval0], %r2;
; SM30-NEXT:    ret;
;
; SM70-LABEL: seq_cst_sys_i32(
; SM70:       {
; SM70-NEXT:    .reg .b32 %r<4>;
; SM70-NEXT:    .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b64 %rd1, [seq_cst_sys_i32_param_0];
; SM70-NEXT:    fence.sc.sys;
; SM70-NEXT:    ld.param.b32 %r1, [seq_cst_sys_i32_param_1];
; SM70-NEXT:    ld.param.b32 %r2, [seq_cst_sys_i32_param_2];
; SM70-NEXT:    atom.acquire.cas.b32 %r3, [%rd1], %r1, %r2;
; SM70-NEXT:    st.param.b32 [func_retval0], %r2;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst seq_cst
  ret i32 %new
}

; CHECK-LABEL: relaxed_sys_i64
; 64-bit cmpxchg, success ordering monotonic / failure ordering monotonic, no
; syncscope. The expected PTX is a single native compare-and-swap with no
; fences: atom.cas.b64 on sm_32, atom.relaxed.cas.b64 on sm_70. The cmpxchg
; result is intentionally unused; the function returns %new.
define i64 @relaxed_sys_i64(ptr %addr, i64 %cmp, i64 %new) {
; SM30-LABEL: relaxed_sys_i64(
; SM30:       {
; SM30-NEXT:    .reg .b64 %rd<5>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b64 %rd1, [relaxed_sys_i64_param_0];
; SM30-NEXT:    ld.param.b64 %rd2, [relaxed_sys_i64_param_1];
; SM30-NEXT:    ld.param.b64 %rd3, [relaxed_sys_i64_param_2];
; SM30-NEXT:    atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM30-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM30-NEXT:    ret;
;
; SM70-LABEL: relaxed_sys_i64(
; SM70:       {
; SM70-NEXT:    .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b64 %rd1, [relaxed_sys_i64_param_0];
; SM70-NEXT:    ld.param.b64 %rd2, [relaxed_sys_i64_param_1];
; SM70-NEXT:    ld.param.b64 %rd3, [relaxed_sys_i64_param_2];
; SM70-NEXT:    atom.relaxed.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM70-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new monotonic monotonic
  ret i64 %new
}

; 64-bit cmpxchg, success ordering acquire / failure ordering acquire, no
; syncscope. The expected PTX is a single native compare-and-swap: a plain
; atom.cas.b64 on sm_32 and atom.acquire.cas.b64 on sm_70. The cmpxchg result
; is intentionally unused; the function returns %new.
define i64 @acquire_sys_i64(ptr %addr, i64 %cmp, i64 %new) {
; SM30-LABEL: acquire_sys_i64(
; SM30:       {
; SM30-NEXT:    .reg .b64 %rd<5>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b64 %rd1, [acquire_sys_i64_param_0];
; SM30-NEXT:    ld.param.b64 %rd2, [acquire_sys_i64_param_1];
; SM30-NEXT:    ld.param.b64 %rd3, [acquire_sys_i64_param_2];
; SM30-NEXT:    atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM30-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM30-NEXT:    ret;
;
; SM70-LABEL: acquire_sys_i64(
; SM70:       {
; SM70-NEXT:    .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b64 %rd1, [acquire_sys_i64_param_0];
; SM70-NEXT:    ld.param.b64 %rd2, [acquire_sys_i64_param_1];
; SM70-NEXT:    ld.param.b64 %rd3, [acquire_sys_i64_param_2];
; SM70-NEXT:    atom.acquire.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM70-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new acquire acquire
  ret i64 %new
}

; 64-bit cmpxchg, success ordering acq_rel / failure ordering acquire, no
; syncscope. The expected PTX is a single native compare-and-swap: a plain
; atom.cas.b64 on sm_32 and atom.acq_rel.cas.b64 on sm_70. The cmpxchg result
; is intentionally unused; the function returns %new.
define i64 @acq_rel_sys_i64(ptr %addr, i64 %cmp, i64 %new) {
; SM30-LABEL: acq_rel_sys_i64(
; SM30:       {
; SM30-NEXT:    .reg .b64 %rd<5>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b64 %rd1, [acq_rel_sys_i64_param_0];
; SM30-NEXT:    ld.param.b64 %rd2, [acq_rel_sys_i64_param_1];
; SM30-NEXT:    ld.param.b64 %rd3, [acq_rel_sys_i64_param_2];
; SM30-NEXT:    atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM30-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM30-NEXT:    ret;
;
; SM70-LABEL: acq_rel_sys_i64(
; SM70:       {
; SM70-NEXT:    .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b64 %rd1, [acq_rel_sys_i64_param_0];
; SM70-NEXT:    ld.param.b64 %rd2, [acq_rel_sys_i64_param_1];
; SM70-NEXT:    ld.param.b64 %rd3, [acq_rel_sys_i64_param_2];
; SM70-NEXT:    atom.acq_rel.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM70-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new acq_rel acquire
  ret i64 %new
}

; 64-bit cmpxchg, success ordering release / failure ordering monotonic, no
; syncscope. The expected PTX is a single native compare-and-swap: a plain
; atom.cas.b64 on sm_32 and atom.release.cas.b64 on sm_70. The cmpxchg result
; is intentionally unused; the function returns %new.
define i64 @release_sys_i64(ptr %addr, i64 %cmp, i64 %new) {
; SM30-LABEL: release_sys_i64(
; SM30:       {
; SM30-NEXT:    .reg .b64 %rd<5>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b64 %rd1, [release_sys_i64_param_0];
; SM30-NEXT:    ld.param.b64 %rd2, [release_sys_i64_param_1];
; SM30-NEXT:    ld.param.b64 %rd3, [release_sys_i64_param_2];
; SM30-NEXT:    atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM30-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM30-NEXT:    ret;
;
; SM70-LABEL: release_sys_i64(
; SM70:       {
; SM70-NEXT:    .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b64 %rd1, [release_sys_i64_param_0];
; SM70-NEXT:    ld.param.b64 %rd2, [release_sys_i64_param_1];
; SM70-NEXT:    ld.param.b64 %rd3, [release_sys_i64_param_2];
; SM70-NEXT:    atom.release.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM70-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new release monotonic
  ret i64 %new
}

; 64-bit cmpxchg with `seq_cst` success and failure orderings; no syncscope is
; given on the instruction. The cmpxchg result (%pairold) is intentionally
; unused and the function returns %new, so the expected PTX stores the third
; parameter straight to func_retval0.
; Expected lowering per the assertions below: membar.sys followed by a plain
; atom.cas.b64 for the SM30 prefix, and fence.sc.sys followed by
; atom.acquire.cas.b64 for the SM70 prefix.
; NOTE(review): the RUN lines at the top of this file appear to enable only the
; SM30 and SM70 prefixes. If no RUN line passes the SM90 prefix to FileCheck,
; the SM90 assertions in this function are never checked -- confirm, then
; either add an sm_90 RUN line or regenerate with update_llc_test_checks.py.
; NOTE(review): the SM90 expectation (unqualified atom.cas.b64 followed by
; fence.acquire.sys) has a different shape from the SM70 one
; (atom.acquire.cas.b64, no trailing fence) -- verify against real llc output
; for sm_90 rather than trusting either form.
define i64 @seq_cst_sys_i64(ptr %addr, i64 %cmp, i64 %new) {
; SM30-LABEL: seq_cst_sys_i64(
; SM30:       {
; SM30-NEXT:    .reg .b64 %rd<5>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.b64 %rd1, [seq_cst_sys_i64_param_0];
; SM30-NEXT:    membar.sys;
; SM30-NEXT:    ld.param.b64 %rd2, [seq_cst_sys_i64_param_1];
; SM30-NEXT:    ld.param.b64 %rd3, [seq_cst_sys_i64_param_2];
; SM30-NEXT:    atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM30-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM30-NEXT:    ret;
;
; SM70-LABEL: seq_cst_sys_i64(
; SM70:       {
; SM70-NEXT:    .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.b64 %rd1, [seq_cst_sys_i64_param_0];
; SM70-NEXT:    fence.sc.sys;
; SM70-NEXT:    ld.param.b64 %rd2, [seq_cst_sys_i64_param_1];
; SM70-NEXT:    ld.param.b64 %rd3, [seq_cst_sys_i64_param_2];
; SM70-NEXT:    atom.acquire.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM70-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM70-NEXT:    ret;
;
; SM90-LABEL: seq_cst_sys_i64(
; SM90:       {
; SM90-NEXT:    .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT:  // %bb.0:
; SM90-NEXT:    ld.param.b64 %rd1, [seq_cst_sys_i64_param_0];
; SM90-NEXT:    fence.sc.sys;
; SM90-NEXT:    ld.param.b64 %rd2, [seq_cst_sys_i64_param_1];
; SM90-NEXT:    ld.param.b64 %rd3, [seq_cst_sys_i64_param_2];
; SM90-NEXT:    atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM90-NEXT:    fence.acquire.sys;
; SM90-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM90-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new seq_cst seq_cst
  ret i64 %new
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
