; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible-sve  < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible-sve  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -mattr=+sme -force-streaming-compatible-sve  < %s | FileCheck %s --check-prefixes=CHECK,SVE2

; This test mainly covers the legal types for a given vector width, as mulh
; nodes do not get generated for non-legal types. The v4i8 and v2i16 functions
; are kept to show the resulting extend + mul + lsr sequence.

target triple = "aarch64-unknown-linux-gnu"

;
; SMULH
;

; Sign-extends two <4 x i8> operands to i16, multiplies them, shifts the
; product right (logically) by 4 and truncates back to i8. The assertions
; below expect an extend + mul + lsr sequence rather than an smulh.
; NOTE(review): the shift amount is 4, not the element width 8, so this does
; not extract the true high half; the expected "lsr #4" matches the IR as
; written - confirm whether that is intentional before changing it.
define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; SVE-LABEL: smulh_v4i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.h, vl4
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    sxtb z0.h, p0/m, z0.h
; SVE-NEXT:    sxtb z1.h, p0/m, z1.h
; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    lsr z0.h, z0.h, #4
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v4i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.h, vl4
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    sxtb z0.h, p0/m, z0.h
; SVE2-NEXT:    sxtb z1.h, p0/m, z1.h
; SVE2-NEXT:    mul z0.h, z0.h, z1.h
; SVE2-NEXT:    lsr z0.h, z0.h, #4
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
  %1 = sext <4 x i8> %op1 to <4 x i16>
  %2 = sext <4 x i8> %op2 to <4 x i16>
  %mul = mul <4 x i16> %1, %2
  %shr = lshr <4 x i16> %mul, <i16 4, i16 4, i16 4, i16 4>
  %res = trunc <4 x i16> %shr to <4 x i8>
  ret <4 x i8> %res
}

; Signed high-half multiply of two <8 x i8> vectors: sign-extend to i16,
; multiply, shift right by 8 and truncate. The assertions below expect a
; single smulh (predicated with +sve, unpredicated with +sve2/+sme).
define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; SVE-LABEL: smulh_v8i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.b, vl8
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v8i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    smulh z0.b, z0.b, z1.b
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
  %1 = sext <8 x i8> %op1 to <8 x i16>
  %2 = sext <8 x i8> %op2 to <8 x i16>
  %mul = mul <8 x i16> %1, %2
  %shr = lshr <8 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %res = trunc <8 x i16> %shr to <8 x i8>
  ret <8 x i8> %res
}

; Signed high-half multiply of two <16 x i8> vectors, written as
; sext -> mul -> lshr 8 -> trunc. Expected to select one smulh on .b elements.
define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; SVE-LABEL: smulh_v16i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.b, vl16
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v16i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    smulh z0.b, z0.b, z1.b
; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = sext <16 x i8> %op1 to <16 x i16>
  %ext2 = sext <16 x i8> %op2 to <16 x i16>
  %prod = mul <16 x i16> %ext1, %ext2
  %hi = lshr <16 x i16> %prod, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8,
                                i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %narrow = trunc <16 x i16> %hi to <16 x i8>
  ret <16 x i8> %narrow
}

; Signed high-half multiply of two <32 x i8> vectors loaded from %a and %b;
; the result is stored back to %a. The assertions expect the 256-bit operation
; to be split into two 128-bit smulh instructions.
define void @smulh_v32i8(ptr %a, ptr %b) {
; SVE-LABEL: smulh_v32i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.b, vl16
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    smulh z1.b, p0/m, z1.b, z3.b
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v32i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    smulh z0.b, z1.b, z0.b
; SVE2-NEXT:    smulh z1.b, z2.b, z3.b
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
  %lhs = load <32 x i8>, ptr %a
  %rhs = load <32 x i8>, ptr %b
  %ext1 = sext <32 x i8> %lhs to <32 x i16>
  %ext2 = sext <32 x i8> %rhs to <32 x i16>
  %prod = mul <32 x i16> %ext1, %ext2
  %hi = lshr <32 x i16> %prod, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8,
                                i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8,
                                i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8,
                                i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %narrow = trunc <32 x i16> %hi to <32 x i8>
  store <32 x i8> %narrow, ptr %a
  ret void
}

; Signed high-half multiply pattern on <2 x i16>: sext to i32, mul, lshr 16,
; trunc. The assertions below expect an sxth + mul + lsr sequence on .s
; elements rather than an smulh.
define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; SVE-LABEL: smulh_v2i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s, vl2
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    sxth z0.s, p0/m, z0.s
; SVE-NEXT:    sxth z1.s, p0/m, z1.s
; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
; SVE-NEXT:    lsr z0.s, z0.s, #16
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v2i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.s, vl2
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    sxth z0.s, p0/m, z0.s
; SVE2-NEXT:    sxth z1.s, p0/m, z1.s
; SVE2-NEXT:    mul z0.s, z0.s, z1.s
; SVE2-NEXT:    lsr z0.s, z0.s, #16
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = sext <2 x i16> %op1 to <2 x i32>
  %ext2 = sext <2 x i16> %op2 to <2 x i32>
  %prod = mul <2 x i32> %ext1, %ext2
  %hi = lshr <2 x i32> %prod, <i32 16, i32 16>
  %narrow = trunc <2 x i32> %hi to <2 x i16>
  ret <2 x i16> %narrow
}

; Signed high-half multiply of two <4 x i16> vectors, written as
; sext -> mul -> lshr 16 -> trunc. Expected to select one smulh on .h elements.
define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; SVE-LABEL: smulh_v4i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.h, vl4
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v4i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    smulh z0.h, z0.h, z1.h
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = sext <4 x i16> %op1 to <4 x i32>
  %ext2 = sext <4 x i16> %op2 to <4 x i32>
  %prod = mul <4 x i32> %ext1, %ext2
  %hi = lshr <4 x i32> %prod, <i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <4 x i32> %hi to <4 x i16>
  ret <4 x i16> %narrow
}

; Signed high-half multiply of two <8 x i16> vectors, written as
; sext -> mul -> lshr 16 -> trunc. Expected to select one smulh on .h elements.
define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; SVE-LABEL: smulh_v8i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.h, vl8
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v8i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    smulh z0.h, z0.h, z1.h
; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = sext <8 x i16> %op1 to <8 x i32>
  %ext2 = sext <8 x i16> %op2 to <8 x i32>
  %prod = mul <8 x i32> %ext1, %ext2
  %hi = lshr <8 x i32> %prod, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <8 x i32> %hi to <8 x i16>
  ret <8 x i16> %narrow
}

; Signed high-half multiply of two <16 x i16> vectors loaded from %a and %b;
; the result is stored back to %a. The assertions expect the 256-bit operation
; to be split into two 128-bit smulh instructions.
define void @smulh_v16i16(ptr %a, ptr %b) {
; SVE-LABEL: smulh_v16i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.h, vl8
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    smulh z1.h, p0/m, z1.h, z3.h
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v16i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    smulh z0.h, z1.h, z0.h
; SVE2-NEXT:    smulh z1.h, z2.h, z3.h
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
  %lhs = load <16 x i16>, ptr %a
  %rhs = load <16 x i16>, ptr %b
  %ext1 = sext <16 x i16> %lhs to <16 x i32>
  %ext2 = sext <16 x i16> %rhs to <16 x i32>
  %prod = mul <16 x i32> %ext1, %ext2
  %hi = lshr <16 x i32> %prod, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16,
                                i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <16 x i32> %hi to <16 x i16>
  store <16 x i16> %narrow, ptr %a
  ret void
}

; Signed high-half multiply of two <2 x i32> vectors, written as
; sext -> mul -> lshr 32 -> trunc. Expected to select one smulh on .s elements.
define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; SVE-LABEL: smulh_v2i32:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s, vl2
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v2i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    smulh z0.s, z0.s, z1.s
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = sext <2 x i32> %op1 to <2 x i64>
  %ext2 = sext <2 x i32> %op2 to <2 x i64>
  %prod = mul <2 x i64> %ext1, %ext2
  %hi = lshr <2 x i64> %prod, <i64 32, i64 32>
  %narrow = trunc <2 x i64> %hi to <2 x i32>
  ret <2 x i32> %narrow
}

; Signed high-half multiply of two <4 x i32> vectors, written as
; sext -> mul -> lshr 32 -> trunc. Expected to select one smulh on .s elements.
define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; SVE-LABEL: smulh_v4i32:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s, vl4
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v4i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    smulh z0.s, z0.s, z1.s
; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = sext <4 x i32> %op1 to <4 x i64>
  %ext2 = sext <4 x i32> %op2 to <4 x i64>
  %prod = mul <4 x i64> %ext1, %ext2
  %hi = lshr <4 x i64> %prod, <i64 32, i64 32, i64 32, i64 32>
  %narrow = trunc <4 x i64> %hi to <4 x i32>
  ret <4 x i32> %narrow
}

; Signed high-half multiply of two <8 x i32> vectors loaded from %a and %b;
; the result is stored back to %a. The assertions expect the 256-bit operation
; to be split into two 128-bit smulh instructions.
define void @smulh_v8i32(ptr %a, ptr %b) {
; SVE-LABEL: smulh_v8i32:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s, vl4
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    smulh z1.s, p0/m, z1.s, z3.s
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v8i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    smulh z0.s, z1.s, z0.s
; SVE2-NEXT:    smulh z1.s, z2.s, z3.s
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
  %lhs = load <8 x i32>, ptr %a
  %rhs = load <8 x i32>, ptr %b
  %ext1 = sext <8 x i32> %lhs to <8 x i64>
  %ext2 = sext <8 x i32> %rhs to <8 x i64>
  %prod = mul <8 x i64> %ext1, %ext2
  %hi = lshr <8 x i64> %prod, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
  %narrow = trunc <8 x i64> %hi to <8 x i32>
  store <8 x i32> %narrow, ptr %a
  ret void
}

; Signed high-half multiply of two <1 x i64> vectors: sign-extend to i128,
; multiply, shift right by 64 and truncate. The shift amount is a literal
; constant vector, matching the form used by the other functions in this file.
; The assertions below expect a single smulh on .d elements.
define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; SVE-LABEL: smulh_v1i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl1
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v1i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    smulh z0.d, z0.d, z1.d
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
  %1 = sext <1 x i64> %op1 to <1 x i128>
  %2 = sext <1 x i64> %op2 to <1 x i128>
  %mul = mul <1 x i128> %1, %2
  %shr = lshr <1 x i128> %mul, <i128 64>
  %res = trunc <1 x i128> %shr to <1 x i64>
  ret <1 x i64> %res
}

; Signed high-half multiply of two <2 x i64> vectors, written as
; sext -> mul -> lshr 64 -> trunc. Expected to select one smulh on .d elements.
define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; SVE-LABEL: smulh_v2i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl2
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v2i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    smulh z0.d, z0.d, z1.d
; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = sext <2 x i64> %op1 to <2 x i128>
  %ext2 = sext <2 x i64> %op2 to <2 x i128>
  %prod = mul <2 x i128> %ext1, %ext2
  %hi = lshr <2 x i128> %prod, <i128 64, i128 64>
  %narrow = trunc <2 x i128> %hi to <2 x i64>
  ret <2 x i64> %narrow
}

; Signed high-half multiply of two <4 x i64> vectors loaded from %a and %b;
; the result is stored back to %a. The assertions expect the 256-bit operation
; to be split into two 128-bit smulh instructions.
define void @smulh_v4i64(ptr %a, ptr %b) {
; SVE-LABEL: smulh_v4i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl2
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    smulh z1.d, p0/m, z1.d, z3.d
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v4i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    smulh z0.d, z1.d, z0.d
; SVE2-NEXT:    smulh z1.d, z2.d, z3.d
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
  %lhs = load <4 x i64>, ptr %a
  %rhs = load <4 x i64>, ptr %b
  %ext1 = sext <4 x i64> %lhs to <4 x i128>
  %ext2 = sext <4 x i64> %rhs to <4 x i128>
  %prod = mul <4 x i128> %ext1, %ext2
  %hi = lshr <4 x i128> %prod, <i128 64, i128 64, i128 64, i128 64>
  %narrow = trunc <4 x i128> %hi to <4 x i64>
  store <4 x i64> %narrow, ptr %a
  ret void
}

;
; UMULH
;

; Zero-extends two <4 x i8> operands to i16, multiplies them, shifts the
; product right by 4 and truncates back to i8. The assertions below expect an
; and-mask + mul + lsr sequence rather than a umulh.
; NOTE(review): the shift amount is 4, not the element width 8, so this does
; not extract the true high half - confirm whether that is intentional.
define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; SVE-LABEL: umulh_v4i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.h, vl4
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    and z0.h, z0.h, #0xff
; SVE-NEXT:    and z1.h, z1.h, #0xff
; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    lsr z0.h, z0.h, #4
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v4i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    and z0.h, z0.h, #0xff
; SVE2-NEXT:    and z1.h, z1.h, #0xff
; SVE2-NEXT:    mul z0.h, z0.h, z1.h
; SVE2-NEXT:    lsr z0.h, z0.h, #4
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = zext <4 x i8> %op1 to <4 x i16>
  %ext2 = zext <4 x i8> %op2 to <4 x i16>
  %prod = mul <4 x i16> %ext1, %ext2
  %hi = lshr <4 x i16> %prod, <i16 4, i16 4, i16 4, i16 4>
  %narrow = trunc <4 x i16> %hi to <4 x i8>
  ret <4 x i8> %narrow
}

; Unsigned high-half multiply of two <8 x i8> vectors, written as
; zext -> mul -> lshr 8 -> trunc. Expected to select one umulh on .b elements.
define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; SVE-LABEL: umulh_v8i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.b, vl8
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v8i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    umulh z0.b, z0.b, z1.b
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = zext <8 x i8> %op1 to <8 x i16>
  %ext2 = zext <8 x i8> %op2 to <8 x i16>
  %prod = mul <8 x i16> %ext1, %ext2
  %hi = lshr <8 x i16> %prod, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %narrow = trunc <8 x i16> %hi to <8 x i8>
  ret <8 x i8> %narrow
}

; Unsigned high-half multiply of two <16 x i8> vectors, written as
; zext -> mul -> lshr 8 -> trunc. Expected to select one umulh on .b elements.
define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; SVE-LABEL: umulh_v16i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.b, vl16
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v16i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    umulh z0.b, z0.b, z1.b
; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = zext <16 x i8> %op1 to <16 x i16>
  %ext2 = zext <16 x i8> %op2 to <16 x i16>
  %prod = mul <16 x i16> %ext1, %ext2
  %hi = lshr <16 x i16> %prod, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8,
                                i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %narrow = trunc <16 x i16> %hi to <16 x i8>
  ret <16 x i8> %narrow
}

; Unsigned high-half multiply of two <32 x i8> vectors loaded from %a and %b;
; the result is stored back to %a. The assertions expect the 256-bit operation
; to be split into two 128-bit umulh instructions.
define void @umulh_v32i8(ptr %a, ptr %b) {
; SVE-LABEL: umulh_v32i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.b, vl16
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    umulh z1.b, p0/m, z1.b, z3.b
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v32i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    umulh z0.b, z1.b, z0.b
; SVE2-NEXT:    umulh z1.b, z2.b, z3.b
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
  %lhs = load <32 x i8>, ptr %a
  %rhs = load <32 x i8>, ptr %b
  %ext1 = zext <32 x i8> %lhs to <32 x i16>
  %ext2 = zext <32 x i8> %rhs to <32 x i16>
  %prod = mul <32 x i16> %ext1, %ext2
  %hi = lshr <32 x i16> %prod, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8,
                                i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8,
                                i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8,
                                i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %narrow = trunc <32 x i16> %hi to <32 x i8>
  store <32 x i8> %narrow, ptr %a
  ret void
}

; Unsigned high-half multiply pattern on <2 x i16>: zext to i32, mul, lshr 16,
; trunc. The assertions below expect an and-mask + mul + lsr sequence on .s
; elements rather than a umulh.
define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; SVE-LABEL: umulh_v2i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s, vl2
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    and z0.s, z0.s, #0xffff
; SVE-NEXT:    and z1.s, z1.s, #0xffff
; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
; SVE-NEXT:    lsr z0.s, z0.s, #16
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v2i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    and z0.s, z0.s, #0xffff
; SVE2-NEXT:    and z1.s, z1.s, #0xffff
; SVE2-NEXT:    mul z0.s, z0.s, z1.s
; SVE2-NEXT:    lsr z0.s, z0.s, #16
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = zext <2 x i16> %op1 to <2 x i32>
  %ext2 = zext <2 x i16> %op2 to <2 x i32>
  %prod = mul <2 x i32> %ext1, %ext2
  %hi = lshr <2 x i32> %prod, <i32 16, i32 16>
  %narrow = trunc <2 x i32> %hi to <2 x i16>
  ret <2 x i16> %narrow
}

; Unsigned high-half multiply of two <4 x i16> vectors, written as
; zext -> mul -> lshr 16 -> trunc. Expected to select one umulh on .h elements.
define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; SVE-LABEL: umulh_v4i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.h, vl4
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v4i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    umulh z0.h, z0.h, z1.h
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = zext <4 x i16> %op1 to <4 x i32>
  %ext2 = zext <4 x i16> %op2 to <4 x i32>
  %prod = mul <4 x i32> %ext1, %ext2
  %hi = lshr <4 x i32> %prod, <i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <4 x i32> %hi to <4 x i16>
  ret <4 x i16> %narrow
}

; Unsigned high-half multiply of two <8 x i16> vectors, written as
; zext -> mul -> lshr 16 -> trunc. Expected to select one umulh on .h elements.
define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; SVE-LABEL: umulh_v8i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.h, vl8
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v8i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    umulh z0.h, z0.h, z1.h
; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = zext <8 x i16> %op1 to <8 x i32>
  %ext2 = zext <8 x i16> %op2 to <8 x i32>
  %prod = mul <8 x i32> %ext1, %ext2
  %hi = lshr <8 x i32> %prod, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <8 x i32> %hi to <8 x i16>
  ret <8 x i16> %narrow
}

; Unsigned high-half multiply of two <16 x i16> vectors loaded from %a and %b;
; the result is stored back to %a. The assertions expect the 256-bit operation
; to be split into two 128-bit umulh instructions.
define void @umulh_v16i16(ptr %a, ptr %b) {
; SVE-LABEL: umulh_v16i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.h, vl8
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    umulh z1.h, p0/m, z1.h, z3.h
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v16i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    umulh z0.h, z1.h, z0.h
; SVE2-NEXT:    umulh z1.h, z2.h, z3.h
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
  %lhs = load <16 x i16>, ptr %a
  %rhs = load <16 x i16>, ptr %b
  %ext1 = zext <16 x i16> %lhs to <16 x i32>
  %ext2 = zext <16 x i16> %rhs to <16 x i32>
  %prod = mul <16 x i32> %ext1, %ext2
  %hi = lshr <16 x i32> %prod, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16,
                                i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <16 x i32> %hi to <16 x i16>
  store <16 x i16> %narrow, ptr %a
  ret void
}

; Unsigned high-half multiply of two <2 x i32> vectors, written as
; zext -> mul -> lshr 32 -> trunc. Expected to select one umulh on .s elements.
define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; SVE-LABEL: umulh_v2i32:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s, vl2
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v2i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    umulh z0.s, z0.s, z1.s
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = zext <2 x i32> %op1 to <2 x i64>
  %ext2 = zext <2 x i32> %op2 to <2 x i64>
  %prod = mul <2 x i64> %ext1, %ext2
  %hi = lshr <2 x i64> %prod, <i64 32, i64 32>
  %narrow = trunc <2 x i64> %hi to <2 x i32>
  ret <2 x i32> %narrow
}

; Unsigned high-half multiply of two <4 x i32> vectors, written as
; zext -> mul -> lshr 32 -> trunc. Expected to select one umulh on .s elements.
define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; SVE-LABEL: umulh_v4i32:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s, vl4
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v4i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    umulh z0.s, z0.s, z1.s
; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = zext <4 x i32> %op1 to <4 x i64>
  %ext2 = zext <4 x i32> %op2 to <4 x i64>
  %prod = mul <4 x i64> %ext1, %ext2
  %hi = lshr <4 x i64> %prod, <i64 32, i64 32, i64 32, i64 32>
  %narrow = trunc <4 x i64> %hi to <4 x i32>
  ret <4 x i32> %narrow
}

; Unsigned high-half multiply of two <8 x i32> vectors loaded from %a and %b:
; zero-extend to i64, multiply, shift right by 32, truncate and store the
; result back to %a. The assertions expect the 256-bit operation to be split
; into two 128-bit umulh instructions.
define void @umulh_v8i32(ptr %a, ptr %b) {
; SVE-LABEL: umulh_v8i32:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s, vl4
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    umulh z1.s, p0/m, z1.s, z3.s
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v8i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    umulh z0.s, z1.s, z0.s
; SVE2-NEXT:    umulh z1.s, z2.s, z3.s
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %1 = zext <8 x i32> %op1 to <8 x i64>
  %2 = zext <8 x i32> %op2 to <8 x i64>
  %mul = mul <8 x i64> %1, %2
  %shr = lshr <8 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
  %res = trunc <8 x i64> %shr to <8 x i32>
  store <8 x i32> %res, ptr %a
  ret void
}

; Unsigned high-half multiply of two <1 x i64> vectors, written as
; zext -> mul -> lshr 64 -> trunc. Expected to select one umulh on .d elements.
define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; SVE-LABEL: umulh_v1i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl1
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v1i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    umulh z0.d, z0.d, z1.d
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = zext <1 x i64> %op1 to <1 x i128>
  %ext2 = zext <1 x i64> %op2 to <1 x i128>
  %prod = mul <1 x i128> %ext1, %ext2
  %hi = lshr <1 x i128> %prod, <i128 64>
  %narrow = trunc <1 x i128> %hi to <1 x i64>
  ret <1 x i64> %narrow
}

; Unsigned high-half multiply of two <2 x i64> vectors, written as
; zext -> mul -> lshr 64 -> trunc. Expected to select one umulh on .d elements.
define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; SVE-LABEL: umulh_v2i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl2
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v2i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    umulh z0.d, z0.d, z1.d
; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT:    ret
  %ext1 = zext <2 x i64> %op1 to <2 x i128>
  %ext2 = zext <2 x i64> %op2 to <2 x i128>
  %prod = mul <2 x i128> %ext1, %ext2
  %hi = lshr <2 x i128> %prod, <i128 64, i128 64>
  %narrow = trunc <2 x i128> %hi to <2 x i64>
  ret <2 x i64> %narrow
}

; Unsigned high-half multiply of two <4 x i64> vectors loaded from %a and %b;
; the result is stored back to %a. The assertions expect the 256-bit operation
; to be split into two 128-bit umulh instructions.
define void @umulh_v4i64(ptr %a, ptr %b) {
; SVE-LABEL: umulh_v4i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl2
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    umulh z1.d, p0/m, z1.d, z3.d
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v4i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    umulh z0.d, z1.d, z0.d
; SVE2-NEXT:    umulh z1.d, z2.d, z3.d
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
  %lhs = load <4 x i64>, ptr %a
  %rhs = load <4 x i64>, ptr %b
  %ext1 = zext <4 x i64> %lhs to <4 x i128>
  %ext2 = zext <4 x i64> %rhs to <4 x i128>
  %prod = mul <4 x i128> %ext1, %ext2
  %hi = lshr <4 x i128> %prod, <i128 64, i128 64, i128 64, i128 64>
  %narrow = trunc <4 x i128> %hi to <4 x i64>
  store <4 x i64> %narrow, ptr %a
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
