Vector Permutation Intrinsics

Integer and Floating-Point Scalar Move Intrinsics

// Overloaded __riscv_vfmv_s_tu prototypes, one per floating-point vector type
// listed here (_Float16, float and double elements at each listed LMUL).
// Each overload takes a vector operand vd, a scalar rs1 whose C type matches
// the element type of vd, and a size_t vl, and returns the same vector type
// as vd. All overloads share one name and differ only in the vd/rs1 types.
// NOTE(review): the _tu suffix presumably selects the tail-undisturbed policy,
// with vd supplying the preserved elements; confirm against the RVV
// intrinsics specification.
vfloat16mf4_t __riscv_vfmv_s_tu(vfloat16mf4_t vd, _Float16 rs1, size_t vl);
vfloat16mf2_t __riscv_vfmv_s_tu(vfloat16mf2_t vd, _Float16 rs1, size_t vl);
vfloat16m1_t __riscv_vfmv_s_tu(vfloat16m1_t vd, _Float16 rs1, size_t vl);
vfloat16m2_t __riscv_vfmv_s_tu(vfloat16m2_t vd, _Float16 rs1, size_t vl);
vfloat16m4_t __riscv_vfmv_s_tu(vfloat16m4_t vd, _Float16 rs1, size_t vl);
vfloat16m8_t __riscv_vfmv_s_tu(vfloat16m8_t vd, _Float16 rs1, size_t vl);
vfloat32mf2_t __riscv_vfmv_s_tu(vfloat32mf2_t vd, float rs1, size_t vl);
vfloat32m1_t __riscv_vfmv_s_tu(vfloat32m1_t vd, float rs1, size_t vl);
vfloat32m2_t __riscv_vfmv_s_tu(vfloat32m2_t vd, float rs1, size_t vl);
vfloat32m4_t __riscv_vfmv_s_tu(vfloat32m4_t vd, float rs1, size_t vl);
vfloat32m8_t __riscv_vfmv_s_tu(vfloat32m8_t vd, float rs1, size_t vl);
vfloat64m1_t __riscv_vfmv_s_tu(vfloat64m1_t vd, double rs1, size_t vl);
vfloat64m2_t __riscv_vfmv_s_tu(vfloat64m2_t vd, double rs1, size_t vl);
vfloat64m4_t __riscv_vfmv_s_tu(vfloat64m4_t vd, double rs1, size_t vl);
vfloat64m8_t __riscv_vfmv_s_tu(vfloat64m8_t vd, double rs1, size_t vl);
// Overloaded __riscv_vmv_s_tu prototypes, one per integer vector type listed
// here: signed (vint*) first, then unsigned (vuint*), with 8-, 16-, 32- and
// 64-bit elements. Each overload takes a vector operand vd, a scalar rs1 of
// the matching intN_t/uintN_t type, and a size_t vl, and returns the same
// vector type as vd.
// NOTE(review): the _tu suffix presumably selects the tail-undisturbed policy,
// with vd supplying the preserved elements; confirm against the RVV
// intrinsics specification.
vint8mf8_t __riscv_vmv_s_tu(vint8mf8_t vd, int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmv_s_tu(vint8mf4_t vd, int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmv_s_tu(vint8mf2_t vd, int8_t rs1, size_t vl);
vint8m1_t __riscv_vmv_s_tu(vint8m1_t vd, int8_t rs1, size_t vl);
vint8m2_t __riscv_vmv_s_tu(vint8m2_t vd, int8_t rs1, size_t vl);
vint8m4_t __riscv_vmv_s_tu(vint8m4_t vd, int8_t rs1, size_t vl);
vint8m8_t __riscv_vmv_s_tu(vint8m8_t vd, int8_t rs1, size_t vl);
vint16mf4_t __riscv_vmv_s_tu(vint16mf4_t vd, int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmv_s_tu(vint16mf2_t vd, int16_t rs1, size_t vl);
vint16m1_t __riscv_vmv_s_tu(vint16m1_t vd, int16_t rs1, size_t vl);
vint16m2_t __riscv_vmv_s_tu(vint16m2_t vd, int16_t rs1, size_t vl);
vint16m4_t __riscv_vmv_s_tu(vint16m4_t vd, int16_t rs1, size_t vl);
vint16m8_t __riscv_vmv_s_tu(vint16m8_t vd, int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmv_s_tu(vint32mf2_t vd, int32_t rs1, size_t vl);
vint32m1_t __riscv_vmv_s_tu(vint32m1_t vd, int32_t rs1, size_t vl);
vint32m2_t __riscv_vmv_s_tu(vint32m2_t vd, int32_t rs1, size_t vl);
vint32m4_t __riscv_vmv_s_tu(vint32m4_t vd, int32_t rs1, size_t vl);
vint32m8_t __riscv_vmv_s_tu(vint32m8_t vd, int32_t rs1, size_t vl);
vint64m1_t __riscv_vmv_s_tu(vint64m1_t vd, int64_t rs1, size_t vl);
vint64m2_t __riscv_vmv_s_tu(vint64m2_t vd, int64_t rs1, size_t vl);
vint64m4_t __riscv_vmv_s_tu(vint64m4_t vd, int64_t rs1, size_t vl);
vint64m8_t __riscv_vmv_s_tu(vint64m8_t vd, int64_t rs1, size_t vl);
vuint8mf8_t __riscv_vmv_s_tu(vuint8mf8_t vd, uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vmv_s_tu(vuint8mf4_t vd, uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vmv_s_tu(vuint8mf2_t vd, uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vmv_s_tu(vuint8m1_t vd, uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vmv_s_tu(vuint8m2_t vd, uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vmv_s_tu(vuint8m4_t vd, uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vmv_s_tu(vuint8m8_t vd, uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vmv_s_tu(vuint16mf4_t vd, uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vmv_s_tu(vuint16mf2_t vd, uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vmv_s_tu(vuint16m1_t vd, uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vmv_s_tu(vuint16m2_t vd, uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vmv_s_tu(vuint16m4_t vd, uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vmv_s_tu(vuint16m8_t vd, uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vmv_s_tu(vuint32mf2_t vd, uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vmv_s_tu(vuint32m1_t vd, uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vmv_s_tu(vuint32m2_t vd, uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vmv_s_tu(vuint32m4_t vd, uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vmv_s_tu(vuint32m8_t vd, uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vmv_s_tu(vuint64m1_t vd, uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vmv_s_tu(vuint64m2_t vd, uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vmv_s_tu(vuint64m4_t vd, uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vmv_s_tu(vuint64m8_t vd, uint64_t rs1, size_t vl);

Vector Slideup Intrinsics

// Overloaded __riscv_vslideup_tu prototypes (no mask operand), one per
// floating-point, signed-integer and unsigned-integer vector type listed here.
// Each overload takes vd and vs2 of one shared vector type, followed by
// size_t rs1 and size_t vl, and returns that same vector type.
// NOTE(review): rs1 is presumably the slide offset in elements, and the _tu
// suffix presumably selects the tail-undisturbed policy; confirm against the
// RVV intrinsics specification.
vfloat16mf4_t __riscv_vslideup_tu(vfloat16mf4_t vd, vfloat16mf4_t vs2,
                                  size_t rs1, size_t vl);
vfloat16mf2_t __riscv_vslideup_tu(vfloat16mf2_t vd, vfloat16mf2_t vs2,
                                  size_t rs1, size_t vl);
vfloat16m1_t __riscv_vslideup_tu(vfloat16m1_t vd, vfloat16m1_t vs2, size_t rs1,
                                 size_t vl);
vfloat16m2_t __riscv_vslideup_tu(vfloat16m2_t vd, vfloat16m2_t vs2, size_t rs1,
                                 size_t vl);
vfloat16m4_t __riscv_vslideup_tu(vfloat16m4_t vd, vfloat16m4_t vs2, size_t rs1,
                                 size_t vl);
vfloat16m8_t __riscv_vslideup_tu(vfloat16m8_t vd, vfloat16m8_t vs2, size_t rs1,
                                 size_t vl);
vfloat32mf2_t __riscv_vslideup_tu(vfloat32mf2_t vd, vfloat32mf2_t vs2,
                                  size_t rs1, size_t vl);
vfloat32m1_t __riscv_vslideup_tu(vfloat32m1_t vd, vfloat32m1_t vs2, size_t rs1,
                                 size_t vl);
vfloat32m2_t __riscv_vslideup_tu(vfloat32m2_t vd, vfloat32m2_t vs2, size_t rs1,
                                 size_t vl);
vfloat32m4_t __riscv_vslideup_tu(vfloat32m4_t vd, vfloat32m4_t vs2, size_t rs1,
                                 size_t vl);
vfloat32m8_t __riscv_vslideup_tu(vfloat32m8_t vd, vfloat32m8_t vs2, size_t rs1,
                                 size_t vl);
vfloat64m1_t __riscv_vslideup_tu(vfloat64m1_t vd, vfloat64m1_t vs2, size_t rs1,
                                 size_t vl);
vfloat64m2_t __riscv_vslideup_tu(vfloat64m2_t vd, vfloat64m2_t vs2, size_t rs1,
                                 size_t vl);
vfloat64m4_t __riscv_vslideup_tu(vfloat64m4_t vd, vfloat64m4_t vs2, size_t rs1,
                                 size_t vl);
vfloat64m8_t __riscv_vslideup_tu(vfloat64m8_t vd, vfloat64m8_t vs2, size_t rs1,
                                 size_t vl);
vint8mf8_t __riscv_vslideup_tu(vint8mf8_t vd, vint8mf8_t vs2, size_t rs1,
                               size_t vl);
vint8mf4_t __riscv_vslideup_tu(vint8mf4_t vd, vint8mf4_t vs2, size_t rs1,
                               size_t vl);
vint8mf2_t __riscv_vslideup_tu(vint8mf2_t vd, vint8mf2_t vs2, size_t rs1,
                               size_t vl);
vint8m1_t __riscv_vslideup_tu(vint8m1_t vd, vint8m1_t vs2, size_t rs1,
                              size_t vl);
vint8m2_t __riscv_vslideup_tu(vint8m2_t vd, vint8m2_t vs2, size_t rs1,
                              size_t vl);
vint8m4_t __riscv_vslideup_tu(vint8m4_t vd, vint8m4_t vs2, size_t rs1,
                              size_t vl);
vint8m8_t __riscv_vslideup_tu(vint8m8_t vd, vint8m8_t vs2, size_t rs1,
                              size_t vl);
vint16mf4_t __riscv_vslideup_tu(vint16mf4_t vd, vint16mf4_t vs2, size_t rs1,
                                size_t vl);
vint16mf2_t __riscv_vslideup_tu(vint16mf2_t vd, vint16mf2_t vs2, size_t rs1,
                                size_t vl);
vint16m1_t __riscv_vslideup_tu(vint16m1_t vd, vint16m1_t vs2, size_t rs1,
                               size_t vl);
vint16m2_t __riscv_vslideup_tu(vint16m2_t vd, vint16m2_t vs2, size_t rs1,
                               size_t vl);
vint16m4_t __riscv_vslideup_tu(vint16m4_t vd, vint16m4_t vs2, size_t rs1,
                               size_t vl);
vint16m8_t __riscv_vslideup_tu(vint16m8_t vd, vint16m8_t vs2, size_t rs1,
                               size_t vl);
vint32mf2_t __riscv_vslideup_tu(vint32mf2_t vd, vint32mf2_t vs2, size_t rs1,
                                size_t vl);
vint32m1_t __riscv_vslideup_tu(vint32m1_t vd, vint32m1_t vs2, size_t rs1,
                               size_t vl);
vint32m2_t __riscv_vslideup_tu(vint32m2_t vd, vint32m2_t vs2, size_t rs1,
                               size_t vl);
vint32m4_t __riscv_vslideup_tu(vint32m4_t vd, vint32m4_t vs2, size_t rs1,
                               size_t vl);
vint32m8_t __riscv_vslideup_tu(vint32m8_t vd, vint32m8_t vs2, size_t rs1,
                               size_t vl);
vint64m1_t __riscv_vslideup_tu(vint64m1_t vd, vint64m1_t vs2, size_t rs1,
                               size_t vl);
vint64m2_t __riscv_vslideup_tu(vint64m2_t vd, vint64m2_t vs2, size_t rs1,
                               size_t vl);
vint64m4_t __riscv_vslideup_tu(vint64m4_t vd, vint64m4_t vs2, size_t rs1,
                               size_t vl);
vint64m8_t __riscv_vslideup_tu(vint64m8_t vd, vint64m8_t vs2, size_t rs1,
                               size_t vl);
vuint8mf8_t __riscv_vslideup_tu(vuint8mf8_t vd, vuint8mf8_t vs2, size_t rs1,
                                size_t vl);
vuint8mf4_t __riscv_vslideup_tu(vuint8mf4_t vd, vuint8mf4_t vs2, size_t rs1,
                                size_t vl);
vuint8mf2_t __riscv_vslideup_tu(vuint8mf2_t vd, vuint8mf2_t vs2, size_t rs1,
                                size_t vl);
vuint8m1_t __riscv_vslideup_tu(vuint8m1_t vd, vuint8m1_t vs2, size_t rs1,
                               size_t vl);
vuint8m2_t __riscv_vslideup_tu(vuint8m2_t vd, vuint8m2_t vs2, size_t rs1,
                               size_t vl);
vuint8m4_t __riscv_vslideup_tu(vuint8m4_t vd, vuint8m4_t vs2, size_t rs1,
                               size_t vl);
vuint8m8_t __riscv_vslideup_tu(vuint8m8_t vd, vuint8m8_t vs2, size_t rs1,
                               size_t vl);
vuint16mf4_t __riscv_vslideup_tu(vuint16mf4_t vd, vuint16mf4_t vs2, size_t rs1,
                                 size_t vl);
vuint16mf2_t __riscv_vslideup_tu(vuint16mf2_t vd, vuint16mf2_t vs2, size_t rs1,
                                 size_t vl);
vuint16m1_t __riscv_vslideup_tu(vuint16m1_t vd, vuint16m1_t vs2, size_t rs1,
                                size_t vl);
vuint16m2_t __riscv_vslideup_tu(vuint16m2_t vd, vuint16m2_t vs2, size_t rs1,
                                size_t vl);
vuint16m4_t __riscv_vslideup_tu(vuint16m4_t vd, vuint16m4_t vs2, size_t rs1,
                                size_t vl);
vuint16m8_t __riscv_vslideup_tu(vuint16m8_t vd, vuint16m8_t vs2, size_t rs1,
                                size_t vl);
vuint32mf2_t __riscv_vslideup_tu(vuint32mf2_t vd, vuint32mf2_t vs2, size_t rs1,
                                 size_t vl);
vuint32m1_t __riscv_vslideup_tu(vuint32m1_t vd, vuint32m1_t vs2, size_t rs1,
                                size_t vl);
vuint32m2_t __riscv_vslideup_tu(vuint32m2_t vd, vuint32m2_t vs2, size_t rs1,
                                size_t vl);
vuint32m4_t __riscv_vslideup_tu(vuint32m4_t vd, vuint32m4_t vs2, size_t rs1,
                                size_t vl);
vuint32m8_t __riscv_vslideup_tu(vuint32m8_t vd, vuint32m8_t vs2, size_t rs1,
                                size_t vl);
vuint64m1_t __riscv_vslideup_tu(vuint64m1_t vd, vuint64m1_t vs2, size_t rs1,
                                size_t vl);
vuint64m2_t __riscv_vslideup_tu(vuint64m2_t vd, vuint64m2_t vs2, size_t rs1,
                                size_t vl);
vuint64m4_t __riscv_vslideup_tu(vuint64m4_t vd, vuint64m4_t vs2, size_t rs1,
                                size_t vl);
vuint64m8_t __riscv_vslideup_tu(vuint64m8_t vd, vuint64m8_t vs2, size_t rs1,
                                size_t vl);
// masked functions: __riscv_vslideup_tum overloads
// Each overload takes a leading mask operand vm (a vboolN_t type), then vd and
// vs2 of one shared vector type, then size_t rs1 and size_t vl, and returns
// that same vector type. The vbool type paired with vm varies with the vector
// type (e.g. vbool64_t with vint8mf8_t, vbool1_t with vint8m8_t).
// NOTE(review): the _tum suffix presumably selects tail-undisturbed,
// mask-agnostic behavior; confirm against the RVV intrinsics specification.
vfloat16mf4_t __riscv_vslideup_tum(vbool64_t vm, vfloat16mf4_t vd,
                                   vfloat16mf4_t vs2, size_t rs1, size_t vl);
vfloat16mf2_t __riscv_vslideup_tum(vbool32_t vm, vfloat16mf2_t vd,
                                   vfloat16mf2_t vs2, size_t rs1, size_t vl);
vfloat16m1_t __riscv_vslideup_tum(vbool16_t vm, vfloat16m1_t vd,
                                  vfloat16m1_t vs2, size_t rs1, size_t vl);
vfloat16m2_t __riscv_vslideup_tum(vbool8_t vm, vfloat16m2_t vd,
                                  vfloat16m2_t vs2, size_t rs1, size_t vl);
vfloat16m4_t __riscv_vslideup_tum(vbool4_t vm, vfloat16m4_t vd,
                                  vfloat16m4_t vs2, size_t rs1, size_t vl);
vfloat16m8_t __riscv_vslideup_tum(vbool2_t vm, vfloat16m8_t vd,
                                  vfloat16m8_t vs2, size_t rs1, size_t vl);
vfloat32mf2_t __riscv_vslideup_tum(vbool64_t vm, vfloat32mf2_t vd,
                                   vfloat32mf2_t vs2, size_t rs1, size_t vl);
vfloat32m1_t __riscv_vslideup_tum(vbool32_t vm, vfloat32m1_t vd,
                                  vfloat32m1_t vs2, size_t rs1, size_t vl);
vfloat32m2_t __riscv_vslideup_tum(vbool16_t vm, vfloat32m2_t vd,
                                  vfloat32m2_t vs2, size_t rs1, size_t vl);
vfloat32m4_t __riscv_vslideup_tum(vbool8_t vm, vfloat32m4_t vd,
                                  vfloat32m4_t vs2, size_t rs1, size_t vl);
vfloat32m8_t __riscv_vslideup_tum(vbool4_t vm, vfloat32m8_t vd,
                                  vfloat32m8_t vs2, size_t rs1, size_t vl);
vfloat64m1_t __riscv_vslideup_tum(vbool64_t vm, vfloat64m1_t vd,
                                  vfloat64m1_t vs2, size_t rs1, size_t vl);
vfloat64m2_t __riscv_vslideup_tum(vbool32_t vm, vfloat64m2_t vd,
                                  vfloat64m2_t vs2, size_t rs1, size_t vl);
vfloat64m4_t __riscv_vslideup_tum(vbool16_t vm, vfloat64m4_t vd,
                                  vfloat64m4_t vs2, size_t rs1, size_t vl);
vfloat64m8_t __riscv_vslideup_tum(vbool8_t vm, vfloat64m8_t vd,
                                  vfloat64m8_t vs2, size_t rs1, size_t vl);
vint8mf8_t __riscv_vslideup_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                size_t rs1, size_t vl);
vint8mf4_t __riscv_vslideup_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                size_t rs1, size_t vl);
vint8mf2_t __riscv_vslideup_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                size_t rs1, size_t vl);
vint8m1_t __riscv_vslideup_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                               size_t rs1, size_t vl);
vint8m2_t __riscv_vslideup_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                               size_t rs1, size_t vl);
vint8m4_t __riscv_vslideup_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                               size_t rs1, size_t vl);
vint8m8_t __riscv_vslideup_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                               size_t rs1, size_t vl);
vint16mf4_t __riscv_vslideup_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                 size_t rs1, size_t vl);
vint16mf2_t __riscv_vslideup_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                 size_t rs1, size_t vl);
vint16m1_t __riscv_vslideup_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                size_t rs1, size_t vl);
vint16m2_t __riscv_vslideup_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                size_t rs1, size_t vl);
vint16m4_t __riscv_vslideup_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                size_t rs1, size_t vl);
vint16m8_t __riscv_vslideup_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                size_t rs1, size_t vl);
vint32mf2_t __riscv_vslideup_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                 size_t rs1, size_t vl);
vint32m1_t __riscv_vslideup_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                size_t rs1, size_t vl);
vint32m2_t __riscv_vslideup_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                size_t rs1, size_t vl);
vint32m4_t __riscv_vslideup_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                size_t rs1, size_t vl);
vint32m8_t __riscv_vslideup_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                size_t rs1, size_t vl);
vint64m1_t __riscv_vslideup_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                size_t rs1, size_t vl);
vint64m2_t __riscv_vslideup_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                size_t rs1, size_t vl);
vint64m4_t __riscv_vslideup_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                size_t rs1, size_t vl);
vint64m8_t __riscv_vslideup_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                size_t rs1, size_t vl);
vuint8mf8_t __riscv_vslideup_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                 size_t rs1, size_t vl);
vuint8mf4_t __riscv_vslideup_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                 size_t rs1, size_t vl);
vuint8mf2_t __riscv_vslideup_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                 size_t rs1, size_t vl);
vuint8m1_t __riscv_vslideup_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                size_t rs1, size_t vl);
vuint8m2_t __riscv_vslideup_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                size_t rs1, size_t vl);
vuint8m4_t __riscv_vslideup_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                size_t rs1, size_t vl);
vuint8m8_t __riscv_vslideup_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                size_t rs1, size_t vl);
vuint16mf4_t __riscv_vslideup_tum(vbool64_t vm, vuint16mf4_t vd,
                                  vuint16mf4_t vs2, size_t rs1, size_t vl);
vuint16mf2_t __riscv_vslideup_tum(vbool32_t vm, vuint16mf2_t vd,
                                  vuint16mf2_t vs2, size_t rs1, size_t vl);
vuint16m1_t __riscv_vslideup_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                 size_t rs1, size_t vl);
vuint16m2_t __riscv_vslideup_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                 size_t rs1, size_t vl);
vuint16m4_t __riscv_vslideup_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                 size_t rs1, size_t vl);
vuint16m8_t __riscv_vslideup_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                 size_t rs1, size_t vl);
vuint32mf2_t __riscv_vslideup_tum(vbool64_t vm, vuint32mf2_t vd,
                                  vuint32mf2_t vs2, size_t rs1, size_t vl);
vuint32m1_t __riscv_vslideup_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                 size_t rs1, size_t vl);
vuint32m2_t __riscv_vslideup_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                 size_t rs1, size_t vl);
vuint32m4_t __riscv_vslideup_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                 size_t rs1, size_t vl);
vuint32m8_t __riscv_vslideup_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                 size_t rs1, size_t vl);
vuint64m1_t __riscv_vslideup_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                 size_t rs1, size_t vl);
vuint64m2_t __riscv_vslideup_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                 size_t rs1, size_t vl);
vuint64m4_t __riscv_vslideup_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                 size_t rs1, size_t vl);
vuint64m8_t __riscv_vslideup_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                 size_t rs1, size_t vl);
// masked functions: __riscv_vslideup_tumu overloads
// Each overload takes a leading mask operand vm (a vboolN_t type), then vd and
// vs2 of one shared vector type, then size_t rs1 and size_t vl, and returns
// that same vector type. The vbool type paired with vm varies with the vector
// type (e.g. vbool64_t with vint8mf8_t, vbool1_t with vint8m8_t).
// NOTE(review): the _tumu suffix presumably selects tail-undisturbed,
// mask-undisturbed behavior; confirm against the RVV intrinsics specification.
vfloat16mf4_t __riscv_vslideup_tumu(vbool64_t vm, vfloat16mf4_t vd,
                                    vfloat16mf4_t vs2, size_t rs1, size_t vl);
vfloat16mf2_t __riscv_vslideup_tumu(vbool32_t vm, vfloat16mf2_t vd,
                                    vfloat16mf2_t vs2, size_t rs1, size_t vl);
vfloat16m1_t __riscv_vslideup_tumu(vbool16_t vm, vfloat16m1_t vd,
                                   vfloat16m1_t vs2, size_t rs1, size_t vl);
vfloat16m2_t __riscv_vslideup_tumu(vbool8_t vm, vfloat16m2_t vd,
                                   vfloat16m2_t vs2, size_t rs1, size_t vl);
vfloat16m4_t __riscv_vslideup_tumu(vbool4_t vm, vfloat16m4_t vd,
                                   vfloat16m4_t vs2, size_t rs1, size_t vl);
vfloat16m8_t __riscv_vslideup_tumu(vbool2_t vm, vfloat16m8_t vd,
                                   vfloat16m8_t vs2, size_t rs1, size_t vl);
vfloat32mf2_t __riscv_vslideup_tumu(vbool64_t vm, vfloat32mf2_t vd,
                                    vfloat32mf2_t vs2, size_t rs1, size_t vl);
vfloat32m1_t __riscv_vslideup_tumu(vbool32_t vm, vfloat32m1_t vd,
                                   vfloat32m1_t vs2, size_t rs1, size_t vl);
vfloat32m2_t __riscv_vslideup_tumu(vbool16_t vm, vfloat32m2_t vd,
                                   vfloat32m2_t vs2, size_t rs1, size_t vl);
vfloat32m4_t __riscv_vslideup_tumu(vbool8_t vm, vfloat32m4_t vd,
                                   vfloat32m4_t vs2, size_t rs1, size_t vl);
vfloat32m8_t __riscv_vslideup_tumu(vbool4_t vm, vfloat32m8_t vd,
                                   vfloat32m8_t vs2, size_t rs1, size_t vl);
vfloat64m1_t __riscv_vslideup_tumu(vbool64_t vm, vfloat64m1_t vd,
                                   vfloat64m1_t vs2, size_t rs1, size_t vl);
vfloat64m2_t __riscv_vslideup_tumu(vbool32_t vm, vfloat64m2_t vd,
                                   vfloat64m2_t vs2, size_t rs1, size_t vl);
vfloat64m4_t __riscv_vslideup_tumu(vbool16_t vm, vfloat64m4_t vd,
                                   vfloat64m4_t vs2, size_t rs1, size_t vl);
vfloat64m8_t __riscv_vslideup_tumu(vbool8_t vm, vfloat64m8_t vd,
                                   vfloat64m8_t vs2, size_t rs1, size_t vl);
vint8mf8_t __riscv_vslideup_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                 size_t rs1, size_t vl);
vint8mf4_t __riscv_vslideup_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                 size_t rs1, size_t vl);
vint8mf2_t __riscv_vslideup_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                 size_t rs1, size_t vl);
vint8m1_t __riscv_vslideup_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                size_t rs1, size_t vl);
vint8m2_t __riscv_vslideup_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                size_t rs1, size_t vl);
vint8m4_t __riscv_vslideup_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                size_t rs1, size_t vl);
vint8m8_t __riscv_vslideup_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                                size_t rs1, size_t vl);
vint16mf4_t __riscv_vslideup_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                  size_t rs1, size_t vl);
vint16mf2_t __riscv_vslideup_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                  size_t rs1, size_t vl);
vint16m1_t __riscv_vslideup_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                 size_t rs1, size_t vl);
vint16m2_t __riscv_vslideup_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                 size_t rs1, size_t vl);
vint16m4_t __riscv_vslideup_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                 size_t rs1, size_t vl);
vint16m8_t __riscv_vslideup_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                 size_t rs1, size_t vl);
vint32mf2_t __riscv_vslideup_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                  size_t rs1, size_t vl);
vint32m1_t __riscv_vslideup_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                 size_t rs1, size_t vl);
vint32m2_t __riscv_vslideup_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                 size_t rs1, size_t vl);
vint32m4_t __riscv_vslideup_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                 size_t rs1, size_t vl);
vint32m8_t __riscv_vslideup_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                 size_t rs1, size_t vl);
vint64m1_t __riscv_vslideup_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                 size_t rs1, size_t vl);
vint64m2_t __riscv_vslideup_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                 size_t rs1, size_t vl);
vint64m4_t __riscv_vslideup_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                 size_t rs1, size_t vl);
vint64m8_t __riscv_vslideup_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                 size_t rs1, size_t vl);
vuint8mf8_t __riscv_vslideup_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                  size_t rs1, size_t vl);
vuint8mf4_t __riscv_vslideup_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                  size_t rs1, size_t vl);
vuint8mf2_t __riscv_vslideup_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                  size_t rs1, size_t vl);
vuint8m1_t __riscv_vslideup_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                 size_t rs1, size_t vl);
vuint8m2_t __riscv_vslideup_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                 size_t rs1, size_t vl);
vuint8m4_t __riscv_vslideup_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                 size_t rs1, size_t vl);
vuint8m8_t __riscv_vslideup_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                 size_t rs1, size_t vl);
vuint16mf4_t __riscv_vslideup_tumu(vbool64_t vm, vuint16mf4_t vd,
                                   vuint16mf4_t vs2, size_t rs1, size_t vl);
vuint16mf2_t __riscv_vslideup_tumu(vbool32_t vm, vuint16mf2_t vd,
                                   vuint16mf2_t vs2, size_t rs1, size_t vl);
vuint16m1_t __riscv_vslideup_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                  size_t rs1, size_t vl);
vuint16m2_t __riscv_vslideup_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                  size_t rs1, size_t vl);
vuint16m4_t __riscv_vslideup_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                  size_t rs1, size_t vl);
vuint16m8_t __riscv_vslideup_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                  size_t rs1, size_t vl);
vuint32mf2_t __riscv_vslideup_tumu(vbool64_t vm, vuint32mf2_t vd,
                                   vuint32mf2_t vs2, size_t rs1, size_t vl);
vuint32m1_t __riscv_vslideup_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                  size_t rs1, size_t vl);
vuint32m2_t __riscv_vslideup_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                  size_t rs1, size_t vl);
vuint32m4_t __riscv_vslideup_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                  size_t rs1, size_t vl);
vuint32m8_t __riscv_vslideup_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                  size_t rs1, size_t vl);
vuint64m1_t __riscv_vslideup_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                  size_t rs1, size_t vl);
vuint64m2_t __riscv_vslideup_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                  size_t rs1, size_t vl);
vuint64m4_t __riscv_vslideup_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                  size_t rs1, size_t vl);
vuint64m8_t __riscv_vslideup_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                  size_t rs1, size_t vl);
// masked functions: _mu policy overloads of vslideup
// (_mu = mask undisturbed, tail agnostic, per the RVV intrinsics policy
// naming scheme). Every overload takes the mask vm first, then vd, vs2, the
// size_t scalar rs1 and size_t vl, and returns the same vector type as vd and
// vs2. The mask type is vboolN_t with N = SEW / LMUL, e.g. vint8m8_t pairs
// with vbool1_t and vfloat16mf4_t with vbool64_t.
// NOTE(review): rs1 presumably carries the slide offset, mirroring the
// vslideup.vx operand name -- confirm against the ISA spec.
// -- floating-point element types --
vfloat16mf4_t __riscv_vslideup_mu(vbool64_t vm, vfloat16mf4_t vd,
                                  vfloat16mf4_t vs2, size_t rs1, size_t vl);
vfloat16mf2_t __riscv_vslideup_mu(vbool32_t vm, vfloat16mf2_t vd,
                                  vfloat16mf2_t vs2, size_t rs1, size_t vl);
vfloat16m1_t __riscv_vslideup_mu(vbool16_t vm, vfloat16m1_t vd,
                                 vfloat16m1_t vs2, size_t rs1, size_t vl);
vfloat16m2_t __riscv_vslideup_mu(vbool8_t vm, vfloat16m2_t vd, vfloat16m2_t vs2,
                                 size_t rs1, size_t vl);
vfloat16m4_t __riscv_vslideup_mu(vbool4_t vm, vfloat16m4_t vd, vfloat16m4_t vs2,
                                 size_t rs1, size_t vl);
vfloat16m8_t __riscv_vslideup_mu(vbool2_t vm, vfloat16m8_t vd, vfloat16m8_t vs2,
                                 size_t rs1, size_t vl);
vfloat32mf2_t __riscv_vslideup_mu(vbool64_t vm, vfloat32mf2_t vd,
                                  vfloat32mf2_t vs2, size_t rs1, size_t vl);
vfloat32m1_t __riscv_vslideup_mu(vbool32_t vm, vfloat32m1_t vd,
                                 vfloat32m1_t vs2, size_t rs1, size_t vl);
vfloat32m2_t __riscv_vslideup_mu(vbool16_t vm, vfloat32m2_t vd,
                                 vfloat32m2_t vs2, size_t rs1, size_t vl);
vfloat32m4_t __riscv_vslideup_mu(vbool8_t vm, vfloat32m4_t vd, vfloat32m4_t vs2,
                                 size_t rs1, size_t vl);
vfloat32m8_t __riscv_vslideup_mu(vbool4_t vm, vfloat32m8_t vd, vfloat32m8_t vs2,
                                 size_t rs1, size_t vl);
vfloat64m1_t __riscv_vslideup_mu(vbool64_t vm, vfloat64m1_t vd,
                                 vfloat64m1_t vs2, size_t rs1, size_t vl);
vfloat64m2_t __riscv_vslideup_mu(vbool32_t vm, vfloat64m2_t vd,
                                 vfloat64m2_t vs2, size_t rs1, size_t vl);
vfloat64m4_t __riscv_vslideup_mu(vbool16_t vm, vfloat64m4_t vd,
                                 vfloat64m4_t vs2, size_t rs1, size_t vl);
vfloat64m8_t __riscv_vslideup_mu(vbool8_t vm, vfloat64m8_t vd, vfloat64m8_t vs2,
                                 size_t rs1, size_t vl);
// -- signed integer element types --
vint8mf8_t __riscv_vslideup_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                               size_t rs1, size_t vl);
vint8mf4_t __riscv_vslideup_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                               size_t rs1, size_t vl);
vint8mf2_t __riscv_vslideup_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                               size_t rs1, size_t vl);
vint8m1_t __riscv_vslideup_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                              size_t rs1, size_t vl);
vint8m2_t __riscv_vslideup_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                              size_t rs1, size_t vl);
vint8m4_t __riscv_vslideup_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                              size_t rs1, size_t vl);
vint8m8_t __riscv_vslideup_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                              size_t rs1, size_t vl);
vint16mf4_t __riscv_vslideup_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                size_t rs1, size_t vl);
vint16mf2_t __riscv_vslideup_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                size_t rs1, size_t vl);
vint16m1_t __riscv_vslideup_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                               size_t rs1, size_t vl);
vint16m2_t __riscv_vslideup_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                               size_t rs1, size_t vl);
vint16m4_t __riscv_vslideup_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                               size_t rs1, size_t vl);
vint16m8_t __riscv_vslideup_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                               size_t rs1, size_t vl);
vint32mf2_t __riscv_vslideup_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                size_t rs1, size_t vl);
vint32m1_t __riscv_vslideup_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                               size_t rs1, size_t vl);
vint32m2_t __riscv_vslideup_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                               size_t rs1, size_t vl);
vint32m4_t __riscv_vslideup_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                               size_t rs1, size_t vl);
vint32m8_t __riscv_vslideup_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                               size_t rs1, size_t vl);
vint64m1_t __riscv_vslideup_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                               size_t rs1, size_t vl);
vint64m2_t __riscv_vslideup_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                               size_t rs1, size_t vl);
vint64m4_t __riscv_vslideup_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                               size_t rs1, size_t vl);
vint64m8_t __riscv_vslideup_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                               size_t rs1, size_t vl);
// -- unsigned integer element types --
vuint8mf8_t __riscv_vslideup_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                size_t rs1, size_t vl);
vuint8mf4_t __riscv_vslideup_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                size_t rs1, size_t vl);
vuint8mf2_t __riscv_vslideup_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                size_t rs1, size_t vl);
vuint8m1_t __riscv_vslideup_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                               size_t rs1, size_t vl);
vuint8m2_t __riscv_vslideup_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                               size_t rs1, size_t vl);
vuint8m4_t __riscv_vslideup_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                               size_t rs1, size_t vl);
vuint8m8_t __riscv_vslideup_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                               size_t rs1, size_t vl);
vuint16mf4_t __riscv_vslideup_mu(vbool64_t vm, vuint16mf4_t vd,
                                 vuint16mf4_t vs2, size_t rs1, size_t vl);
vuint16mf2_t __riscv_vslideup_mu(vbool32_t vm, vuint16mf2_t vd,
                                 vuint16mf2_t vs2, size_t rs1, size_t vl);
vuint16m1_t __riscv_vslideup_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                size_t rs1, size_t vl);
vuint16m2_t __riscv_vslideup_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                size_t rs1, size_t vl);
vuint16m4_t __riscv_vslideup_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                size_t rs1, size_t vl);
vuint16m8_t __riscv_vslideup_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                size_t rs1, size_t vl);
vuint32mf2_t __riscv_vslideup_mu(vbool64_t vm, vuint32mf2_t vd,
                                 vuint32mf2_t vs2, size_t rs1, size_t vl);
vuint32m1_t __riscv_vslideup_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                size_t rs1, size_t vl);
vuint32m2_t __riscv_vslideup_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                size_t rs1, size_t vl);
vuint32m4_t __riscv_vslideup_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                size_t rs1, size_t vl);
vuint32m8_t __riscv_vslideup_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                size_t rs1, size_t vl);
vuint64m1_t __riscv_vslideup_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                size_t rs1, size_t vl);
vuint64m2_t __riscv_vslideup_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                size_t rs1, size_t vl);
vuint64m4_t __riscv_vslideup_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                size_t rs1, size_t vl);
vuint64m8_t __riscv_vslideup_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                size_t rs1, size_t vl);

Vector Slidedown Intrinsics

// unmasked functions: _tu policy overloads of vslidedown
// (_tu = tail undisturbed, per the RVV intrinsics policy naming scheme).
// These overloads take no mask operand: the parameters are vd, vs2, the
// size_t scalar rs1 and size_t vl, and the return type is the same vector
// type as vd and vs2.
// NOTE(review): rs1 presumably carries the slide offset, mirroring the
// vslidedown.vx operand name -- confirm against the ISA spec.
// -- floating-point element types --
vfloat16mf4_t __riscv_vslidedown_tu(vfloat16mf4_t vd, vfloat16mf4_t vs2,
                                    size_t rs1, size_t vl);
vfloat16mf2_t __riscv_vslidedown_tu(vfloat16mf2_t vd, vfloat16mf2_t vs2,
                                    size_t rs1, size_t vl);
vfloat16m1_t __riscv_vslidedown_tu(vfloat16m1_t vd, vfloat16m1_t vs2,
                                   size_t rs1, size_t vl);
vfloat16m2_t __riscv_vslidedown_tu(vfloat16m2_t vd, vfloat16m2_t vs2,
                                   size_t rs1, size_t vl);
vfloat16m4_t __riscv_vslidedown_tu(vfloat16m4_t vd, vfloat16m4_t vs2,
                                   size_t rs1, size_t vl);
vfloat16m8_t __riscv_vslidedown_tu(vfloat16m8_t vd, vfloat16m8_t vs2,
                                   size_t rs1, size_t vl);
vfloat32mf2_t __riscv_vslidedown_tu(vfloat32mf2_t vd, vfloat32mf2_t vs2,
                                    size_t rs1, size_t vl);
vfloat32m1_t __riscv_vslidedown_tu(vfloat32m1_t vd, vfloat32m1_t vs2,
                                   size_t rs1, size_t vl);
vfloat32m2_t __riscv_vslidedown_tu(vfloat32m2_t vd, vfloat32m2_t vs2,
                                   size_t rs1, size_t vl);
vfloat32m4_t __riscv_vslidedown_tu(vfloat32m4_t vd, vfloat32m4_t vs2,
                                   size_t rs1, size_t vl);
vfloat32m8_t __riscv_vslidedown_tu(vfloat32m8_t vd, vfloat32m8_t vs2,
                                   size_t rs1, size_t vl);
vfloat64m1_t __riscv_vslidedown_tu(vfloat64m1_t vd, vfloat64m1_t vs2,
                                   size_t rs1, size_t vl);
vfloat64m2_t __riscv_vslidedown_tu(vfloat64m2_t vd, vfloat64m2_t vs2,
                                   size_t rs1, size_t vl);
vfloat64m4_t __riscv_vslidedown_tu(vfloat64m4_t vd, vfloat64m4_t vs2,
                                   size_t rs1, size_t vl);
vfloat64m8_t __riscv_vslidedown_tu(vfloat64m8_t vd, vfloat64m8_t vs2,
                                   size_t rs1, size_t vl);
// -- signed integer element types --
vint8mf8_t __riscv_vslidedown_tu(vint8mf8_t vd, vint8mf8_t vs2, size_t rs1,
                                 size_t vl);
vint8mf4_t __riscv_vslidedown_tu(vint8mf4_t vd, vint8mf4_t vs2, size_t rs1,
                                 size_t vl);
vint8mf2_t __riscv_vslidedown_tu(vint8mf2_t vd, vint8mf2_t vs2, size_t rs1,
                                 size_t vl);
vint8m1_t __riscv_vslidedown_tu(vint8m1_t vd, vint8m1_t vs2, size_t rs1,
                                size_t vl);
vint8m2_t __riscv_vslidedown_tu(vint8m2_t vd, vint8m2_t vs2, size_t rs1,
                                size_t vl);
vint8m4_t __riscv_vslidedown_tu(vint8m4_t vd, vint8m4_t vs2, size_t rs1,
                                size_t vl);
vint8m8_t __riscv_vslidedown_tu(vint8m8_t vd, vint8m8_t vs2, size_t rs1,
                                size_t vl);
vint16mf4_t __riscv_vslidedown_tu(vint16mf4_t vd, vint16mf4_t vs2, size_t rs1,
                                  size_t vl);
vint16mf2_t __riscv_vslidedown_tu(vint16mf2_t vd, vint16mf2_t vs2, size_t rs1,
                                  size_t vl);
vint16m1_t __riscv_vslidedown_tu(vint16m1_t vd, vint16m1_t vs2, size_t rs1,
                                 size_t vl);
vint16m2_t __riscv_vslidedown_tu(vint16m2_t vd, vint16m2_t vs2, size_t rs1,
                                 size_t vl);
vint16m4_t __riscv_vslidedown_tu(vint16m4_t vd, vint16m4_t vs2, size_t rs1,
                                 size_t vl);
vint16m8_t __riscv_vslidedown_tu(vint16m8_t vd, vint16m8_t vs2, size_t rs1,
                                 size_t vl);
vint32mf2_t __riscv_vslidedown_tu(vint32mf2_t vd, vint32mf2_t vs2, size_t rs1,
                                  size_t vl);
vint32m1_t __riscv_vslidedown_tu(vint32m1_t vd, vint32m1_t vs2, size_t rs1,
                                 size_t vl);
vint32m2_t __riscv_vslidedown_tu(vint32m2_t vd, vint32m2_t vs2, size_t rs1,
                                 size_t vl);
vint32m4_t __riscv_vslidedown_tu(vint32m4_t vd, vint32m4_t vs2, size_t rs1,
                                 size_t vl);
vint32m8_t __riscv_vslidedown_tu(vint32m8_t vd, vint32m8_t vs2, size_t rs1,
                                 size_t vl);
vint64m1_t __riscv_vslidedown_tu(vint64m1_t vd, vint64m1_t vs2, size_t rs1,
                                 size_t vl);
vint64m2_t __riscv_vslidedown_tu(vint64m2_t vd, vint64m2_t vs2, size_t rs1,
                                 size_t vl);
vint64m4_t __riscv_vslidedown_tu(vint64m4_t vd, vint64m4_t vs2, size_t rs1,
                                 size_t vl);
vint64m8_t __riscv_vslidedown_tu(vint64m8_t vd, vint64m8_t vs2, size_t rs1,
                                 size_t vl);
// -- unsigned integer element types --
vuint8mf8_t __riscv_vslidedown_tu(vuint8mf8_t vd, vuint8mf8_t vs2, size_t rs1,
                                  size_t vl);
vuint8mf4_t __riscv_vslidedown_tu(vuint8mf4_t vd, vuint8mf4_t vs2, size_t rs1,
                                  size_t vl);
vuint8mf2_t __riscv_vslidedown_tu(vuint8mf2_t vd, vuint8mf2_t vs2, size_t rs1,
                                  size_t vl);
vuint8m1_t __riscv_vslidedown_tu(vuint8m1_t vd, vuint8m1_t vs2, size_t rs1,
                                 size_t vl);
vuint8m2_t __riscv_vslidedown_tu(vuint8m2_t vd, vuint8m2_t vs2, size_t rs1,
                                 size_t vl);
vuint8m4_t __riscv_vslidedown_tu(vuint8m4_t vd, vuint8m4_t vs2, size_t rs1,
                                 size_t vl);
vuint8m8_t __riscv_vslidedown_tu(vuint8m8_t vd, vuint8m8_t vs2, size_t rs1,
                                 size_t vl);
vuint16mf4_t __riscv_vslidedown_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                                   size_t rs1, size_t vl);
vuint16mf2_t __riscv_vslidedown_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                                   size_t rs1, size_t vl);
vuint16m1_t __riscv_vslidedown_tu(vuint16m1_t vd, vuint16m1_t vs2, size_t rs1,
                                  size_t vl);
vuint16m2_t __riscv_vslidedown_tu(vuint16m2_t vd, vuint16m2_t vs2, size_t rs1,
                                  size_t vl);
vuint16m4_t __riscv_vslidedown_tu(vuint16m4_t vd, vuint16m4_t vs2, size_t rs1,
                                  size_t vl);
vuint16m8_t __riscv_vslidedown_tu(vuint16m8_t vd, vuint16m8_t vs2, size_t rs1,
                                  size_t vl);
vuint32mf2_t __riscv_vslidedown_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                                   size_t rs1, size_t vl);
vuint32m1_t __riscv_vslidedown_tu(vuint32m1_t vd, vuint32m1_t vs2, size_t rs1,
                                  size_t vl);
vuint32m2_t __riscv_vslidedown_tu(vuint32m2_t vd, vuint32m2_t vs2, size_t rs1,
                                  size_t vl);
vuint32m4_t __riscv_vslidedown_tu(vuint32m4_t vd, vuint32m4_t vs2, size_t rs1,
                                  size_t vl);
vuint32m8_t __riscv_vslidedown_tu(vuint32m8_t vd, vuint32m8_t vs2, size_t rs1,
                                  size_t vl);
vuint64m1_t __riscv_vslidedown_tu(vuint64m1_t vd, vuint64m1_t vs2, size_t rs1,
                                  size_t vl);
vuint64m2_t __riscv_vslidedown_tu(vuint64m2_t vd, vuint64m2_t vs2, size_t rs1,
                                  size_t vl);
vuint64m4_t __riscv_vslidedown_tu(vuint64m4_t vd, vuint64m4_t vs2, size_t rs1,
                                  size_t vl);
vuint64m8_t __riscv_vslidedown_tu(vuint64m8_t vd, vuint64m8_t vs2, size_t rs1,
                                  size_t vl);
// masked functions: _tum policy overloads of vslidedown
// (_tum = tail undisturbed, mask agnostic, per the RVV intrinsics policy
// naming scheme). Every overload takes the mask vm first, then vd, vs2, the
// size_t scalar rs1 and size_t vl, and returns the same vector type as vd and
// vs2. The mask type is vboolN_t with N = SEW / LMUL, e.g. vint8m8_t pairs
// with vbool1_t and vfloat16mf4_t with vbool64_t.
// -- floating-point element types --
vfloat16mf4_t __riscv_vslidedown_tum(vbool64_t vm, vfloat16mf4_t vd,
                                     vfloat16mf4_t vs2, size_t rs1, size_t vl);
vfloat16mf2_t __riscv_vslidedown_tum(vbool32_t vm, vfloat16mf2_t vd,
                                     vfloat16mf2_t vs2, size_t rs1, size_t vl);
vfloat16m1_t __riscv_vslidedown_tum(vbool16_t vm, vfloat16m1_t vd,
                                    vfloat16m1_t vs2, size_t rs1, size_t vl);
vfloat16m2_t __riscv_vslidedown_tum(vbool8_t vm, vfloat16m2_t vd,
                                    vfloat16m2_t vs2, size_t rs1, size_t vl);
vfloat16m4_t __riscv_vslidedown_tum(vbool4_t vm, vfloat16m4_t vd,
                                    vfloat16m4_t vs2, size_t rs1, size_t vl);
vfloat16m8_t __riscv_vslidedown_tum(vbool2_t vm, vfloat16m8_t vd,
                                    vfloat16m8_t vs2, size_t rs1, size_t vl);
vfloat32mf2_t __riscv_vslidedown_tum(vbool64_t vm, vfloat32mf2_t vd,
                                     vfloat32mf2_t vs2, size_t rs1, size_t vl);
vfloat32m1_t __riscv_vslidedown_tum(vbool32_t vm, vfloat32m1_t vd,
                                    vfloat32m1_t vs2, size_t rs1, size_t vl);
vfloat32m2_t __riscv_vslidedown_tum(vbool16_t vm, vfloat32m2_t vd,
                                    vfloat32m2_t vs2, size_t rs1, size_t vl);
vfloat32m4_t __riscv_vslidedown_tum(vbool8_t vm, vfloat32m4_t vd,
                                    vfloat32m4_t vs2, size_t rs1, size_t vl);
vfloat32m8_t __riscv_vslidedown_tum(vbool4_t vm, vfloat32m8_t vd,
                                    vfloat32m8_t vs2, size_t rs1, size_t vl);
vfloat64m1_t __riscv_vslidedown_tum(vbool64_t vm, vfloat64m1_t vd,
                                    vfloat64m1_t vs2, size_t rs1, size_t vl);
vfloat64m2_t __riscv_vslidedown_tum(vbool32_t vm, vfloat64m2_t vd,
                                    vfloat64m2_t vs2, size_t rs1, size_t vl);
vfloat64m4_t __riscv_vslidedown_tum(vbool16_t vm, vfloat64m4_t vd,
                                    vfloat64m4_t vs2, size_t rs1, size_t vl);
vfloat64m8_t __riscv_vslidedown_tum(vbool8_t vm, vfloat64m8_t vd,
                                    vfloat64m8_t vs2, size_t rs1, size_t vl);
// -- signed integer element types --
vint8mf8_t __riscv_vslidedown_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                  size_t rs1, size_t vl);
vint8mf4_t __riscv_vslidedown_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                  size_t rs1, size_t vl);
vint8mf2_t __riscv_vslidedown_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                  size_t rs1, size_t vl);
vint8m1_t __riscv_vslidedown_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                 size_t rs1, size_t vl);
vint8m2_t __riscv_vslidedown_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                 size_t rs1, size_t vl);
vint8m4_t __riscv_vslidedown_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                 size_t rs1, size_t vl);
vint8m8_t __riscv_vslidedown_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                                 size_t rs1, size_t vl);
vint16mf4_t __riscv_vslidedown_tum(vbool64_t vm, vint16mf4_t vd,
                                   vint16mf4_t vs2, size_t rs1, size_t vl);
vint16mf2_t __riscv_vslidedown_tum(vbool32_t vm, vint16mf2_t vd,
                                   vint16mf2_t vs2, size_t rs1, size_t vl);
vint16m1_t __riscv_vslidedown_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                  size_t rs1, size_t vl);
vint16m2_t __riscv_vslidedown_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                  size_t rs1, size_t vl);
vint16m4_t __riscv_vslidedown_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                  size_t rs1, size_t vl);
vint16m8_t __riscv_vslidedown_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                  size_t rs1, size_t vl);
vint32mf2_t __riscv_vslidedown_tum(vbool64_t vm, vint32mf2_t vd,
                                   vint32mf2_t vs2, size_t rs1, size_t vl);
vint32m1_t __riscv_vslidedown_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                  size_t rs1, size_t vl);
vint32m2_t __riscv_vslidedown_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                  size_t rs1, size_t vl);
vint32m4_t __riscv_vslidedown_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                  size_t rs1, size_t vl);
vint32m8_t __riscv_vslidedown_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                  size_t rs1, size_t vl);
vint64m1_t __riscv_vslidedown_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                  size_t rs1, size_t vl);
vint64m2_t __riscv_vslidedown_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                  size_t rs1, size_t vl);
vint64m4_t __riscv_vslidedown_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                  size_t rs1, size_t vl);
vint64m8_t __riscv_vslidedown_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                  size_t rs1, size_t vl);
// -- unsigned integer element types --
vuint8mf8_t __riscv_vslidedown_tum(vbool64_t vm, vuint8mf8_t vd,
                                   vuint8mf8_t vs2, size_t rs1, size_t vl);
vuint8mf4_t __riscv_vslidedown_tum(vbool32_t vm, vuint8mf4_t vd,
                                   vuint8mf4_t vs2, size_t rs1, size_t vl);
vuint8mf2_t __riscv_vslidedown_tum(vbool16_t vm, vuint8mf2_t vd,
                                   vuint8mf2_t vs2, size_t rs1, size_t vl);
vuint8m1_t __riscv_vslidedown_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                  size_t rs1, size_t vl);
vuint8m2_t __riscv_vslidedown_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                  size_t rs1, size_t vl);
vuint8m4_t __riscv_vslidedown_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                  size_t rs1, size_t vl);
vuint8m8_t __riscv_vslidedown_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                  size_t rs1, size_t vl);
vuint16mf4_t __riscv_vslidedown_tum(vbool64_t vm, vuint16mf4_t vd,
                                    vuint16mf4_t vs2, size_t rs1, size_t vl);
vuint16mf2_t __riscv_vslidedown_tum(vbool32_t vm, vuint16mf2_t vd,
                                    vuint16mf2_t vs2, size_t rs1, size_t vl);
vuint16m1_t __riscv_vslidedown_tum(vbool16_t vm, vuint16m1_t vd,
                                   vuint16m1_t vs2, size_t rs1, size_t vl);
vuint16m2_t __riscv_vslidedown_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                   size_t rs1, size_t vl);
vuint16m4_t __riscv_vslidedown_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                   size_t rs1, size_t vl);
vuint16m8_t __riscv_vslidedown_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                   size_t rs1, size_t vl);
vuint32mf2_t __riscv_vslidedown_tum(vbool64_t vm, vuint32mf2_t vd,
                                    vuint32mf2_t vs2, size_t rs1, size_t vl);
vuint32m1_t __riscv_vslidedown_tum(vbool32_t vm, vuint32m1_t vd,
                                   vuint32m1_t vs2, size_t rs1, size_t vl);
vuint32m2_t __riscv_vslidedown_tum(vbool16_t vm, vuint32m2_t vd,
                                   vuint32m2_t vs2, size_t rs1, size_t vl);
vuint32m4_t __riscv_vslidedown_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                   size_t rs1, size_t vl);
vuint32m8_t __riscv_vslidedown_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                   size_t rs1, size_t vl);
vuint64m1_t __riscv_vslidedown_tum(vbool64_t vm, vuint64m1_t vd,
                                   vuint64m1_t vs2, size_t rs1, size_t vl);
vuint64m2_t __riscv_vslidedown_tum(vbool32_t vm, vuint64m2_t vd,
                                   vuint64m2_t vs2, size_t rs1, size_t vl);
vuint64m4_t __riscv_vslidedown_tum(vbool16_t vm, vuint64m4_t vd,
                                   vuint64m4_t vs2, size_t rs1, size_t vl);
vuint64m8_t __riscv_vslidedown_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                   size_t rs1, size_t vl);
// masked functions: _tumu policy overloads of vslidedown
// (_tumu = tail undisturbed, mask undisturbed, per the RVV intrinsics policy
// naming scheme). Every overload takes the mask vm first, then vd, vs2, the
// size_t scalar rs1 and size_t vl, and returns the same vector type as vd and
// vs2. The mask type is vboolN_t with N = SEW / LMUL, e.g. vint8m8_t pairs
// with vbool1_t and vfloat16mf4_t with vbool64_t.
// -- floating-point element types --
vfloat16mf4_t __riscv_vslidedown_tumu(vbool64_t vm, vfloat16mf4_t vd,
                                      vfloat16mf4_t vs2, size_t rs1, size_t vl);
vfloat16mf2_t __riscv_vslidedown_tumu(vbool32_t vm, vfloat16mf2_t vd,
                                      vfloat16mf2_t vs2, size_t rs1, size_t vl);
vfloat16m1_t __riscv_vslidedown_tumu(vbool16_t vm, vfloat16m1_t vd,
                                     vfloat16m1_t vs2, size_t rs1, size_t vl);
vfloat16m2_t __riscv_vslidedown_tumu(vbool8_t vm, vfloat16m2_t vd,
                                     vfloat16m2_t vs2, size_t rs1, size_t vl);
vfloat16m4_t __riscv_vslidedown_tumu(vbool4_t vm, vfloat16m4_t vd,
                                     vfloat16m4_t vs2, size_t rs1, size_t vl);
vfloat16m8_t __riscv_vslidedown_tumu(vbool2_t vm, vfloat16m8_t vd,
                                     vfloat16m8_t vs2, size_t rs1, size_t vl);
vfloat32mf2_t __riscv_vslidedown_tumu(vbool64_t vm, vfloat32mf2_t vd,
                                      vfloat32mf2_t vs2, size_t rs1, size_t vl);
vfloat32m1_t __riscv_vslidedown_tumu(vbool32_t vm, vfloat32m1_t vd,
                                     vfloat32m1_t vs2, size_t rs1, size_t vl);
vfloat32m2_t __riscv_vslidedown_tumu(vbool16_t vm, vfloat32m2_t vd,
                                     vfloat32m2_t vs2, size_t rs1, size_t vl);
vfloat32m4_t __riscv_vslidedown_tumu(vbool8_t vm, vfloat32m4_t vd,
                                     vfloat32m4_t vs2, size_t rs1, size_t vl);
vfloat32m8_t __riscv_vslidedown_tumu(vbool4_t vm, vfloat32m8_t vd,
                                     vfloat32m8_t vs2, size_t rs1, size_t vl);
vfloat64m1_t __riscv_vslidedown_tumu(vbool64_t vm, vfloat64m1_t vd,
                                     vfloat64m1_t vs2, size_t rs1, size_t vl);
vfloat64m2_t __riscv_vslidedown_tumu(vbool32_t vm, vfloat64m2_t vd,
                                     vfloat64m2_t vs2, size_t rs1, size_t vl);
vfloat64m4_t __riscv_vslidedown_tumu(vbool16_t vm, vfloat64m4_t vd,
                                     vfloat64m4_t vs2, size_t rs1, size_t vl);
vfloat64m8_t __riscv_vslidedown_tumu(vbool8_t vm, vfloat64m8_t vd,
                                     vfloat64m8_t vs2, size_t rs1, size_t vl);
// -- signed integer element types --
vint8mf8_t __riscv_vslidedown_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                   size_t rs1, size_t vl);
vint8mf4_t __riscv_vslidedown_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                   size_t rs1, size_t vl);
vint8mf2_t __riscv_vslidedown_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                   size_t rs1, size_t vl);
vint8m1_t __riscv_vslidedown_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                  size_t rs1, size_t vl);
vint8m2_t __riscv_vslidedown_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                  size_t rs1, size_t vl);
vint8m4_t __riscv_vslidedown_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                  size_t rs1, size_t vl);
vint8m8_t __riscv_vslidedown_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                                  size_t rs1, size_t vl);
vint16mf4_t __riscv_vslidedown_tumu(vbool64_t vm, vint16mf4_t vd,
                                    vint16mf4_t vs2, size_t rs1, size_t vl);
vint16mf2_t __riscv_vslidedown_tumu(vbool32_t vm, vint16mf2_t vd,
                                    vint16mf2_t vs2, size_t rs1, size_t vl);
vint16m1_t __riscv_vslidedown_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                   size_t rs1, size_t vl);
vint16m2_t __riscv_vslidedown_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                   size_t rs1, size_t vl);
vint16m4_t __riscv_vslidedown_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                   size_t rs1, size_t vl);
vint16m8_t __riscv_vslidedown_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                   size_t rs1, size_t vl);
vint32mf2_t __riscv_vslidedown_tumu(vbool64_t vm, vint32mf2_t vd,
                                    vint32mf2_t vs2, size_t rs1, size_t vl);
vint32m1_t __riscv_vslidedown_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                   size_t rs1, size_t vl);
vint32m2_t __riscv_vslidedown_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                   size_t rs1, size_t vl);
vint32m4_t __riscv_vslidedown_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                   size_t rs1, size_t vl);
vint32m8_t __riscv_vslidedown_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                   size_t rs1, size_t vl);
vint64m1_t __riscv_vslidedown_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                   size_t rs1, size_t vl);
vint64m2_t __riscv_vslidedown_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                   size_t rs1, size_t vl);
vint64m4_t __riscv_vslidedown_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                   size_t rs1, size_t vl);
vint64m8_t __riscv_vslidedown_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                   size_t rs1, size_t vl);
// -- unsigned integer element types --
vuint8mf8_t __riscv_vslidedown_tumu(vbool64_t vm, vuint8mf8_t vd,
                                    vuint8mf8_t vs2, size_t rs1, size_t vl);
vuint8mf4_t __riscv_vslidedown_tumu(vbool32_t vm, vuint8mf4_t vd,
                                    vuint8mf4_t vs2, size_t rs1, size_t vl);
vuint8mf2_t __riscv_vslidedown_tumu(vbool16_t vm, vuint8mf2_t vd,
                                    vuint8mf2_t vs2, size_t rs1, size_t vl);
vuint8m1_t __riscv_vslidedown_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                   size_t rs1, size_t vl);
vuint8m2_t __riscv_vslidedown_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                   size_t rs1, size_t vl);
vuint8m4_t __riscv_vslidedown_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                   size_t rs1, size_t vl);
vuint8m8_t __riscv_vslidedown_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                   size_t rs1, size_t vl);
vuint16mf4_t __riscv_vslidedown_tumu(vbool64_t vm, vuint16mf4_t vd,
                                     vuint16mf4_t vs2, size_t rs1, size_t vl);
vuint16mf2_t __riscv_vslidedown_tumu(vbool32_t vm, vuint16mf2_t vd,
                                     vuint16mf2_t vs2, size_t rs1, size_t vl);
vuint16m1_t __riscv_vslidedown_tumu(vbool16_t vm, vuint16m1_t vd,
                                    vuint16m1_t vs2, size_t rs1, size_t vl);
vuint16m2_t __riscv_vslidedown_tumu(vbool8_t vm, vuint16m2_t vd,
                                    vuint16m2_t vs2, size_t rs1, size_t vl);
vuint16m4_t __riscv_vslidedown_tumu(vbool4_t vm, vuint16m4_t vd,
                                    vuint16m4_t vs2, size_t rs1, size_t vl);
vuint16m8_t __riscv_vslidedown_tumu(vbool2_t vm, vuint16m8_t vd,
                                    vuint16m8_t vs2, size_t rs1, size_t vl);
vuint32mf2_t __riscv_vslidedown_tumu(vbool64_t vm, vuint32mf2_t vd,
                                     vuint32mf2_t vs2, size_t rs1, size_t vl);
vuint32m1_t __riscv_vslidedown_tumu(vbool32_t vm, vuint32m1_t vd,
                                    vuint32m1_t vs2, size_t rs1, size_t vl);
vuint32m2_t __riscv_vslidedown_tumu(vbool16_t vm, vuint32m2_t vd,
                                    vuint32m2_t vs2, size_t rs1, size_t vl);
vuint32m4_t __riscv_vslidedown_tumu(vbool8_t vm, vuint32m4_t vd,
                                    vuint32m4_t vs2, size_t rs1, size_t vl);
vuint32m8_t __riscv_vslidedown_tumu(vbool4_t vm, vuint32m8_t vd,
                                    vuint32m8_t vs2, size_t rs1, size_t vl);
vuint64m1_t __riscv_vslidedown_tumu(vbool64_t vm, vuint64m1_t vd,
                                    vuint64m1_t vs2, size_t rs1, size_t vl);
vuint64m2_t __riscv_vslidedown_tumu(vbool32_t vm, vuint64m2_t vd,
                                    vuint64m2_t vs2, size_t rs1, size_t vl);
vuint64m4_t __riscv_vslidedown_tumu(vbool16_t vm, vuint64m4_t vd,
                                    vuint64m4_t vs2, size_t rs1, size_t vl);
vuint64m8_t __riscv_vslidedown_tumu(vbool8_t vm, vuint64m8_t vd,
                                    vuint64m8_t vs2, size_t rs1, size_t vl);
// masked functions
// __riscv_vslidedown_mu: one overload per vector data type, listed in the
// order float (16/32/64-bit), signed integer (8..64-bit), unsigned integer
// (8..64-bit). Every overload has the same shape:
//   vm  - vboolN_t mask; N tracks the element-width / LMUL ratio of the data
//         type (e.g. vint8m8_t pairs with vbool1_t, vfloat64m1_t with
//         vbool64_t)
//   vd  - destination operand, same vector type as the return value
//   vs2 - source vector, same vector type as the return value
//   rs1 - size_t scalar operand
//   vl  - size_t vector length argument
// NOTE(review): rs1 is presumably the slide offset and _mu presumably the
// mask-undisturbed policy (inactive elements keep vd) -- confirm against the
// RVV spec.
vfloat16mf4_t __riscv_vslidedown_mu(vbool64_t vm, vfloat16mf4_t vd,
                                    vfloat16mf4_t vs2, size_t rs1, size_t vl);
vfloat16mf2_t __riscv_vslidedown_mu(vbool32_t vm, vfloat16mf2_t vd,
                                    vfloat16mf2_t vs2, size_t rs1, size_t vl);
vfloat16m1_t __riscv_vslidedown_mu(vbool16_t vm, vfloat16m1_t vd,
                                   vfloat16m1_t vs2, size_t rs1, size_t vl);
vfloat16m2_t __riscv_vslidedown_mu(vbool8_t vm, vfloat16m2_t vd,
                                   vfloat16m2_t vs2, size_t rs1, size_t vl);
vfloat16m4_t __riscv_vslidedown_mu(vbool4_t vm, vfloat16m4_t vd,
                                   vfloat16m4_t vs2, size_t rs1, size_t vl);
vfloat16m8_t __riscv_vslidedown_mu(vbool2_t vm, vfloat16m8_t vd,
                                   vfloat16m8_t vs2, size_t rs1, size_t vl);
vfloat32mf2_t __riscv_vslidedown_mu(vbool64_t vm, vfloat32mf2_t vd,
                                    vfloat32mf2_t vs2, size_t rs1, size_t vl);
vfloat32m1_t __riscv_vslidedown_mu(vbool32_t vm, vfloat32m1_t vd,
                                   vfloat32m1_t vs2, size_t rs1, size_t vl);
vfloat32m2_t __riscv_vslidedown_mu(vbool16_t vm, vfloat32m2_t vd,
                                   vfloat32m2_t vs2, size_t rs1, size_t vl);
vfloat32m4_t __riscv_vslidedown_mu(vbool8_t vm, vfloat32m4_t vd,
                                   vfloat32m4_t vs2, size_t rs1, size_t vl);
vfloat32m8_t __riscv_vslidedown_mu(vbool4_t vm, vfloat32m8_t vd,
                                   vfloat32m8_t vs2, size_t rs1, size_t vl);
vfloat64m1_t __riscv_vslidedown_mu(vbool64_t vm, vfloat64m1_t vd,
                                   vfloat64m1_t vs2, size_t rs1, size_t vl);
vfloat64m2_t __riscv_vslidedown_mu(vbool32_t vm, vfloat64m2_t vd,
                                   vfloat64m2_t vs2, size_t rs1, size_t vl);
vfloat64m4_t __riscv_vslidedown_mu(vbool16_t vm, vfloat64m4_t vd,
                                   vfloat64m4_t vs2, size_t rs1, size_t vl);
vfloat64m8_t __riscv_vslidedown_mu(vbool8_t vm, vfloat64m8_t vd,
                                   vfloat64m8_t vs2, size_t rs1, size_t vl);
vint8mf8_t __riscv_vslidedown_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                 size_t rs1, size_t vl);
vint8mf4_t __riscv_vslidedown_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                 size_t rs1, size_t vl);
vint8mf2_t __riscv_vslidedown_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                 size_t rs1, size_t vl);
vint8m1_t __riscv_vslidedown_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                size_t rs1, size_t vl);
vint8m2_t __riscv_vslidedown_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                size_t rs1, size_t vl);
vint8m4_t __riscv_vslidedown_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                size_t rs1, size_t vl);
vint8m8_t __riscv_vslidedown_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                                size_t rs1, size_t vl);
vint16mf4_t __riscv_vslidedown_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                  size_t rs1, size_t vl);
vint16mf2_t __riscv_vslidedown_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                  size_t rs1, size_t vl);
vint16m1_t __riscv_vslidedown_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                 size_t rs1, size_t vl);
vint16m2_t __riscv_vslidedown_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                 size_t rs1, size_t vl);
vint16m4_t __riscv_vslidedown_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                 size_t rs1, size_t vl);
vint16m8_t __riscv_vslidedown_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                 size_t rs1, size_t vl);
vint32mf2_t __riscv_vslidedown_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                  size_t rs1, size_t vl);
vint32m1_t __riscv_vslidedown_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                 size_t rs1, size_t vl);
vint32m2_t __riscv_vslidedown_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                 size_t rs1, size_t vl);
vint32m4_t __riscv_vslidedown_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                 size_t rs1, size_t vl);
vint32m8_t __riscv_vslidedown_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                 size_t rs1, size_t vl);
vint64m1_t __riscv_vslidedown_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                 size_t rs1, size_t vl);
vint64m2_t __riscv_vslidedown_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                 size_t rs1, size_t vl);
vint64m4_t __riscv_vslidedown_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                 size_t rs1, size_t vl);
vint64m8_t __riscv_vslidedown_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                 size_t rs1, size_t vl);
vuint8mf8_t __riscv_vslidedown_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                  size_t rs1, size_t vl);
vuint8mf4_t __riscv_vslidedown_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                  size_t rs1, size_t vl);
vuint8mf2_t __riscv_vslidedown_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                  size_t rs1, size_t vl);
vuint8m1_t __riscv_vslidedown_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                 size_t rs1, size_t vl);
vuint8m2_t __riscv_vslidedown_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                 size_t rs1, size_t vl);
vuint8m4_t __riscv_vslidedown_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                 size_t rs1, size_t vl);
vuint8m8_t __riscv_vslidedown_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                 size_t rs1, size_t vl);
vuint16mf4_t __riscv_vslidedown_mu(vbool64_t vm, vuint16mf4_t vd,
                                   vuint16mf4_t vs2, size_t rs1, size_t vl);
vuint16mf2_t __riscv_vslidedown_mu(vbool32_t vm, vuint16mf2_t vd,
                                   vuint16mf2_t vs2, size_t rs1, size_t vl);
vuint16m1_t __riscv_vslidedown_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                  size_t rs1, size_t vl);
vuint16m2_t __riscv_vslidedown_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                  size_t rs1, size_t vl);
vuint16m4_t __riscv_vslidedown_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                  size_t rs1, size_t vl);
vuint16m8_t __riscv_vslidedown_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                  size_t rs1, size_t vl);
vuint32mf2_t __riscv_vslidedown_mu(vbool64_t vm, vuint32mf2_t vd,
                                   vuint32mf2_t vs2, size_t rs1, size_t vl);
vuint32m1_t __riscv_vslidedown_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                  size_t rs1, size_t vl);
vuint32m2_t __riscv_vslidedown_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                  size_t rs1, size_t vl);
vuint32m4_t __riscv_vslidedown_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                  size_t rs1, size_t vl);
vuint32m8_t __riscv_vslidedown_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                  size_t rs1, size_t vl);
vuint64m1_t __riscv_vslidedown_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                  size_t rs1, size_t vl);
vuint64m2_t __riscv_vslidedown_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                  size_t rs1, size_t vl);
vuint64m4_t __riscv_vslidedown_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                  size_t rs1, size_t vl);
vuint64m8_t __riscv_vslidedown_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                  size_t rs1, size_t vl);

Vector Slide1up and Slide1down Intrinsics

// __riscv_vfslide1up_tu: unmasked floating-point overloads (no vm parameter).
// Each takes a destination operand vd and a source vector vs2 of the returned
// vector type, a scalar rs1 whose C type matches the element width (_Float16,
// float or double), and a size_t vl.
// NOTE(review): rs1 is presumably the scalar inserted by the slide-by-one and
// _tu presumably the tail-undisturbed policy -- confirm against the RVV spec.
vfloat16mf4_t __riscv_vfslide1up_tu(vfloat16mf4_t vd, vfloat16mf4_t vs2,
                                    _Float16 rs1, size_t vl);
vfloat16mf2_t __riscv_vfslide1up_tu(vfloat16mf2_t vd, vfloat16mf2_t vs2,
                                    _Float16 rs1, size_t vl);
vfloat16m1_t __riscv_vfslide1up_tu(vfloat16m1_t vd, vfloat16m1_t vs2,
                                   _Float16 rs1, size_t vl);
vfloat16m2_t __riscv_vfslide1up_tu(vfloat16m2_t vd, vfloat16m2_t vs2,
                                   _Float16 rs1, size_t vl);
vfloat16m4_t __riscv_vfslide1up_tu(vfloat16m4_t vd, vfloat16m4_t vs2,
                                   _Float16 rs1, size_t vl);
vfloat16m8_t __riscv_vfslide1up_tu(vfloat16m8_t vd, vfloat16m8_t vs2,
                                   _Float16 rs1, size_t vl);
vfloat32mf2_t __riscv_vfslide1up_tu(vfloat32mf2_t vd, vfloat32mf2_t vs2,
                                    float rs1, size_t vl);
vfloat32m1_t __riscv_vfslide1up_tu(vfloat32m1_t vd, vfloat32m1_t vs2, float rs1,
                                   size_t vl);
vfloat32m2_t __riscv_vfslide1up_tu(vfloat32m2_t vd, vfloat32m2_t vs2, float rs1,
                                   size_t vl);
vfloat32m4_t __riscv_vfslide1up_tu(vfloat32m4_t vd, vfloat32m4_t vs2, float rs1,
                                   size_t vl);
vfloat32m8_t __riscv_vfslide1up_tu(vfloat32m8_t vd, vfloat32m8_t vs2, float rs1,
                                   size_t vl);
vfloat64m1_t __riscv_vfslide1up_tu(vfloat64m1_t vd, vfloat64m1_t vs2,
                                   double rs1, size_t vl);
vfloat64m2_t __riscv_vfslide1up_tu(vfloat64m2_t vd, vfloat64m2_t vs2,
                                   double rs1, size_t vl);
vfloat64m4_t __riscv_vfslide1up_tu(vfloat64m4_t vd, vfloat64m4_t vs2,
                                   double rs1, size_t vl);
vfloat64m8_t __riscv_vfslide1up_tu(vfloat64m8_t vd, vfloat64m8_t vs2,
                                   double rs1, size_t vl);
// __riscv_vfslide1down_tu: unmasked floating-point overloads (no vm
// parameter). Parameters are vd and vs2 of the returned vector type, a scalar
// rs1 typed to the element width (_Float16, float or double), and a size_t vl.
// NOTE(review): presumably the slide-down-by-one counterpart of the
// __riscv_vfslide1up_tu overloads -- confirm against the RVV spec.
vfloat16mf4_t __riscv_vfslide1down_tu(vfloat16mf4_t vd, vfloat16mf4_t vs2,
                                      _Float16 rs1, size_t vl);
vfloat16mf2_t __riscv_vfslide1down_tu(vfloat16mf2_t vd, vfloat16mf2_t vs2,
                                      _Float16 rs1, size_t vl);
vfloat16m1_t __riscv_vfslide1down_tu(vfloat16m1_t vd, vfloat16m1_t vs2,
                                     _Float16 rs1, size_t vl);
vfloat16m2_t __riscv_vfslide1down_tu(vfloat16m2_t vd, vfloat16m2_t vs2,
                                     _Float16 rs1, size_t vl);
vfloat16m4_t __riscv_vfslide1down_tu(vfloat16m4_t vd, vfloat16m4_t vs2,
                                     _Float16 rs1, size_t vl);
vfloat16m8_t __riscv_vfslide1down_tu(vfloat16m8_t vd, vfloat16m8_t vs2,
                                     _Float16 rs1, size_t vl);
vfloat32mf2_t __riscv_vfslide1down_tu(vfloat32mf2_t vd, vfloat32mf2_t vs2,
                                      float rs1, size_t vl);
vfloat32m1_t __riscv_vfslide1down_tu(vfloat32m1_t vd, vfloat32m1_t vs2,
                                     float rs1, size_t vl);
vfloat32m2_t __riscv_vfslide1down_tu(vfloat32m2_t vd, vfloat32m2_t vs2,
                                     float rs1, size_t vl);
vfloat32m4_t __riscv_vfslide1down_tu(vfloat32m4_t vd, vfloat32m4_t vs2,
                                     float rs1, size_t vl);
vfloat32m8_t __riscv_vfslide1down_tu(vfloat32m8_t vd, vfloat32m8_t vs2,
                                     float rs1, size_t vl);
vfloat64m1_t __riscv_vfslide1down_tu(vfloat64m1_t vd, vfloat64m1_t vs2,
                                     double rs1, size_t vl);
vfloat64m2_t __riscv_vfslide1down_tu(vfloat64m2_t vd, vfloat64m2_t vs2,
                                     double rs1, size_t vl);
vfloat64m4_t __riscv_vfslide1down_tu(vfloat64m4_t vd, vfloat64m4_t vs2,
                                     double rs1, size_t vl);
vfloat64m8_t __riscv_vfslide1down_tu(vfloat64m8_t vd, vfloat64m8_t vs2,
                                     double rs1, size_t vl);
// __riscv_vslide1up_tu: unmasked signed-integer overloads (no vm parameter).
// Each takes vd and vs2 of the returned vector type, a scalar rs1 of the
// matching fixed-width signed type (int8_t .. int64_t), and a size_t vl.
// NOTE(review): rs1 is presumably the scalar inserted by the slide-by-one and
// _tu presumably the tail-undisturbed policy -- confirm against the RVV spec.
vint8mf8_t __riscv_vslide1up_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                                size_t vl);
vint8mf4_t __riscv_vslide1up_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                                size_t vl);
vint8mf2_t __riscv_vslide1up_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                                size_t vl);
vint8m1_t __riscv_vslide1up_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                               size_t vl);
vint8m2_t __riscv_vslide1up_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                               size_t vl);
vint8m4_t __riscv_vslide1up_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                               size_t vl);
vint8m8_t __riscv_vslide1up_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                               size_t vl);
vint16mf4_t __riscv_vslide1up_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                                 size_t vl);
vint16mf2_t __riscv_vslide1up_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                                 size_t vl);
vint16m1_t __riscv_vslide1up_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                                size_t vl);
vint16m2_t __riscv_vslide1up_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                                size_t vl);
vint16m4_t __riscv_vslide1up_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                                size_t vl);
vint16m8_t __riscv_vslide1up_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                                size_t vl);
vint32mf2_t __riscv_vslide1up_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                                 size_t vl);
vint32m1_t __riscv_vslide1up_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                                size_t vl);
vint32m2_t __riscv_vslide1up_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                                size_t vl);
vint32m4_t __riscv_vslide1up_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                                size_t vl);
vint32m8_t __riscv_vslide1up_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                                size_t vl);
vint64m1_t __riscv_vslide1up_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                                size_t vl);
vint64m2_t __riscv_vslide1up_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                                size_t vl);
vint64m4_t __riscv_vslide1up_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                                size_t vl);
vint64m8_t __riscv_vslide1up_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                                size_t vl);
// __riscv_vslide1down_tu: unmasked signed-integer overloads (no vm
// parameter). Each takes vd and vs2 of the returned vector type, a scalar rs1
// of the matching fixed-width signed type (int8_t .. int64_t), and a size_t
// vl.
// NOTE(review): presumably the slide-down-by-one counterpart of the
// __riscv_vslide1up_tu overloads -- confirm against the RVV spec.
vint8mf8_t __riscv_vslide1down_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                                  size_t vl);
vint8mf4_t __riscv_vslide1down_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                                  size_t vl);
vint8mf2_t __riscv_vslide1down_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                                  size_t vl);
vint8m1_t __riscv_vslide1down_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                                 size_t vl);
vint8m2_t __riscv_vslide1down_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                                 size_t vl);
vint8m4_t __riscv_vslide1down_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                                 size_t vl);
vint8m8_t __riscv_vslide1down_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                                 size_t vl);
vint16mf4_t __riscv_vslide1down_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                                   size_t vl);
vint16mf2_t __riscv_vslide1down_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                                   size_t vl);
vint16m1_t __riscv_vslide1down_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                                  size_t vl);
vint16m2_t __riscv_vslide1down_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                                  size_t vl);
vint16m4_t __riscv_vslide1down_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                                  size_t vl);
vint16m8_t __riscv_vslide1down_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                                  size_t vl);
vint32mf2_t __riscv_vslide1down_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                                   size_t vl);
vint32m1_t __riscv_vslide1down_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                                  size_t vl);
vint32m2_t __riscv_vslide1down_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                                  size_t vl);
vint32m4_t __riscv_vslide1down_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                                  size_t vl);
vint32m8_t __riscv_vslide1down_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                                  size_t vl);
vint64m1_t __riscv_vslide1down_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                                  size_t vl);
vint64m2_t __riscv_vslide1down_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                                  size_t vl);
vint64m4_t __riscv_vslide1down_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                                  size_t vl);
vint64m8_t __riscv_vslide1down_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                                  size_t vl);
// __riscv_vslide1up_tu: unmasked unsigned-integer overloads (no vm
// parameter). Each takes vd and vs2 of the returned vector type, a scalar rs1
// of the matching fixed-width unsigned type (uint8_t .. uint64_t), and a
// size_t vl.
// NOTE(review): rs1 is presumably the scalar inserted by the slide-by-one and
// _tu presumably the tail-undisturbed policy -- confirm against the RVV spec.
vuint8mf8_t __riscv_vslide1up_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                                 size_t vl);
vuint8mf4_t __riscv_vslide1up_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                                 size_t vl);
vuint8mf2_t __riscv_vslide1up_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                                 size_t vl);
vuint8m1_t __riscv_vslide1up_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                                size_t vl);
vuint8m2_t __riscv_vslide1up_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                                size_t vl);
vuint8m4_t __riscv_vslide1up_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                                size_t vl);
vuint8m8_t __riscv_vslide1up_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                                size_t vl);
vuint16mf4_t __riscv_vslide1up_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                                  uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vslide1up_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                                  uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vslide1up_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                                 size_t vl);
vuint16m2_t __riscv_vslide1up_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                                 size_t vl);
vuint16m4_t __riscv_vslide1up_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                                 size_t vl);
vuint16m8_t __riscv_vslide1up_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32mf2_t __riscv_vslide1up_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                                  uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vslide1up_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                                 size_t vl);
vuint32m2_t __riscv_vslide1up_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                                 size_t vl);
vuint32m4_t __riscv_vslide1up_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                                 size_t vl);
vuint32m8_t __riscv_vslide1up_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m1_t __riscv_vslide1up_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                                 size_t vl);
vuint64m2_t __riscv_vslide1up_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                                 size_t vl);
vuint64m4_t __riscv_vslide1up_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                                 size_t vl);
vuint64m8_t __riscv_vslide1up_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                                 size_t vl);
// __riscv_vslide1down_tu: unmasked unsigned-integer overloads (no vm
// parameter). Each takes vd and vs2 of the returned vector type, a scalar rs1
// of the matching fixed-width unsigned type (uint8_t .. uint64_t), and a
// size_t vl.
// NOTE(review): presumably the slide-down-by-one counterpart of the
// __riscv_vslide1up_tu overloads -- confirm against the RVV spec.
vuint8mf8_t __riscv_vslide1down_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                                   size_t vl);
vuint8mf4_t __riscv_vslide1down_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                                   size_t vl);
vuint8mf2_t __riscv_vslide1down_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                                   size_t vl);
vuint8m1_t __riscv_vslide1down_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                                  size_t vl);
vuint8m2_t __riscv_vslide1down_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                                  size_t vl);
vuint8m4_t __riscv_vslide1down_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                                  size_t vl);
vuint8m8_t __riscv_vslide1down_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                                  size_t vl);
vuint16mf4_t __riscv_vslide1down_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                                    uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vslide1down_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                                    uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vslide1down_tu(vuint16m1_t vd, vuint16m1_t vs2,
                                   uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vslide1down_tu(vuint16m2_t vd, vuint16m2_t vs2,
                                   uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vslide1down_tu(vuint16m4_t vd, vuint16m4_t vs2,
                                   uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vslide1down_tu(vuint16m8_t vd, vuint16m8_t vs2,
                                   uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vslide1down_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                                    uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vslide1down_tu(vuint32m1_t vd, vuint32m1_t vs2,
                                   uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vslide1down_tu(vuint32m2_t vd, vuint32m2_t vs2,
                                   uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vslide1down_tu(vuint32m4_t vd, vuint32m4_t vs2,
                                   uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vslide1down_tu(vuint32m8_t vd, vuint32m8_t vs2,
                                   uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vslide1down_tu(vuint64m1_t vd, vuint64m1_t vs2,
                                   uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vslide1down_tu(vuint64m2_t vd, vuint64m2_t vs2,
                                   uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vslide1down_tu(vuint64m4_t vd, vuint64m4_t vs2,
                                   uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vslide1down_tu(vuint64m8_t vd, vuint64m8_t vs2,
                                   uint64_t rs1, size_t vl);
// masked functions
// __riscv_vfslide1up_tum: masked floating-point overloads. Compared with the
// _tu forms, each adds a leading vboolN_t mask vm; N tracks the element-width
// / LMUL ratio of the data type (e.g. vfloat16m8_t pairs with vbool2_t). The
// remaining parameters are vd and vs2 of the returned vector type, a scalar
// rs1 typed to the element width (_Float16, float or double), and a size_t vl.
// NOTE(review): _tum presumably selects the tail-undisturbed, mask-agnostic
// policy -- confirm against the RVV spec.
vfloat16mf4_t __riscv_vfslide1up_tum(vbool64_t vm, vfloat16mf4_t vd,
                                     vfloat16mf4_t vs2, _Float16 rs1,
                                     size_t vl);
vfloat16mf2_t __riscv_vfslide1up_tum(vbool32_t vm, vfloat16mf2_t vd,
                                     vfloat16mf2_t vs2, _Float16 rs1,
                                     size_t vl);
vfloat16m1_t __riscv_vfslide1up_tum(vbool16_t vm, vfloat16m1_t vd,
                                    vfloat16m1_t vs2, _Float16 rs1, size_t vl);
vfloat16m2_t __riscv_vfslide1up_tum(vbool8_t vm, vfloat16m2_t vd,
                                    vfloat16m2_t vs2, _Float16 rs1, size_t vl);
vfloat16m4_t __riscv_vfslide1up_tum(vbool4_t vm, vfloat16m4_t vd,
                                    vfloat16m4_t vs2, _Float16 rs1, size_t vl);
vfloat16m8_t __riscv_vfslide1up_tum(vbool2_t vm, vfloat16m8_t vd,
                                    vfloat16m8_t vs2, _Float16 rs1, size_t vl);
vfloat32mf2_t __riscv_vfslide1up_tum(vbool64_t vm, vfloat32mf2_t vd,
                                     vfloat32mf2_t vs2, float rs1, size_t vl);
vfloat32m1_t __riscv_vfslide1up_tum(vbool32_t vm, vfloat32m1_t vd,
                                    vfloat32m1_t vs2, float rs1, size_t vl);
vfloat32m2_t __riscv_vfslide1up_tum(vbool16_t vm, vfloat32m2_t vd,
                                    vfloat32m2_t vs2, float rs1, size_t vl);
vfloat32m4_t __riscv_vfslide1up_tum(vbool8_t vm, vfloat32m4_t vd,
                                    vfloat32m4_t vs2, float rs1, size_t vl);
vfloat32m8_t __riscv_vfslide1up_tum(vbool4_t vm, vfloat32m8_t vd,
                                    vfloat32m8_t vs2, float rs1, size_t vl);
vfloat64m1_t __riscv_vfslide1up_tum(vbool64_t vm, vfloat64m1_t vd,
                                    vfloat64m1_t vs2, double rs1, size_t vl);
vfloat64m2_t __riscv_vfslide1up_tum(vbool32_t vm, vfloat64m2_t vd,
                                    vfloat64m2_t vs2, double rs1, size_t vl);
vfloat64m4_t __riscv_vfslide1up_tum(vbool16_t vm, vfloat64m4_t vd,
                                    vfloat64m4_t vs2, double rs1, size_t vl);
vfloat64m8_t __riscv_vfslide1up_tum(vbool8_t vm, vfloat64m8_t vd,
                                    vfloat64m8_t vs2, double rs1, size_t vl);
// __riscv_vfslide1down_tum: masked floating-point overloads. Each takes a
// leading vboolN_t mask vm (N tracks the element-width / LMUL ratio of the
// data type), vd and vs2 of the returned vector type, a scalar rs1 typed to
// the element width (_Float16, float or double), and a size_t vl.
// NOTE(review): presumably the slide-down-by-one counterpart of
// __riscv_vfslide1up_tum, with _tum meaning tail-undisturbed, mask-agnostic
// -- confirm against the RVV spec.
vfloat16mf4_t __riscv_vfslide1down_tum(vbool64_t vm, vfloat16mf4_t vd,
                                       vfloat16mf4_t vs2, _Float16 rs1,
                                       size_t vl);
vfloat16mf2_t __riscv_vfslide1down_tum(vbool32_t vm, vfloat16mf2_t vd,
                                       vfloat16mf2_t vs2, _Float16 rs1,
                                       size_t vl);
vfloat16m1_t __riscv_vfslide1down_tum(vbool16_t vm, vfloat16m1_t vd,
                                      vfloat16m1_t vs2, _Float16 rs1,
                                      size_t vl);
vfloat16m2_t __riscv_vfslide1down_tum(vbool8_t vm, vfloat16m2_t vd,
                                      vfloat16m2_t vs2, _Float16 rs1,
                                      size_t vl);
vfloat16m4_t __riscv_vfslide1down_tum(vbool4_t vm, vfloat16m4_t vd,
                                      vfloat16m4_t vs2, _Float16 rs1,
                                      size_t vl);
vfloat16m8_t __riscv_vfslide1down_tum(vbool2_t vm, vfloat16m8_t vd,
                                      vfloat16m8_t vs2, _Float16 rs1,
                                      size_t vl);
vfloat32mf2_t __riscv_vfslide1down_tum(vbool64_t vm, vfloat32mf2_t vd,
                                       vfloat32mf2_t vs2, float rs1, size_t vl);
vfloat32m1_t __riscv_vfslide1down_tum(vbool32_t vm, vfloat32m1_t vd,
                                      vfloat32m1_t vs2, float rs1, size_t vl);
vfloat32m2_t __riscv_vfslide1down_tum(vbool16_t vm, vfloat32m2_t vd,
                                      vfloat32m2_t vs2, float rs1, size_t vl);
vfloat32m4_t __riscv_vfslide1down_tum(vbool8_t vm, vfloat32m4_t vd,
                                      vfloat32m4_t vs2, float rs1, size_t vl);
vfloat32m8_t __riscv_vfslide1down_tum(vbool4_t vm, vfloat32m8_t vd,
                                      vfloat32m8_t vs2, float rs1, size_t vl);
vfloat64m1_t __riscv_vfslide1down_tum(vbool64_t vm, vfloat64m1_t vd,
                                      vfloat64m1_t vs2, double rs1, size_t vl);
vfloat64m2_t __riscv_vfslide1down_tum(vbool32_t vm, vfloat64m2_t vd,
                                      vfloat64m2_t vs2, double rs1, size_t vl);
vfloat64m4_t __riscv_vfslide1down_tum(vbool16_t vm, vfloat64m4_t vd,
                                      vfloat64m4_t vs2, double rs1, size_t vl);
vfloat64m8_t __riscv_vfslide1down_tum(vbool8_t vm, vfloat64m8_t vd,
                                      vfloat64m8_t vs2, double rs1, size_t vl);
// __riscv_vslide1up_tum: masked signed-integer overloads. Each takes a
// leading vboolN_t mask vm (N tracks the element-width / LMUL ratio of the
// data type, e.g. vint8m8_t pairs with vbool1_t), vd and vs2 of the returned
// vector type, a scalar rs1 of the matching fixed-width signed type
// (int8_t .. int64_t), and a size_t vl.
// NOTE(review): _tum presumably selects the tail-undisturbed, mask-agnostic
// policy -- confirm against the RVV spec.
vint8mf8_t __riscv_vslide1up_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                 int8_t rs1, size_t vl);
vint8mf4_t __riscv_vslide1up_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                 int8_t rs1, size_t vl);
vint8mf2_t __riscv_vslide1up_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                 int8_t rs1, size_t vl);
vint8m1_t __riscv_vslide1up_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                int8_t rs1, size_t vl);
vint8m2_t __riscv_vslide1up_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                int8_t rs1, size_t vl);
vint8m4_t __riscv_vslide1up_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                int8_t rs1, size_t vl);
vint8m8_t __riscv_vslide1up_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                                int8_t rs1, size_t vl);
vint16mf4_t __riscv_vslide1up_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                  int16_t rs1, size_t vl);
vint16mf2_t __riscv_vslide1up_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                  int16_t rs1, size_t vl);
vint16m1_t __riscv_vslide1up_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                 int16_t rs1, size_t vl);
vint16m2_t __riscv_vslide1up_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                 int16_t rs1, size_t vl);
vint16m4_t __riscv_vslide1up_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                 int16_t rs1, size_t vl);
vint16m8_t __riscv_vslide1up_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                 int16_t rs1, size_t vl);
vint32mf2_t __riscv_vslide1up_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                  int32_t rs1, size_t vl);
vint32m1_t __riscv_vslide1up_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                 int32_t rs1, size_t vl);
vint32m2_t __riscv_vslide1up_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                 int32_t rs1, size_t vl);
vint32m4_t __riscv_vslide1up_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                 int32_t rs1, size_t vl);
vint32m8_t __riscv_vslide1up_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                 int32_t rs1, size_t vl);
vint64m1_t __riscv_vslide1up_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                 int64_t rs1, size_t vl);
vint64m2_t __riscv_vslide1up_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                 int64_t rs1, size_t vl);
vint64m4_t __riscv_vslide1up_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                 int64_t rs1, size_t vl);
vint64m8_t __riscv_vslide1up_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                 int64_t rs1, size_t vl);
// __riscv_vslide1down_tum: masked signed-integer overloads. Each takes a
// leading vboolN_t mask vm (N tracks the element-width / LMUL ratio of the
// data type), vd and vs2 of the returned vector type, a scalar rs1 of the
// matching fixed-width signed type (int8_t .. int64_t), and a size_t vl.
// NOTE(review): presumably the slide-down-by-one counterpart of
// __riscv_vslide1up_tum, with _tum meaning tail-undisturbed, mask-agnostic
// -- confirm against the RVV spec.
vint8mf8_t __riscv_vslide1down_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                   int8_t rs1, size_t vl);
vint8mf4_t __riscv_vslide1down_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                   int8_t rs1, size_t vl);
vint8mf2_t __riscv_vslide1down_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                   int8_t rs1, size_t vl);
vint8m1_t __riscv_vslide1down_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                  int8_t rs1, size_t vl);
vint8m2_t __riscv_vslide1down_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                  int8_t rs1, size_t vl);
vint8m4_t __riscv_vslide1down_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                  int8_t rs1, size_t vl);
vint8m8_t __riscv_vslide1down_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                                  int8_t rs1, size_t vl);
vint16mf4_t __riscv_vslide1down_tum(vbool64_t vm, vint16mf4_t vd,
                                    vint16mf4_t vs2, int16_t rs1, size_t vl);
vint16mf2_t __riscv_vslide1down_tum(vbool32_t vm, vint16mf2_t vd,
                                    vint16mf2_t vs2, int16_t rs1, size_t vl);
vint16m1_t __riscv_vslide1down_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                   int16_t rs1, size_t vl);
vint16m2_t __riscv_vslide1down_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                   int16_t rs1, size_t vl);
vint16m4_t __riscv_vslide1down_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                   int16_t rs1, size_t vl);
vint16m8_t __riscv_vslide1down_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                   int16_t rs1, size_t vl);
vint32mf2_t __riscv_vslide1down_tum(vbool64_t vm, vint32mf2_t vd,
                                    vint32mf2_t vs2, int32_t rs1, size_t vl);
vint32m1_t __riscv_vslide1down_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                   int32_t rs1, size_t vl);
vint32m2_t __riscv_vslide1down_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                   int32_t rs1, size_t vl);
vint32m4_t __riscv_vslide1down_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                   int32_t rs1, size_t vl);
vint32m8_t __riscv_vslide1down_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                   int32_t rs1, size_t vl);
vint64m1_t __riscv_vslide1down_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                   int64_t rs1, size_t vl);
vint64m2_t __riscv_vslide1down_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                   int64_t rs1, size_t vl);
vint64m4_t __riscv_vslide1down_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                   int64_t rs1, size_t vl);
vint64m8_t __riscv_vslide1down_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                   int64_t rs1, size_t vl);
// __riscv_vslide1up_tum: unsigned overloads, vuint8mf8_t .. vuint64m8_t.
// Signature shape: (vboolN_t vm, vd, vs2, uintN_t rs1, size_t vl), returning
// the same vector type as vd/vs2; rs1 has the element's unsigned type.
// In every prototype below, N in vboolN_t equals element bits divided by the
// LMUL in the type name (e.g. vuint32m2_t pairs with vbool16_t).
// NOTE(review): the slide-up-by-one behavior and the `_tum` policy are
// defined by the RVV specifications, not by this listing -- confirm there.
vuint8mf8_t __riscv_vslide1up_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                  uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vslide1up_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                  uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vslide1up_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                  uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vslide1up_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                 uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vslide1up_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                 uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vslide1up_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                 uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vslide1up_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vslide1up_tum(vbool64_t vm, vuint16mf4_t vd,
                                   vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vslide1up_tum(vbool32_t vm, vuint16mf2_t vd,
                                   vuint16mf2_t vs2, uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vslide1up_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                  uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vslide1up_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                  uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vslide1up_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                  uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vslide1up_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vslide1up_tum(vbool64_t vm, vuint32mf2_t vd,
                                   vuint32mf2_t vs2, uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vslide1up_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                  uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vslide1up_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                  uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vslide1up_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                  uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vslide1up_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vslide1up_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                  uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vslide1up_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                  uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vslide1up_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                  uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vslide1up_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                  uint64_t rs1, size_t vl);
// __riscv_vslide1down_tum: unsigned overloads, vuint8mf8_t .. vuint64m8_t.
// Signature shape: (vboolN_t vm, vd, vs2, uintN_t rs1, size_t vl), returning
// the same vector type as vd/vs2; rs1 has the element's unsigned type.
// In every prototype below, N in vboolN_t equals element bits divided by the
// LMUL in the type name (e.g. vuint8m8_t pairs with vbool1_t).
// NOTE(review): the slide-down-by-one behavior and the `_tum` policy are
// defined by the RVV specifications, not by this listing -- confirm there.
vuint8mf8_t __riscv_vslide1down_tum(vbool64_t vm, vuint8mf8_t vd,
                                    vuint8mf8_t vs2, uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vslide1down_tum(vbool32_t vm, vuint8mf4_t vd,
                                    vuint8mf4_t vs2, uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vslide1down_tum(vbool16_t vm, vuint8mf2_t vd,
                                    vuint8mf2_t vs2, uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vslide1down_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                   uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vslide1down_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                   uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vslide1down_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                   uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vslide1down_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                   uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vslide1down_tum(vbool64_t vm, vuint16mf4_t vd,
                                     vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vslide1down_tum(vbool32_t vm, vuint16mf2_t vd,
                                     vuint16mf2_t vs2, uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vslide1down_tum(vbool16_t vm, vuint16m1_t vd,
                                    vuint16m1_t vs2, uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vslide1down_tum(vbool8_t vm, vuint16m2_t vd,
                                    vuint16m2_t vs2, uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vslide1down_tum(vbool4_t vm, vuint16m4_t vd,
                                    vuint16m4_t vs2, uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vslide1down_tum(vbool2_t vm, vuint16m8_t vd,
                                    vuint16m8_t vs2, uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vslide1down_tum(vbool64_t vm, vuint32mf2_t vd,
                                     vuint32mf2_t vs2, uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vslide1down_tum(vbool32_t vm, vuint32m1_t vd,
                                    vuint32m1_t vs2, uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vslide1down_tum(vbool16_t vm, vuint32m2_t vd,
                                    vuint32m2_t vs2, uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vslide1down_tum(vbool8_t vm, vuint32m4_t vd,
                                    vuint32m4_t vs2, uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vslide1down_tum(vbool4_t vm, vuint32m8_t vd,
                                    vuint32m8_t vs2, uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vslide1down_tum(vbool64_t vm, vuint64m1_t vd,
                                    vuint64m1_t vs2, uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vslide1down_tum(vbool32_t vm, vuint64m2_t vd,
                                    vuint64m2_t vs2, uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vslide1down_tum(vbool16_t vm, vuint64m4_t vd,
                                    vuint64m4_t vs2, uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vslide1down_tum(vbool8_t vm, vuint64m8_t vd,
                                    vuint64m8_t vs2, uint64_t rs1, size_t vl);
// masked functions with the _tumu policy suffix
// __riscv_vfslide1up_tumu: floating-point overloads, vfloat16mf4_t ..
// vfloat64m8_t. Signature shape: (vboolN_t vm, vd, vs2, scalar rs1,
// size_t vl), returning the same vector type as vd/vs2; rs1 is _Float16,
// float or double to match the 16-, 32- or 64-bit element type.
// In every prototype below, N in vboolN_t equals element bits divided by the
// LMUL in the type name (e.g. vfloat64m2_t pairs with vbool32_t).
// NOTE(review): `_tumu` presumably denotes the tail-undisturbed,
// mask-undisturbed policy per the RVV intrinsics naming scheme -- confirm.
vfloat16mf4_t __riscv_vfslide1up_tumu(vbool64_t vm, vfloat16mf4_t vd,
                                      vfloat16mf4_t vs2, _Float16 rs1,
                                      size_t vl);
vfloat16mf2_t __riscv_vfslide1up_tumu(vbool32_t vm, vfloat16mf2_t vd,
                                      vfloat16mf2_t vs2, _Float16 rs1,
                                      size_t vl);
vfloat16m1_t __riscv_vfslide1up_tumu(vbool16_t vm, vfloat16m1_t vd,
                                     vfloat16m1_t vs2, _Float16 rs1, size_t vl);
vfloat16m2_t __riscv_vfslide1up_tumu(vbool8_t vm, vfloat16m2_t vd,
                                     vfloat16m2_t vs2, _Float16 rs1, size_t vl);
vfloat16m4_t __riscv_vfslide1up_tumu(vbool4_t vm, vfloat16m4_t vd,
                                     vfloat16m4_t vs2, _Float16 rs1, size_t vl);
vfloat16m8_t __riscv_vfslide1up_tumu(vbool2_t vm, vfloat16m8_t vd,
                                     vfloat16m8_t vs2, _Float16 rs1, size_t vl);
vfloat32mf2_t __riscv_vfslide1up_tumu(vbool64_t vm, vfloat32mf2_t vd,
                                      vfloat32mf2_t vs2, float rs1, size_t vl);
vfloat32m1_t __riscv_vfslide1up_tumu(vbool32_t vm, vfloat32m1_t vd,
                                     vfloat32m1_t vs2, float rs1, size_t vl);
vfloat32m2_t __riscv_vfslide1up_tumu(vbool16_t vm, vfloat32m2_t vd,
                                     vfloat32m2_t vs2, float rs1, size_t vl);
vfloat32m4_t __riscv_vfslide1up_tumu(vbool8_t vm, vfloat32m4_t vd,
                                     vfloat32m4_t vs2, float rs1, size_t vl);
vfloat32m8_t __riscv_vfslide1up_tumu(vbool4_t vm, vfloat32m8_t vd,
                                     vfloat32m8_t vs2, float rs1, size_t vl);
vfloat64m1_t __riscv_vfslide1up_tumu(vbool64_t vm, vfloat64m1_t vd,
                                     vfloat64m1_t vs2, double rs1, size_t vl);
vfloat64m2_t __riscv_vfslide1up_tumu(vbool32_t vm, vfloat64m2_t vd,
                                     vfloat64m2_t vs2, double rs1, size_t vl);
vfloat64m4_t __riscv_vfslide1up_tumu(vbool16_t vm, vfloat64m4_t vd,
                                     vfloat64m4_t vs2, double rs1, size_t vl);
vfloat64m8_t __riscv_vfslide1up_tumu(vbool8_t vm, vfloat64m8_t vd,
                                     vfloat64m8_t vs2, double rs1, size_t vl);
// __riscv_vfslide1down_tumu: floating-point overloads, vfloat16mf4_t ..
// vfloat64m8_t. Signature shape: (vboolN_t vm, vd, vs2, scalar rs1,
// size_t vl), returning the same vector type as vd/vs2; rs1 is _Float16,
// float or double to match the 16-, 32- or 64-bit element type.
// In every prototype below, N in vboolN_t equals element bits divided by the
// LMUL in the type name (e.g. vfloat16m8_t pairs with vbool2_t).
// NOTE(review): the slide-down-by-one behavior and the `_tumu` policy are
// defined by the RVV specifications, not by this listing -- confirm there.
vfloat16mf4_t __riscv_vfslide1down_tumu(vbool64_t vm, vfloat16mf4_t vd,
                                        vfloat16mf4_t vs2, _Float16 rs1,
                                        size_t vl);
vfloat16mf2_t __riscv_vfslide1down_tumu(vbool32_t vm, vfloat16mf2_t vd,
                                        vfloat16mf2_t vs2, _Float16 rs1,
                                        size_t vl);
vfloat16m1_t __riscv_vfslide1down_tumu(vbool16_t vm, vfloat16m1_t vd,
                                       vfloat16m1_t vs2, _Float16 rs1,
                                       size_t vl);
vfloat16m2_t __riscv_vfslide1down_tumu(vbool8_t vm, vfloat16m2_t vd,
                                       vfloat16m2_t vs2, _Float16 rs1,
                                       size_t vl);
vfloat16m4_t __riscv_vfslide1down_tumu(vbool4_t vm, vfloat16m4_t vd,
                                       vfloat16m4_t vs2, _Float16 rs1,
                                       size_t vl);
vfloat16m8_t __riscv_vfslide1down_tumu(vbool2_t vm, vfloat16m8_t vd,
                                       vfloat16m8_t vs2, _Float16 rs1,
                                       size_t vl);
vfloat32mf2_t __riscv_vfslide1down_tumu(vbool64_t vm, vfloat32mf2_t vd,
                                        vfloat32mf2_t vs2, float rs1,
                                        size_t vl);
vfloat32m1_t __riscv_vfslide1down_tumu(vbool32_t vm, vfloat32m1_t vd,
                                       vfloat32m1_t vs2, float rs1, size_t vl);
vfloat32m2_t __riscv_vfslide1down_tumu(vbool16_t vm, vfloat32m2_t vd,
                                       vfloat32m2_t vs2, float rs1, size_t vl);
vfloat32m4_t __riscv_vfslide1down_tumu(vbool8_t vm, vfloat32m4_t vd,
                                       vfloat32m4_t vs2, float rs1, size_t vl);
vfloat32m8_t __riscv_vfslide1down_tumu(vbool4_t vm, vfloat32m8_t vd,
                                       vfloat32m8_t vs2, float rs1, size_t vl);
vfloat64m1_t __riscv_vfslide1down_tumu(vbool64_t vm, vfloat64m1_t vd,
                                       vfloat64m1_t vs2, double rs1, size_t vl);
vfloat64m2_t __riscv_vfslide1down_tumu(vbool32_t vm, vfloat64m2_t vd,
                                       vfloat64m2_t vs2, double rs1, size_t vl);
vfloat64m4_t __riscv_vfslide1down_tumu(vbool16_t vm, vfloat64m4_t vd,
                                       vfloat64m4_t vs2, double rs1, size_t vl);
vfloat64m8_t __riscv_vfslide1down_tumu(vbool8_t vm, vfloat64m8_t vd,
                                       vfloat64m8_t vs2, double rs1, size_t vl);
// __riscv_vslide1up_tumu: signed overloads, vint8mf8_t .. vint64m8_t.
// Signature shape: (vboolN_t vm, vd, vs2, intN_t rs1, size_t vl), returning
// the same vector type as vd/vs2; rs1 has the element's integer type.
// In every prototype below, N in vboolN_t equals element bits divided by the
// LMUL in the type name (e.g. vint8m1_t pairs with vbool8_t).
// NOTE(review): the slide-up-by-one behavior and the `_tumu` policy are
// defined by the RVV specifications, not by this listing -- confirm there.
vint8mf8_t __riscv_vslide1up_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                  int8_t rs1, size_t vl);
vint8mf4_t __riscv_vslide1up_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                  int8_t rs1, size_t vl);
vint8mf2_t __riscv_vslide1up_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                  int8_t rs1, size_t vl);
vint8m1_t __riscv_vslide1up_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                 int8_t rs1, size_t vl);
vint8m2_t __riscv_vslide1up_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                 int8_t rs1, size_t vl);
vint8m4_t __riscv_vslide1up_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                 int8_t rs1, size_t vl);
vint8m8_t __riscv_vslide1up_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                                 int8_t rs1, size_t vl);
vint16mf4_t __riscv_vslide1up_tumu(vbool64_t vm, vint16mf4_t vd,
                                   vint16mf4_t vs2, int16_t rs1, size_t vl);
vint16mf2_t __riscv_vslide1up_tumu(vbool32_t vm, vint16mf2_t vd,
                                   vint16mf2_t vs2, int16_t rs1, size_t vl);
vint16m1_t __riscv_vslide1up_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                  int16_t rs1, size_t vl);
vint16m2_t __riscv_vslide1up_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                  int16_t rs1, size_t vl);
vint16m4_t __riscv_vslide1up_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                  int16_t rs1, size_t vl);
vint16m8_t __riscv_vslide1up_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                  int16_t rs1, size_t vl);
vint32mf2_t __riscv_vslide1up_tumu(vbool64_t vm, vint32mf2_t vd,
                                   vint32mf2_t vs2, int32_t rs1, size_t vl);
vint32m1_t __riscv_vslide1up_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                  int32_t rs1, size_t vl);
vint32m2_t __riscv_vslide1up_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                  int32_t rs1, size_t vl);
vint32m4_t __riscv_vslide1up_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                  int32_t rs1, size_t vl);
vint32m8_t __riscv_vslide1up_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                  int32_t rs1, size_t vl);
vint64m1_t __riscv_vslide1up_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                  int64_t rs1, size_t vl);
vint64m2_t __riscv_vslide1up_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                  int64_t rs1, size_t vl);
vint64m4_t __riscv_vslide1up_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                  int64_t rs1, size_t vl);
vint64m8_t __riscv_vslide1up_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                  int64_t rs1, size_t vl);
// __riscv_vslide1down_tumu: signed overloads, vint8mf8_t .. vint64m8_t.
// Signature shape: (vboolN_t vm, vd, vs2, intN_t rs1, size_t vl), returning
// the same vector type as vd/vs2; rs1 has the element's integer type.
// In every prototype below, N in vboolN_t equals element bits divided by the
// LMUL in the type name (e.g. vint64m1_t pairs with vbool64_t).
// NOTE(review): the slide-down-by-one behavior and the `_tumu` policy are
// defined by the RVV specifications, not by this listing -- confirm there.
vint8mf8_t __riscv_vslide1down_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                    int8_t rs1, size_t vl);
vint8mf4_t __riscv_vslide1down_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                    int8_t rs1, size_t vl);
vint8mf2_t __riscv_vslide1down_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                    int8_t rs1, size_t vl);
vint8m1_t __riscv_vslide1down_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                   int8_t rs1, size_t vl);
vint8m2_t __riscv_vslide1down_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                   int8_t rs1, size_t vl);
vint8m4_t __riscv_vslide1down_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                   int8_t rs1, size_t vl);
vint8m8_t __riscv_vslide1down_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                                   int8_t rs1, size_t vl);
vint16mf4_t __riscv_vslide1down_tumu(vbool64_t vm, vint16mf4_t vd,
                                     vint16mf4_t vs2, int16_t rs1, size_t vl);
vint16mf2_t __riscv_vslide1down_tumu(vbool32_t vm, vint16mf2_t vd,
                                     vint16mf2_t vs2, int16_t rs1, size_t vl);
vint16m1_t __riscv_vslide1down_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                    int16_t rs1, size_t vl);
vint16m2_t __riscv_vslide1down_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                    int16_t rs1, size_t vl);
vint16m4_t __riscv_vslide1down_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                    int16_t rs1, size_t vl);
vint16m8_t __riscv_vslide1down_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                    int16_t rs1, size_t vl);
vint32mf2_t __riscv_vslide1down_tumu(vbool64_t vm, vint32mf2_t vd,
                                     vint32mf2_t vs2, int32_t rs1, size_t vl);
vint32m1_t __riscv_vslide1down_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                    int32_t rs1, size_t vl);
vint32m2_t __riscv_vslide1down_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                    int32_t rs1, size_t vl);
vint32m4_t __riscv_vslide1down_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                    int32_t rs1, size_t vl);
vint32m8_t __riscv_vslide1down_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                    int32_t rs1, size_t vl);
vint64m1_t __riscv_vslide1down_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                    int64_t rs1, size_t vl);
vint64m2_t __riscv_vslide1down_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                    int64_t rs1, size_t vl);
vint64m4_t __riscv_vslide1down_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                    int64_t rs1, size_t vl);
vint64m8_t __riscv_vslide1down_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                    int64_t rs1, size_t vl);
// __riscv_vslide1up_tumu: unsigned overloads, vuint8mf8_t .. vuint64m8_t.
// Signature shape: (vboolN_t vm, vd, vs2, uintN_t rs1, size_t vl), returning
// the same vector type as vd/vs2; rs1 has the element's unsigned type.
// In every prototype below, N in vboolN_t equals element bits divided by the
// LMUL in the type name (e.g. vuint16mf4_t pairs with vbool64_t).
// NOTE(review): the slide-up-by-one behavior and the `_tumu` policy are
// defined by the RVV specifications, not by this listing -- confirm there.
vuint8mf8_t __riscv_vslide1up_tumu(vbool64_t vm, vuint8mf8_t vd,
                                   vuint8mf8_t vs2, uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vslide1up_tumu(vbool32_t vm, vuint8mf4_t vd,
                                   vuint8mf4_t vs2, uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vslide1up_tumu(vbool16_t vm, vuint8mf2_t vd,
                                   vuint8mf2_t vs2, uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vslide1up_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                  uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vslide1up_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                  uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vslide1up_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                  uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vslide1up_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vslide1up_tumu(vbool64_t vm, vuint16mf4_t vd,
                                    vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vslide1up_tumu(vbool32_t vm, vuint16mf2_t vd,
                                    vuint16mf2_t vs2, uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vslide1up_tumu(vbool16_t vm, vuint16m1_t vd,
                                   vuint16m1_t vs2, uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vslide1up_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                   uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vslide1up_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                   uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vslide1up_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                   uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vslide1up_tumu(vbool64_t vm, vuint32mf2_t vd,
                                    vuint32mf2_t vs2, uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vslide1up_tumu(vbool32_t vm, vuint32m1_t vd,
                                   vuint32m1_t vs2, uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vslide1up_tumu(vbool16_t vm, vuint32m2_t vd,
                                   vuint32m2_t vs2, uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vslide1up_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                   uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vslide1up_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                   uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vslide1up_tumu(vbool64_t vm, vuint64m1_t vd,
                                   vuint64m1_t vs2, uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vslide1up_tumu(vbool32_t vm, vuint64m2_t vd,
                                   vuint64m2_t vs2, uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vslide1up_tumu(vbool16_t vm, vuint64m4_t vd,
                                   vuint64m4_t vs2, uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vslide1up_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                   uint64_t rs1, size_t vl);
// __riscv_vslide1down_tumu: unsigned overloads, vuint8mf8_t .. vuint64m8_t.
// Signature shape: (vboolN_t vm, vd, vs2, uintN_t rs1, size_t vl), returning
// the same vector type as vd/vs2; rs1 has the element's unsigned type.
// In every prototype below, N in vboolN_t equals element bits divided by the
// LMUL in the type name (e.g. vuint32m8_t pairs with vbool4_t).
// NOTE(review): the slide-down-by-one behavior and the `_tumu` policy are
// defined by the RVV specifications, not by this listing -- confirm there.
vuint8mf8_t __riscv_vslide1down_tumu(vbool64_t vm, vuint8mf8_t vd,
                                     vuint8mf8_t vs2, uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vslide1down_tumu(vbool32_t vm, vuint8mf4_t vd,
                                     vuint8mf4_t vs2, uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vslide1down_tumu(vbool16_t vm, vuint8mf2_t vd,
                                     vuint8mf2_t vs2, uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vslide1down_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                    uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vslide1down_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                    uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vslide1down_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                    uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vslide1down_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                    uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vslide1down_tumu(vbool64_t vm, vuint16mf4_t vd,
                                      vuint16mf4_t vs2, uint16_t rs1,
                                      size_t vl);
vuint16mf2_t __riscv_vslide1down_tumu(vbool32_t vm, vuint16mf2_t vd,
                                      vuint16mf2_t vs2, uint16_t rs1,
                                      size_t vl);
vuint16m1_t __riscv_vslide1down_tumu(vbool16_t vm, vuint16m1_t vd,
                                     vuint16m1_t vs2, uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vslide1down_tumu(vbool8_t vm, vuint16m2_t vd,
                                     vuint16m2_t vs2, uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vslide1down_tumu(vbool4_t vm, vuint16m4_t vd,
                                     vuint16m4_t vs2, uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vslide1down_tumu(vbool2_t vm, vuint16m8_t vd,
                                     vuint16m8_t vs2, uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vslide1down_tumu(vbool64_t vm, vuint32mf2_t vd,
                                      vuint32mf2_t vs2, uint32_t rs1,
                                      size_t vl);
vuint32m1_t __riscv_vslide1down_tumu(vbool32_t vm, vuint32m1_t vd,
                                     vuint32m1_t vs2, uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vslide1down_tumu(vbool16_t vm, vuint32m2_t vd,
                                     vuint32m2_t vs2, uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vslide1down_tumu(vbool8_t vm, vuint32m4_t vd,
                                     vuint32m4_t vs2, uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vslide1down_tumu(vbool4_t vm, vuint32m8_t vd,
                                     vuint32m8_t vs2, uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vslide1down_tumu(vbool64_t vm, vuint64m1_t vd,
                                     vuint64m1_t vs2, uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vslide1down_tumu(vbool32_t vm, vuint64m2_t vd,
                                     vuint64m2_t vs2, uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vslide1down_tumu(vbool16_t vm, vuint64m4_t vd,
                                     vuint64m4_t vs2, uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vslide1down_tumu(vbool8_t vm, vuint64m8_t vd,
                                     vuint64m8_t vs2, uint64_t rs1, size_t vl);
// masked functions (_mu policy: tail-agnostic, mask-undisturbed)
vfloat16mf4_t __riscv_vfslide1up_mu(vbool64_t vm, vfloat16mf4_t vd,
                                    vfloat16mf4_t vs2, _Float16 rs1, size_t vl);
vfloat16mf2_t __riscv_vfslide1up_mu(vbool32_t vm, vfloat16mf2_t vd,
                                    vfloat16mf2_t vs2, _Float16 rs1, size_t vl);
vfloat16m1_t __riscv_vfslide1up_mu(vbool16_t vm, vfloat16m1_t vd,
                                   vfloat16m1_t vs2, _Float16 rs1, size_t vl);
vfloat16m2_t __riscv_vfslide1up_mu(vbool8_t vm, vfloat16m2_t vd,
                                   vfloat16m2_t vs2, _Float16 rs1, size_t vl);
vfloat16m4_t __riscv_vfslide1up_mu(vbool4_t vm, vfloat16m4_t vd,
                                   vfloat16m4_t vs2, _Float16 rs1, size_t vl);
vfloat16m8_t __riscv_vfslide1up_mu(vbool2_t vm, vfloat16m8_t vd,
                                   vfloat16m8_t vs2, _Float16 rs1, size_t vl);
vfloat32mf2_t __riscv_vfslide1up_mu(vbool64_t vm, vfloat32mf2_t vd,
                                    vfloat32mf2_t vs2, float rs1, size_t vl);
vfloat32m1_t __riscv_vfslide1up_mu(vbool32_t vm, vfloat32m1_t vd,
                                   vfloat32m1_t vs2, float rs1, size_t vl);
vfloat32m2_t __riscv_vfslide1up_mu(vbool16_t vm, vfloat32m2_t vd,
                                   vfloat32m2_t vs2, float rs1, size_t vl);
vfloat32m4_t __riscv_vfslide1up_mu(vbool8_t vm, vfloat32m4_t vd,
                                   vfloat32m4_t vs2, float rs1, size_t vl);
vfloat32m8_t __riscv_vfslide1up_mu(vbool4_t vm, vfloat32m8_t vd,
                                   vfloat32m8_t vs2, float rs1, size_t vl);
vfloat64m1_t __riscv_vfslide1up_mu(vbool64_t vm, vfloat64m1_t vd,
                                   vfloat64m1_t vs2, double rs1, size_t vl);
vfloat64m2_t __riscv_vfslide1up_mu(vbool32_t vm, vfloat64m2_t vd,
                                   vfloat64m2_t vs2, double rs1, size_t vl);
vfloat64m4_t __riscv_vfslide1up_mu(vbool16_t vm, vfloat64m4_t vd,
                                   vfloat64m4_t vs2, double rs1, size_t vl);
vfloat64m8_t __riscv_vfslide1up_mu(vbool8_t vm, vfloat64m8_t vd,
                                   vfloat64m8_t vs2, double rs1, size_t vl);
vfloat16mf4_t __riscv_vfslide1down_mu(vbool64_t vm, vfloat16mf4_t vd,
                                      vfloat16mf4_t vs2, _Float16 rs1,
                                      size_t vl);
vfloat16mf2_t __riscv_vfslide1down_mu(vbool32_t vm, vfloat16mf2_t vd,
                                      vfloat16mf2_t vs2, _Float16 rs1,
                                      size_t vl);
vfloat16m1_t __riscv_vfslide1down_mu(vbool16_t vm, vfloat16m1_t vd,
                                     vfloat16m1_t vs2, _Float16 rs1, size_t vl);
vfloat16m2_t __riscv_vfslide1down_mu(vbool8_t vm, vfloat16m2_t vd,
                                     vfloat16m2_t vs2, _Float16 rs1, size_t vl);
vfloat16m4_t __riscv_vfslide1down_mu(vbool4_t vm, vfloat16m4_t vd,
                                     vfloat16m4_t vs2, _Float16 rs1, size_t vl);
vfloat16m8_t __riscv_vfslide1down_mu(vbool2_t vm, vfloat16m8_t vd,
                                     vfloat16m8_t vs2, _Float16 rs1, size_t vl);
vfloat32mf2_t __riscv_vfslide1down_mu(vbool64_t vm, vfloat32mf2_t vd,
                                      vfloat32mf2_t vs2, float rs1, size_t vl);
vfloat32m1_t __riscv_vfslide1down_mu(vbool32_t vm, vfloat32m1_t vd,
                                     vfloat32m1_t vs2, float rs1, size_t vl);
vfloat32m2_t __riscv_vfslide1down_mu(vbool16_t vm, vfloat32m2_t vd,
                                     vfloat32m2_t vs2, float rs1, size_t vl);
vfloat32m4_t __riscv_vfslide1down_mu(vbool8_t vm, vfloat32m4_t vd,
                                     vfloat32m4_t vs2, float rs1, size_t vl);
vfloat32m8_t __riscv_vfslide1down_mu(vbool4_t vm, vfloat32m8_t vd,
                                     vfloat32m8_t vs2, float rs1, size_t vl);
vfloat64m1_t __riscv_vfslide1down_mu(vbool64_t vm, vfloat64m1_t vd,
                                     vfloat64m1_t vs2, double rs1, size_t vl);
vfloat64m2_t __riscv_vfslide1down_mu(vbool32_t vm, vfloat64m2_t vd,
                                     vfloat64m2_t vs2, double rs1, size_t vl);
vfloat64m4_t __riscv_vfslide1down_mu(vbool16_t vm, vfloat64m4_t vd,
                                     vfloat64m4_t vs2, double rs1, size_t vl);
vfloat64m8_t __riscv_vfslide1down_mu(vbool8_t vm, vfloat64m8_t vd,
                                     vfloat64m8_t vs2, double rs1, size_t vl);
vint8mf8_t __riscv_vslide1up_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                int8_t rs1, size_t vl);
vint8mf4_t __riscv_vslide1up_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                int8_t rs1, size_t vl);
vint8mf2_t __riscv_vslide1up_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                int8_t rs1, size_t vl);
vint8m1_t __riscv_vslide1up_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                               int8_t rs1, size_t vl);
vint8m2_t __riscv_vslide1up_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                               int8_t rs1, size_t vl);
vint8m4_t __riscv_vslide1up_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                               int8_t rs1, size_t vl);
vint8m8_t __riscv_vslide1up_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                               int8_t rs1, size_t vl);
vint16mf4_t __riscv_vslide1up_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                 int16_t rs1, size_t vl);
vint16mf2_t __riscv_vslide1up_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                 int16_t rs1, size_t vl);
vint16m1_t __riscv_vslide1up_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                int16_t rs1, size_t vl);
vint16m2_t __riscv_vslide1up_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                int16_t rs1, size_t vl);
vint16m4_t __riscv_vslide1up_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                int16_t rs1, size_t vl);
vint16m8_t __riscv_vslide1up_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                int16_t rs1, size_t vl);
vint32mf2_t __riscv_vslide1up_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                 int32_t rs1, size_t vl);
vint32m1_t __riscv_vslide1up_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                int32_t rs1, size_t vl);
vint32m2_t __riscv_vslide1up_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                int32_t rs1, size_t vl);
vint32m4_t __riscv_vslide1up_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                int32_t rs1, size_t vl);
vint32m8_t __riscv_vslide1up_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                int32_t rs1, size_t vl);
vint64m1_t __riscv_vslide1up_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                int64_t rs1, size_t vl);
vint64m2_t __riscv_vslide1up_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                int64_t rs1, size_t vl);
vint64m4_t __riscv_vslide1up_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                int64_t rs1, size_t vl);
vint64m8_t __riscv_vslide1up_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                int64_t rs1, size_t vl);
vint8mf8_t __riscv_vslide1down_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                  int8_t rs1, size_t vl);
vint8mf4_t __riscv_vslide1down_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                  int8_t rs1, size_t vl);
vint8mf2_t __riscv_vslide1down_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                  int8_t rs1, size_t vl);
vint8m1_t __riscv_vslide1down_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                 int8_t rs1, size_t vl);
vint8m2_t __riscv_vslide1down_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                 int8_t rs1, size_t vl);
vint8m4_t __riscv_vslide1down_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                 int8_t rs1, size_t vl);
vint8m8_t __riscv_vslide1down_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                                 int8_t rs1, size_t vl);
vint16mf4_t __riscv_vslide1down_mu(vbool64_t vm, vint16mf4_t vd,
                                   vint16mf4_t vs2, int16_t rs1, size_t vl);
vint16mf2_t __riscv_vslide1down_mu(vbool32_t vm, vint16mf2_t vd,
                                   vint16mf2_t vs2, int16_t rs1, size_t vl);
vint16m1_t __riscv_vslide1down_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                  int16_t rs1, size_t vl);
vint16m2_t __riscv_vslide1down_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                  int16_t rs1, size_t vl);
vint16m4_t __riscv_vslide1down_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                  int16_t rs1, size_t vl);
vint16m8_t __riscv_vslide1down_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                  int16_t rs1, size_t vl);
vint32mf2_t __riscv_vslide1down_mu(vbool64_t vm, vint32mf2_t vd,
                                   vint32mf2_t vs2, int32_t rs1, size_t vl);
vint32m1_t __riscv_vslide1down_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                  int32_t rs1, size_t vl);
vint32m2_t __riscv_vslide1down_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                  int32_t rs1, size_t vl);
vint32m4_t __riscv_vslide1down_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                  int32_t rs1, size_t vl);
vint32m8_t __riscv_vslide1down_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                  int32_t rs1, size_t vl);
vint64m1_t __riscv_vslide1down_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                  int64_t rs1, size_t vl);
vint64m2_t __riscv_vslide1down_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                  int64_t rs1, size_t vl);
vint64m4_t __riscv_vslide1down_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                  int64_t rs1, size_t vl);
vint64m8_t __riscv_vslide1down_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                  int64_t rs1, size_t vl);
vuint8mf8_t __riscv_vslide1up_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                 uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vslide1up_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                 uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vslide1up_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                 uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vslide1up_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vslide1up_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vslide1up_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vslide1up_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vslide1up_mu(vbool64_t vm, vuint16mf4_t vd,
                                  vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vslide1up_mu(vbool32_t vm, vuint16mf2_t vd,
                                  vuint16mf2_t vs2, uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vslide1up_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                 uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vslide1up_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                 uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vslide1up_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                 uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vslide1up_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vslide1up_mu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint32mf2_t vs2, uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vslide1up_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                 uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vslide1up_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                 uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vslide1up_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                 uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vslide1up_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vslide1up_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                 uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vslide1up_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                 uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vslide1up_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                 uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vslide1up_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                 uint64_t rs1, size_t vl);
vuint8mf8_t __riscv_vslide1down_mu(vbool64_t vm, vuint8mf8_t vd,
                                   vuint8mf8_t vs2, uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vslide1down_mu(vbool32_t vm, vuint8mf4_t vd,
                                   vuint8mf4_t vs2, uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vslide1down_mu(vbool16_t vm, vuint8mf2_t vd,
                                   vuint8mf2_t vs2, uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vslide1down_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                  uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vslide1down_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                  uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vslide1down_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                  uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vslide1down_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vslide1down_mu(vbool64_t vm, vuint16mf4_t vd,
                                    vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vslide1down_mu(vbool32_t vm, vuint16mf2_t vd,
                                    vuint16mf2_t vs2, uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vslide1down_mu(vbool16_t vm, vuint16m1_t vd,
                                   vuint16m1_t vs2, uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vslide1down_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                   uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vslide1down_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                   uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vslide1down_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                   uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vslide1down_mu(vbool64_t vm, vuint32mf2_t vd,
                                    vuint32mf2_t vs2, uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vslide1down_mu(vbool32_t vm, vuint32m1_t vd,
                                   vuint32m1_t vs2, uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vslide1down_mu(vbool16_t vm, vuint32m2_t vd,
                                   vuint32m2_t vs2, uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vslide1down_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                   uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vslide1down_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                   uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vslide1down_mu(vbool64_t vm, vuint64m1_t vd,
                                   vuint64m1_t vs2, uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vslide1down_mu(vbool32_t vm, vuint64m2_t vd,
                                   vuint64m2_t vs2, uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vslide1down_mu(vbool16_t vm, vuint64m4_t vd,
                                   vuint64m4_t vs2, uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vslide1down_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                   uint64_t rs1, size_t vl);

Vector Register Gather Intrinsics

vfloat16mf4_t __riscv_vrgather_tu(vfloat16mf4_t vd, vfloat16mf4_t vs2,
                                  vuint16mf4_t vs1, size_t vl);
vfloat16mf4_t __riscv_vrgather_tu(vfloat16mf4_t vd, vfloat16mf4_t vs2,
                                  size_t vs1, size_t vl);
vfloat16mf2_t __riscv_vrgather_tu(vfloat16mf2_t vd, vfloat16mf2_t vs2,
                                  vuint16mf2_t vs1, size_t vl);
vfloat16mf2_t __riscv_vrgather_tu(vfloat16mf2_t vd, vfloat16mf2_t vs2,
                                  size_t vs1, size_t vl);
vfloat16m1_t __riscv_vrgather_tu(vfloat16m1_t vd, vfloat16m1_t vs2,
                                 vuint16m1_t vs1, size_t vl);
vfloat16m1_t __riscv_vrgather_tu(vfloat16m1_t vd, vfloat16m1_t vs2, size_t vs1,
                                 size_t vl);
vfloat16m2_t __riscv_vrgather_tu(vfloat16m2_t vd, vfloat16m2_t vs2,
                                 vuint16m2_t vs1, size_t vl);
vfloat16m2_t __riscv_vrgather_tu(vfloat16m2_t vd, vfloat16m2_t vs2, size_t vs1,
                                 size_t vl);
vfloat16m4_t __riscv_vrgather_tu(vfloat16m4_t vd, vfloat16m4_t vs2,
                                 vuint16m4_t vs1, size_t vl);
vfloat16m4_t __riscv_vrgather_tu(vfloat16m4_t vd, vfloat16m4_t vs2, size_t vs1,
                                 size_t vl);
vfloat16m8_t __riscv_vrgather_tu(vfloat16m8_t vd, vfloat16m8_t vs2,
                                 vuint16m8_t vs1, size_t vl);
vfloat16m8_t __riscv_vrgather_tu(vfloat16m8_t vd, vfloat16m8_t vs2, size_t vs1,
                                 size_t vl);
vfloat32mf2_t __riscv_vrgather_tu(vfloat32mf2_t vd, vfloat32mf2_t vs2,
                                  vuint32mf2_t vs1, size_t vl);
vfloat32mf2_t __riscv_vrgather_tu(vfloat32mf2_t vd, vfloat32mf2_t vs2,
                                  size_t vs1, size_t vl);
vfloat32m1_t __riscv_vrgather_tu(vfloat32m1_t vd, vfloat32m1_t vs2,
                                 vuint32m1_t vs1, size_t vl);
vfloat32m1_t __riscv_vrgather_tu(vfloat32m1_t vd, vfloat32m1_t vs2, size_t vs1,
                                 size_t vl);
vfloat32m2_t __riscv_vrgather_tu(vfloat32m2_t vd, vfloat32m2_t vs2,
                                 vuint32m2_t vs1, size_t vl);
vfloat32m2_t __riscv_vrgather_tu(vfloat32m2_t vd, vfloat32m2_t vs2, size_t vs1,
                                 size_t vl);
vfloat32m4_t __riscv_vrgather_tu(vfloat32m4_t vd, vfloat32m4_t vs2,
                                 vuint32m4_t vs1, size_t vl);
vfloat32m4_t __riscv_vrgather_tu(vfloat32m4_t vd, vfloat32m4_t vs2, size_t vs1,
                                 size_t vl);
vfloat32m8_t __riscv_vrgather_tu(vfloat32m8_t vd, vfloat32m8_t vs2,
                                 vuint32m8_t vs1, size_t vl);
vfloat32m8_t __riscv_vrgather_tu(vfloat32m8_t vd, vfloat32m8_t vs2, size_t vs1,
                                 size_t vl);
vfloat64m1_t __riscv_vrgather_tu(vfloat64m1_t vd, vfloat64m1_t vs2,
                                 vuint64m1_t vs1, size_t vl);
vfloat64m1_t __riscv_vrgather_tu(vfloat64m1_t vd, vfloat64m1_t vs2, size_t vs1,
                                 size_t vl);
vfloat64m2_t __riscv_vrgather_tu(vfloat64m2_t vd, vfloat64m2_t vs2,
                                 vuint64m2_t vs1, size_t vl);
vfloat64m2_t __riscv_vrgather_tu(vfloat64m2_t vd, vfloat64m2_t vs2, size_t vs1,
                                 size_t vl);
vfloat64m4_t __riscv_vrgather_tu(vfloat64m4_t vd, vfloat64m4_t vs2,
                                 vuint64m4_t vs1, size_t vl);
vfloat64m4_t __riscv_vrgather_tu(vfloat64m4_t vd, vfloat64m4_t vs2, size_t vs1,
                                 size_t vl);
vfloat64m8_t __riscv_vrgather_tu(vfloat64m8_t vd, vfloat64m8_t vs2,
                                 vuint64m8_t vs1, size_t vl);
vfloat64m8_t __riscv_vrgather_tu(vfloat64m8_t vd, vfloat64m8_t vs2, size_t vs1,
                                 size_t vl);
vfloat16mf4_t __riscv_vrgatherei16_tu(vfloat16mf4_t vd, vfloat16mf4_t vs2,
                                      vuint16mf4_t vs1, size_t vl);
vfloat16mf2_t __riscv_vrgatherei16_tu(vfloat16mf2_t vd, vfloat16mf2_t vs2,
                                      vuint16mf2_t vs1, size_t vl);
vfloat16m1_t __riscv_vrgatherei16_tu(vfloat16m1_t vd, vfloat16m1_t vs2,
                                     vuint16m1_t vs1, size_t vl);
vfloat16m2_t __riscv_vrgatherei16_tu(vfloat16m2_t vd, vfloat16m2_t vs2,
                                     vuint16m2_t vs1, size_t vl);
vfloat16m4_t __riscv_vrgatherei16_tu(vfloat16m4_t vd, vfloat16m4_t vs2,
                                     vuint16m4_t vs1, size_t vl);
vfloat16m8_t __riscv_vrgatherei16_tu(vfloat16m8_t vd, vfloat16m8_t vs2,
                                     vuint16m8_t vs1, size_t vl);
vfloat32mf2_t __riscv_vrgatherei16_tu(vfloat32mf2_t vd, vfloat32mf2_t vs2,
                                      vuint16mf4_t vs1, size_t vl);
vfloat32m1_t __riscv_vrgatherei16_tu(vfloat32m1_t vd, vfloat32m1_t vs2,
                                     vuint16mf2_t vs1, size_t vl);
vfloat32m2_t __riscv_vrgatherei16_tu(vfloat32m2_t vd, vfloat32m2_t vs2,
                                     vuint16m1_t vs1, size_t vl);
vfloat32m4_t __riscv_vrgatherei16_tu(vfloat32m4_t vd, vfloat32m4_t vs2,
                                     vuint16m2_t vs1, size_t vl);
vfloat32m8_t __riscv_vrgatherei16_tu(vfloat32m8_t vd, vfloat32m8_t vs2,
                                     vuint16m4_t vs1, size_t vl);
vfloat64m1_t __riscv_vrgatherei16_tu(vfloat64m1_t vd, vfloat64m1_t vs2,
                                     vuint16mf4_t vs1, size_t vl);
vfloat64m2_t __riscv_vrgatherei16_tu(vfloat64m2_t vd, vfloat64m2_t vs2,
                                     vuint16mf2_t vs1, size_t vl);
vfloat64m4_t __riscv_vrgatherei16_tu(vfloat64m4_t vd, vfloat64m4_t vs2,
                                     vuint16m1_t vs1, size_t vl);
vfloat64m8_t __riscv_vrgatherei16_tu(vfloat64m8_t vd, vfloat64m8_t vs2,
                                     vuint16m2_t vs1, size_t vl);
vint8mf8_t __riscv_vrgather_tu(vint8mf8_t vd, vint8mf8_t vs2, vuint8mf8_t vs1,
                               size_t vl);
vint8mf8_t __riscv_vrgather_tu(vint8mf8_t vd, vint8mf8_t vs2, size_t vs1,
                               size_t vl);
vint8mf4_t __riscv_vrgather_tu(vint8mf4_t vd, vint8mf4_t vs2, vuint8mf4_t vs1,
                               size_t vl);
vint8mf4_t __riscv_vrgather_tu(vint8mf4_t vd, vint8mf4_t vs2, size_t vs1,
                               size_t vl);
vint8mf2_t __riscv_vrgather_tu(vint8mf2_t vd, vint8mf2_t vs2, vuint8mf2_t vs1,
                               size_t vl);
vint8mf2_t __riscv_vrgather_tu(vint8mf2_t vd, vint8mf2_t vs2, size_t vs1,
                               size_t vl);
vint8m1_t __riscv_vrgather_tu(vint8m1_t vd, vint8m1_t vs2, vuint8m1_t vs1,
                              size_t vl);
vint8m1_t __riscv_vrgather_tu(vint8m1_t vd, vint8m1_t vs2, size_t vs1,
                              size_t vl);
vint8m2_t __riscv_vrgather_tu(vint8m2_t vd, vint8m2_t vs2, vuint8m2_t vs1,
                              size_t vl);
vint8m2_t __riscv_vrgather_tu(vint8m2_t vd, vint8m2_t vs2, size_t vs1,
                              size_t vl);
vint8m4_t __riscv_vrgather_tu(vint8m4_t vd, vint8m4_t vs2, vuint8m4_t vs1,
                              size_t vl);
vint8m4_t __riscv_vrgather_tu(vint8m4_t vd, vint8m4_t vs2, size_t vs1,
                              size_t vl);
vint8m8_t __riscv_vrgather_tu(vint8m8_t vd, vint8m8_t vs2, vuint8m8_t vs1,
                              size_t vl);
vint8m8_t __riscv_vrgather_tu(vint8m8_t vd, vint8m8_t vs2, size_t vs1,
                              size_t vl);
vint16mf4_t __riscv_vrgather_tu(vint16mf4_t vd, vint16mf4_t vs2,
                                vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vrgather_tu(vint16mf4_t vd, vint16mf4_t vs2, size_t vs1,
                                size_t vl);
vint16mf2_t __riscv_vrgather_tu(vint16mf2_t vd, vint16mf2_t vs2,
                                vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vrgather_tu(vint16mf2_t vd, vint16mf2_t vs2, size_t vs1,
                                size_t vl);
vint16m1_t __riscv_vrgather_tu(vint16m1_t vd, vint16m1_t vs2, vuint16m1_t vs1,
                               size_t vl);
vint16m1_t __riscv_vrgather_tu(vint16m1_t vd, vint16m1_t vs2, size_t vs1,
                               size_t vl);
vint16m2_t __riscv_vrgather_tu(vint16m2_t vd, vint16m2_t vs2, vuint16m2_t vs1,
                               size_t vl);
vint16m2_t __riscv_vrgather_tu(vint16m2_t vd, vint16m2_t vs2, size_t vs1,
                               size_t vl);
vint16m4_t __riscv_vrgather_tu(vint16m4_t vd, vint16m4_t vs2, vuint16m4_t vs1,
                               size_t vl);
vint16m4_t __riscv_vrgather_tu(vint16m4_t vd, vint16m4_t vs2, size_t vs1,
                               size_t vl);
vint16m8_t __riscv_vrgather_tu(vint16m8_t vd, vint16m8_t vs2, vuint16m8_t vs1,
                               size_t vl);
vint16m8_t __riscv_vrgather_tu(vint16m8_t vd, vint16m8_t vs2, size_t vs1,
                               size_t vl);
vint32mf2_t __riscv_vrgather_tu(vint32mf2_t vd, vint32mf2_t vs2,
                                vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vrgather_tu(vint32mf2_t vd, vint32mf2_t vs2, size_t vs1,
                                size_t vl);
vint32m1_t __riscv_vrgather_tu(vint32m1_t vd, vint32m1_t vs2, vuint32m1_t vs1,
                               size_t vl);
vint32m1_t __riscv_vrgather_tu(vint32m1_t vd, vint32m1_t vs2, size_t vs1,
                               size_t vl);
vint32m2_t __riscv_vrgather_tu(vint32m2_t vd, vint32m2_t vs2, vuint32m2_t vs1,
                               size_t vl);
vint32m2_t __riscv_vrgather_tu(vint32m2_t vd, vint32m2_t vs2, size_t vs1,
                               size_t vl);
vint32m4_t __riscv_vrgather_tu(vint32m4_t vd, vint32m4_t vs2, vuint32m4_t vs1,
                               size_t vl);
vint32m4_t __riscv_vrgather_tu(vint32m4_t vd, vint32m4_t vs2, size_t vs1,
                               size_t vl);
vint32m8_t __riscv_vrgather_tu(vint32m8_t vd, vint32m8_t vs2, vuint32m8_t vs1,
                               size_t vl);
vint32m8_t __riscv_vrgather_tu(vint32m8_t vd, vint32m8_t vs2, size_t vs1,
                               size_t vl);
vint64m1_t __riscv_vrgather_tu(vint64m1_t vd, vint64m1_t vs2, vuint64m1_t vs1,
                               size_t vl);
vint64m1_t __riscv_vrgather_tu(vint64m1_t vd, vint64m1_t vs2, size_t vs1,
                               size_t vl);
vint64m2_t __riscv_vrgather_tu(vint64m2_t vd, vint64m2_t vs2, vuint64m2_t vs1,
                               size_t vl);
vint64m2_t __riscv_vrgather_tu(vint64m2_t vd, vint64m2_t vs2, size_t vs1,
                               size_t vl);
vint64m4_t __riscv_vrgather_tu(vint64m4_t vd, vint64m4_t vs2, vuint64m4_t vs1,
                               size_t vl);
vint64m4_t __riscv_vrgather_tu(vint64m4_t vd, vint64m4_t vs2, size_t vs1,
                               size_t vl);
vint64m8_t __riscv_vrgather_tu(vint64m8_t vd, vint64m8_t vs2, vuint64m8_t vs1,
                               size_t vl);
vint64m8_t __riscv_vrgather_tu(vint64m8_t vd, vint64m8_t vs2, size_t vs1,
                               size_t vl);
vint8mf8_t __riscv_vrgatherei16_tu(vint8mf8_t vd, vint8mf8_t vs2,
                                   vuint16mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vrgatherei16_tu(vint8mf4_t vd, vint8mf4_t vs2,
                                   vuint16mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vrgatherei16_tu(vint8mf2_t vd, vint8mf2_t vs2,
                                   vuint16m1_t vs1, size_t vl);
vint8m1_t __riscv_vrgatherei16_tu(vint8m1_t vd, vint8m1_t vs2, vuint16m2_t vs1,
                                  size_t vl);
vint8m2_t __riscv_vrgatherei16_tu(vint8m2_t vd, vint8m2_t vs2, vuint16m4_t vs1,
                                  size_t vl);
vint8m4_t __riscv_vrgatherei16_tu(vint8m4_t vd, vint8m4_t vs2, vuint16m8_t vs1,
                                  size_t vl);
vint16mf4_t __riscv_vrgatherei16_tu(vint16mf4_t vd, vint16mf4_t vs2,
                                    vuint16mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vrgatherei16_tu(vint16mf2_t vd, vint16mf2_t vs2,
                                    vuint16mf2_t vs1, size_t vl);
vint16m1_t __riscv_vrgatherei16_tu(vint16m1_t vd, vint16m1_t vs2,
                                   vuint16m1_t vs1, size_t vl);
vint16m2_t __riscv_vrgatherei16_tu(vint16m2_t vd, vint16m2_t vs2,
                                   vuint16m2_t vs1, size_t vl);
vint16m4_t __riscv_vrgatherei16_tu(vint16m4_t vd, vint16m4_t vs2,
                                   vuint16m4_t vs1, size_t vl);
vint16m8_t __riscv_vrgatherei16_tu(vint16m8_t vd, vint16m8_t vs2,
                                   vuint16m8_t vs1, size_t vl);
vint32mf2_t __riscv_vrgatherei16_tu(vint32mf2_t vd, vint32mf2_t vs2,
                                    vuint16mf4_t vs1, size_t vl);
vint32m1_t __riscv_vrgatherei16_tu(vint32m1_t vd, vint32m1_t vs2,
                                   vuint16mf2_t vs1, size_t vl);
vint32m2_t __riscv_vrgatherei16_tu(vint32m2_t vd, vint32m2_t vs2,
                                   vuint16m1_t vs1, size_t vl);
vint32m4_t __riscv_vrgatherei16_tu(vint32m4_t vd, vint32m4_t vs2,
                                   vuint16m2_t vs1, size_t vl);
vint32m8_t __riscv_vrgatherei16_tu(vint32m8_t vd, vint32m8_t vs2,
                                   vuint16m4_t vs1, size_t vl);
vint64m1_t __riscv_vrgatherei16_tu(vint64m1_t vd, vint64m1_t vs2,
                                   vuint16mf4_t vs1, size_t vl);
vint64m2_t __riscv_vrgatherei16_tu(vint64m2_t vd, vint64m2_t vs2,
                                   vuint16mf2_t vs1, size_t vl);
vint64m4_t __riscv_vrgatherei16_tu(vint64m4_t vd, vint64m4_t vs2,
                                   vuint16m1_t vs1, size_t vl);
vint64m8_t __riscv_vrgatherei16_tu(vint64m8_t vd, vint64m8_t vs2,
                                   vuint16m2_t vs1, size_t vl);
// __riscv_vrgather_tu: unsigned integer overloads (8/16/32/64-bit elements).
// Every data type is declared twice: first with a vector vs1 of the same
// unsigned type as vd/vs2, then with a scalar size_t vs1.
// NOTE(review): the scalar form presumably applies one index to every
// element - confirm against the RVV intrinsics spec.
vuint8mf8_t __riscv_vrgather_tu(vuint8mf8_t vd, vuint8mf8_t vs2,
                                vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vrgather_tu(vuint8mf8_t vd, vuint8mf8_t vs2, size_t vs1,
                                size_t vl);
vuint8mf4_t __riscv_vrgather_tu(vuint8mf4_t vd, vuint8mf4_t vs2,
                                vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vrgather_tu(vuint8mf4_t vd, vuint8mf4_t vs2, size_t vs1,
                                size_t vl);
vuint8mf2_t __riscv_vrgather_tu(vuint8mf2_t vd, vuint8mf2_t vs2,
                                vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vrgather_tu(vuint8mf2_t vd, vuint8mf2_t vs2, size_t vs1,
                                size_t vl);
vuint8m1_t __riscv_vrgather_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                               size_t vl);
vuint8m1_t __riscv_vrgather_tu(vuint8m1_t vd, vuint8m1_t vs2, size_t vs1,
                               size_t vl);
vuint8m2_t __riscv_vrgather_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                               size_t vl);
vuint8m2_t __riscv_vrgather_tu(vuint8m2_t vd, vuint8m2_t vs2, size_t vs1,
                               size_t vl);
vuint8m4_t __riscv_vrgather_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                               size_t vl);
vuint8m4_t __riscv_vrgather_tu(vuint8m4_t vd, vuint8m4_t vs2, size_t vs1,
                               size_t vl);
vuint8m8_t __riscv_vrgather_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                               size_t vl);
vuint8m8_t __riscv_vrgather_tu(vuint8m8_t vd, vuint8m8_t vs2, size_t vs1,
                               size_t vl);
vuint16mf4_t __riscv_vrgather_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                                 vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vrgather_tu(vuint16mf4_t vd, vuint16mf4_t vs2, size_t vs1,
                                 size_t vl);
vuint16mf2_t __riscv_vrgather_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                                 vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vrgather_tu(vuint16mf2_t vd, vuint16mf2_t vs2, size_t vs1,
                                 size_t vl);
vuint16m1_t __riscv_vrgather_tu(vuint16m1_t vd, vuint16m1_t vs2,
                                vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vrgather_tu(vuint16m1_t vd, vuint16m1_t vs2, size_t vs1,
                                size_t vl);
vuint16m2_t __riscv_vrgather_tu(vuint16m2_t vd, vuint16m2_t vs2,
                                vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vrgather_tu(vuint16m2_t vd, vuint16m2_t vs2, size_t vs1,
                                size_t vl);
vuint16m4_t __riscv_vrgather_tu(vuint16m4_t vd, vuint16m4_t vs2,
                                vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vrgather_tu(vuint16m4_t vd, vuint16m4_t vs2, size_t vs1,
                                size_t vl);
vuint16m8_t __riscv_vrgather_tu(vuint16m8_t vd, vuint16m8_t vs2,
                                vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vrgather_tu(vuint16m8_t vd, vuint16m8_t vs2, size_t vs1,
                                size_t vl);
vuint32mf2_t __riscv_vrgather_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                                 vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vrgather_tu(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vs1,
                                 size_t vl);
vuint32m1_t __riscv_vrgather_tu(vuint32m1_t vd, vuint32m1_t vs2,
                                vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vrgather_tu(vuint32m1_t vd, vuint32m1_t vs2, size_t vs1,
                                size_t vl);
vuint32m2_t __riscv_vrgather_tu(vuint32m2_t vd, vuint32m2_t vs2,
                                vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vrgather_tu(vuint32m2_t vd, vuint32m2_t vs2, size_t vs1,
                                size_t vl);
vuint32m4_t __riscv_vrgather_tu(vuint32m4_t vd, vuint32m4_t vs2,
                                vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vrgather_tu(vuint32m4_t vd, vuint32m4_t vs2, size_t vs1,
                                size_t vl);
vuint32m8_t __riscv_vrgather_tu(vuint32m8_t vd, vuint32m8_t vs2,
                                vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vrgather_tu(vuint32m8_t vd, vuint32m8_t vs2, size_t vs1,
                                size_t vl);
vuint64m1_t __riscv_vrgather_tu(vuint64m1_t vd, vuint64m1_t vs2,
                                vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vrgather_tu(vuint64m1_t vd, vuint64m1_t vs2, size_t vs1,
                                size_t vl);
vuint64m2_t __riscv_vrgather_tu(vuint64m2_t vd, vuint64m2_t vs2,
                                vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vrgather_tu(vuint64m2_t vd, vuint64m2_t vs2, size_t vs1,
                                size_t vl);
vuint64m4_t __riscv_vrgather_tu(vuint64m4_t vd, vuint64m4_t vs2,
                                vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vrgather_tu(vuint64m4_t vd, vuint64m4_t vs2, size_t vs1,
                                size_t vl);
vuint64m8_t __riscv_vrgather_tu(vuint64m8_t vd, vuint64m8_t vs2,
                                vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vrgather_tu(vuint64m8_t vd, vuint64m8_t vs2, size_t vs1,
                                size_t vl);
// __riscv_vrgatherei16_tu: unsigned integer overloads.
// vs1 is always a vuint16 vector; its LMUL is the data LMUL scaled by 16/SEW
// (x2 for 8-bit, x1 for 16-bit, /2 for 32-bit, /4 for 64-bit data).
// The 8-bit list stops at m4: no vuint8m8_t overload is declared here.
vuint8mf8_t __riscv_vrgatherei16_tu(vuint8mf8_t vd, vuint8mf8_t vs2,
                                    vuint16mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vrgatherei16_tu(vuint8mf4_t vd, vuint8mf4_t vs2,
                                    vuint16mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vrgatherei16_tu(vuint8mf2_t vd, vuint8mf2_t vs2,
                                    vuint16m1_t vs1, size_t vl);
vuint8m1_t __riscv_vrgatherei16_tu(vuint8m1_t vd, vuint8m1_t vs2,
                                   vuint16m2_t vs1, size_t vl);
vuint8m2_t __riscv_vrgatherei16_tu(vuint8m2_t vd, vuint8m2_t vs2,
                                   vuint16m4_t vs1, size_t vl);
vuint8m4_t __riscv_vrgatherei16_tu(vuint8m4_t vd, vuint8m4_t vs2,
                                   vuint16m8_t vs1, size_t vl);
vuint16mf4_t __riscv_vrgatherei16_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                                     vuint16mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vrgatherei16_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                                     vuint16mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vrgatherei16_tu(vuint16m1_t vd, vuint16m1_t vs2,
                                    vuint16m1_t vs1, size_t vl);
vuint16m2_t __riscv_vrgatherei16_tu(vuint16m2_t vd, vuint16m2_t vs2,
                                    vuint16m2_t vs1, size_t vl);
vuint16m4_t __riscv_vrgatherei16_tu(vuint16m4_t vd, vuint16m4_t vs2,
                                    vuint16m4_t vs1, size_t vl);
vuint16m8_t __riscv_vrgatherei16_tu(vuint16m8_t vd, vuint16m8_t vs2,
                                    vuint16m8_t vs1, size_t vl);
vuint32mf2_t __riscv_vrgatherei16_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                                     vuint16mf4_t vs1, size_t vl);
vuint32m1_t __riscv_vrgatherei16_tu(vuint32m1_t vd, vuint32m1_t vs2,
                                    vuint16mf2_t vs1, size_t vl);
vuint32m2_t __riscv_vrgatherei16_tu(vuint32m2_t vd, vuint32m2_t vs2,
                                    vuint16m1_t vs1, size_t vl);
vuint32m4_t __riscv_vrgatherei16_tu(vuint32m4_t vd, vuint32m4_t vs2,
                                    vuint16m2_t vs1, size_t vl);
vuint32m8_t __riscv_vrgatherei16_tu(vuint32m8_t vd, vuint32m8_t vs2,
                                    vuint16m4_t vs1, size_t vl);
vuint64m1_t __riscv_vrgatherei16_tu(vuint64m1_t vd, vuint64m1_t vs2,
                                    vuint16mf4_t vs1, size_t vl);
vuint64m2_t __riscv_vrgatherei16_tu(vuint64m2_t vd, vuint64m2_t vs2,
                                    vuint16mf2_t vs1, size_t vl);
vuint64m4_t __riscv_vrgatherei16_tu(vuint64m4_t vd, vuint64m4_t vs2,
                                    vuint16m1_t vs1, size_t vl);
vuint64m8_t __riscv_vrgatherei16_tu(vuint64m8_t vd, vuint64m8_t vs2,
                                    vuint16m2_t vs1, size_t vl);
// masked functions: _tum variants, each taking a leading vboolN_t mask operand vm
// __riscv_vrgather_tum: floating-point overloads (16/32/64-bit elements).
// vm is the vboolN_t mask with N = SEW / LMUL (e.g. vfloat16mf4_t pairs with
// vbool64_t). Every data type is declared twice: first with a vector vs1 (the
// unsigned integer vector with the same SEW and LMUL as the data), then with
// a scalar size_t vs1.
vfloat16mf4_t __riscv_vrgather_tum(vbool64_t vm, vfloat16mf4_t vd,
                                   vfloat16mf4_t vs2, vuint16mf4_t vs1,
                                   size_t vl);
vfloat16mf4_t __riscv_vrgather_tum(vbool64_t vm, vfloat16mf4_t vd,
                                   vfloat16mf4_t vs2, size_t vs1, size_t vl);
vfloat16mf2_t __riscv_vrgather_tum(vbool32_t vm, vfloat16mf2_t vd,
                                   vfloat16mf2_t vs2, vuint16mf2_t vs1,
                                   size_t vl);
vfloat16mf2_t __riscv_vrgather_tum(vbool32_t vm, vfloat16mf2_t vd,
                                   vfloat16mf2_t vs2, size_t vs1, size_t vl);
vfloat16m1_t __riscv_vrgather_tum(vbool16_t vm, vfloat16m1_t vd,
                                  vfloat16m1_t vs2, vuint16m1_t vs1, size_t vl);
vfloat16m1_t __riscv_vrgather_tum(vbool16_t vm, vfloat16m1_t vd,
                                  vfloat16m1_t vs2, size_t vs1, size_t vl);
vfloat16m2_t __riscv_vrgather_tum(vbool8_t vm, vfloat16m2_t vd,
                                  vfloat16m2_t vs2, vuint16m2_t vs1, size_t vl);
vfloat16m2_t __riscv_vrgather_tum(vbool8_t vm, vfloat16m2_t vd,
                                  vfloat16m2_t vs2, size_t vs1, size_t vl);
vfloat16m4_t __riscv_vrgather_tum(vbool4_t vm, vfloat16m4_t vd,
                                  vfloat16m4_t vs2, vuint16m4_t vs1, size_t vl);
vfloat16m4_t __riscv_vrgather_tum(vbool4_t vm, vfloat16m4_t vd,
                                  vfloat16m4_t vs2, size_t vs1, size_t vl);
vfloat16m8_t __riscv_vrgather_tum(vbool2_t vm, vfloat16m8_t vd,
                                  vfloat16m8_t vs2, vuint16m8_t vs1, size_t vl);
vfloat16m8_t __riscv_vrgather_tum(vbool2_t vm, vfloat16m8_t vd,
                                  vfloat16m8_t vs2, size_t vs1, size_t vl);
vfloat32mf2_t __riscv_vrgather_tum(vbool64_t vm, vfloat32mf2_t vd,
                                   vfloat32mf2_t vs2, vuint32mf2_t vs1,
                                   size_t vl);
vfloat32mf2_t __riscv_vrgather_tum(vbool64_t vm, vfloat32mf2_t vd,
                                   vfloat32mf2_t vs2, size_t vs1, size_t vl);
vfloat32m1_t __riscv_vrgather_tum(vbool32_t vm, vfloat32m1_t vd,
                                  vfloat32m1_t vs2, vuint32m1_t vs1, size_t vl);
vfloat32m1_t __riscv_vrgather_tum(vbool32_t vm, vfloat32m1_t vd,
                                  vfloat32m1_t vs2, size_t vs1, size_t vl);
vfloat32m2_t __riscv_vrgather_tum(vbool16_t vm, vfloat32m2_t vd,
                                  vfloat32m2_t vs2, vuint32m2_t vs1, size_t vl);
vfloat32m2_t __riscv_vrgather_tum(vbool16_t vm, vfloat32m2_t vd,
                                  vfloat32m2_t vs2, size_t vs1, size_t vl);
vfloat32m4_t __riscv_vrgather_tum(vbool8_t vm, vfloat32m4_t vd,
                                  vfloat32m4_t vs2, vuint32m4_t vs1, size_t vl);
vfloat32m4_t __riscv_vrgather_tum(vbool8_t vm, vfloat32m4_t vd,
                                  vfloat32m4_t vs2, size_t vs1, size_t vl);
vfloat32m8_t __riscv_vrgather_tum(vbool4_t vm, vfloat32m8_t vd,
                                  vfloat32m8_t vs2, vuint32m8_t vs1, size_t vl);
vfloat32m8_t __riscv_vrgather_tum(vbool4_t vm, vfloat32m8_t vd,
                                  vfloat32m8_t vs2, size_t vs1, size_t vl);
vfloat64m1_t __riscv_vrgather_tum(vbool64_t vm, vfloat64m1_t vd,
                                  vfloat64m1_t vs2, vuint64m1_t vs1, size_t vl);
vfloat64m1_t __riscv_vrgather_tum(vbool64_t vm, vfloat64m1_t vd,
                                  vfloat64m1_t vs2, size_t vs1, size_t vl);
vfloat64m2_t __riscv_vrgather_tum(vbool32_t vm, vfloat64m2_t vd,
                                  vfloat64m2_t vs2, vuint64m2_t vs1, size_t vl);
vfloat64m2_t __riscv_vrgather_tum(vbool32_t vm, vfloat64m2_t vd,
                                  vfloat64m2_t vs2, size_t vs1, size_t vl);
vfloat64m4_t __riscv_vrgather_tum(vbool16_t vm, vfloat64m4_t vd,
                                  vfloat64m4_t vs2, vuint64m4_t vs1, size_t vl);
vfloat64m4_t __riscv_vrgather_tum(vbool16_t vm, vfloat64m4_t vd,
                                  vfloat64m4_t vs2, size_t vs1, size_t vl);
vfloat64m8_t __riscv_vrgather_tum(vbool8_t vm, vfloat64m8_t vd,
                                  vfloat64m8_t vs2, vuint64m8_t vs1, size_t vl);
vfloat64m8_t __riscv_vrgather_tum(vbool8_t vm, vfloat64m8_t vd,
                                  vfloat64m8_t vs2, size_t vs1, size_t vl);
// __riscv_vrgatherei16_tum: floating-point overloads.
// vs1 is always a vuint16 vector with LMUL = data LMUL * 16 / SEW, so 16-bit
// data uses the same LMUL, 32-bit data half of it and 64-bit data a quarter.
// vm is the vboolN_t mask with N = SEW / LMUL of the data type.
vfloat16mf4_t __riscv_vrgatherei16_tum(vbool64_t vm, vfloat16mf4_t vd,
                                       vfloat16mf4_t vs2, vuint16mf4_t vs1,
                                       size_t vl);
vfloat16mf2_t __riscv_vrgatherei16_tum(vbool32_t vm, vfloat16mf2_t vd,
                                       vfloat16mf2_t vs2, vuint16mf2_t vs1,
                                       size_t vl);
vfloat16m1_t __riscv_vrgatherei16_tum(vbool16_t vm, vfloat16m1_t vd,
                                      vfloat16m1_t vs2, vuint16m1_t vs1,
                                      size_t vl);
vfloat16m2_t __riscv_vrgatherei16_tum(vbool8_t vm, vfloat16m2_t vd,
                                      vfloat16m2_t vs2, vuint16m2_t vs1,
                                      size_t vl);
vfloat16m4_t __riscv_vrgatherei16_tum(vbool4_t vm, vfloat16m4_t vd,
                                      vfloat16m4_t vs2, vuint16m4_t vs1,
                                      size_t vl);
vfloat16m8_t __riscv_vrgatherei16_tum(vbool2_t vm, vfloat16m8_t vd,
                                      vfloat16m8_t vs2, vuint16m8_t vs1,
                                      size_t vl);
vfloat32mf2_t __riscv_vrgatherei16_tum(vbool64_t vm, vfloat32mf2_t vd,
                                       vfloat32mf2_t vs2, vuint16mf4_t vs1,
                                       size_t vl);
vfloat32m1_t __riscv_vrgatherei16_tum(vbool32_t vm, vfloat32m1_t vd,
                                      vfloat32m1_t vs2, vuint16mf2_t vs1,
                                      size_t vl);
vfloat32m2_t __riscv_vrgatherei16_tum(vbool16_t vm, vfloat32m2_t vd,
                                      vfloat32m2_t vs2, vuint16m1_t vs1,
                                      size_t vl);
vfloat32m4_t __riscv_vrgatherei16_tum(vbool8_t vm, vfloat32m4_t vd,
                                      vfloat32m4_t vs2, vuint16m2_t vs1,
                                      size_t vl);
vfloat32m8_t __riscv_vrgatherei16_tum(vbool4_t vm, vfloat32m8_t vd,
                                      vfloat32m8_t vs2, vuint16m4_t vs1,
                                      size_t vl);
vfloat64m1_t __riscv_vrgatherei16_tum(vbool64_t vm, vfloat64m1_t vd,
                                      vfloat64m1_t vs2, vuint16mf4_t vs1,
                                      size_t vl);
vfloat64m2_t __riscv_vrgatherei16_tum(vbool32_t vm, vfloat64m2_t vd,
                                      vfloat64m2_t vs2, vuint16mf2_t vs1,
                                      size_t vl);
vfloat64m4_t __riscv_vrgatherei16_tum(vbool16_t vm, vfloat64m4_t vd,
                                      vfloat64m4_t vs2, vuint16m1_t vs1,
                                      size_t vl);
vfloat64m8_t __riscv_vrgatherei16_tum(vbool8_t vm, vfloat64m8_t vd,
                                      vfloat64m8_t vs2, vuint16m2_t vs1,
                                      size_t vl);
// __riscv_vrgather_tum: signed integer overloads (8/16/32/64-bit elements).
// vm is the vboolN_t mask with N = SEW / LMUL (e.g. vint8m8_t pairs with
// vbool1_t). Every data type is declared twice: first with a vector vs1 (the
// unsigned counterpart of the data type), then with a scalar size_t vs1.
vint8mf8_t __riscv_vrgather_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vrgather_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                size_t vs1, size_t vl);
vint8mf4_t __riscv_vrgather_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vrgather_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                size_t vs1, size_t vl);
vint8mf2_t __riscv_vrgather_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vrgather_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                size_t vs1, size_t vl);
vint8m1_t __riscv_vrgather_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                               vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vrgather_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                               size_t vs1, size_t vl);
vint8m2_t __riscv_vrgather_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                               vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vrgather_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                               size_t vs1, size_t vl);
vint8m4_t __riscv_vrgather_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                               vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vrgather_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                               size_t vs1, size_t vl);
vint8m8_t __riscv_vrgather_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                               vuint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vrgather_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                               size_t vs1, size_t vl);
vint16mf4_t __riscv_vrgather_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                 vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vrgather_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                 size_t vs1, size_t vl);
vint16mf2_t __riscv_vrgather_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                 vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vrgather_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                 size_t vs1, size_t vl);
vint16m1_t __riscv_vrgather_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vrgather_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                size_t vs1, size_t vl);
vint16m2_t __riscv_vrgather_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vrgather_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                size_t vs1, size_t vl);
vint16m4_t __riscv_vrgather_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vrgather_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                size_t vs1, size_t vl);
vint16m8_t __riscv_vrgather_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                vuint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vrgather_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                size_t vs1, size_t vl);
vint32mf2_t __riscv_vrgather_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                 vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vrgather_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                 size_t vs1, size_t vl);
vint32m1_t __riscv_vrgather_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vrgather_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                size_t vs1, size_t vl);
vint32m2_t __riscv_vrgather_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vrgather_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                size_t vs1, size_t vl);
vint32m4_t __riscv_vrgather_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vrgather_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                size_t vs1, size_t vl);
vint32m8_t __riscv_vrgather_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                vuint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vrgather_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                size_t vs1, size_t vl);
vint64m1_t __riscv_vrgather_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                vuint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vrgather_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                size_t vs1, size_t vl);
vint64m2_t __riscv_vrgather_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                vuint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vrgather_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                size_t vs1, size_t vl);
vint64m4_t __riscv_vrgather_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                vuint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vrgather_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                size_t vs1, size_t vl);
vint64m8_t __riscv_vrgather_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                vuint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vrgather_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                size_t vs1, size_t vl);
// __riscv_vrgatherei16_tum: signed integer overloads.
// vs1 is always a vuint16 vector with LMUL = data LMUL * 16 / SEW; vm is the
// vboolN_t mask with N = SEW / LMUL of the data type.
// The 8-bit list stops at m4: no vint8m8_t overload is declared here.
vint8mf8_t __riscv_vrgatherei16_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                    vuint16mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vrgatherei16_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                    vuint16mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vrgatherei16_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                    vuint16m1_t vs1, size_t vl);
vint8m1_t __riscv_vrgatherei16_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                   vuint16m2_t vs1, size_t vl);
vint8m2_t __riscv_vrgatherei16_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                   vuint16m4_t vs1, size_t vl);
vint8m4_t __riscv_vrgatherei16_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                   vuint16m8_t vs1, size_t vl);
vint16mf4_t __riscv_vrgatherei16_tum(vbool64_t vm, vint16mf4_t vd,
                                     vint16mf4_t vs2, vuint16mf4_t vs1,
                                     size_t vl);
vint16mf2_t __riscv_vrgatherei16_tum(vbool32_t vm, vint16mf2_t vd,
                                     vint16mf2_t vs2, vuint16mf2_t vs1,
                                     size_t vl);
vint16m1_t __riscv_vrgatherei16_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                    vuint16m1_t vs1, size_t vl);
vint16m2_t __riscv_vrgatherei16_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                    vuint16m2_t vs1, size_t vl);
vint16m4_t __riscv_vrgatherei16_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                    vuint16m4_t vs1, size_t vl);
vint16m8_t __riscv_vrgatherei16_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                    vuint16m8_t vs1, size_t vl);
vint32mf2_t __riscv_vrgatherei16_tum(vbool64_t vm, vint32mf2_t vd,
                                     vint32mf2_t vs2, vuint16mf4_t vs1,
                                     size_t vl);
vint32m1_t __riscv_vrgatherei16_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                    vuint16mf2_t vs1, size_t vl);
vint32m2_t __riscv_vrgatherei16_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                    vuint16m1_t vs1, size_t vl);
vint32m4_t __riscv_vrgatherei16_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                    vuint16m2_t vs1, size_t vl);
vint32m8_t __riscv_vrgatherei16_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                    vuint16m4_t vs1, size_t vl);
vint64m1_t __riscv_vrgatherei16_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                    vuint16mf4_t vs1, size_t vl);
vint64m2_t __riscv_vrgatherei16_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                    vuint16mf2_t vs1, size_t vl);
vint64m4_t __riscv_vrgatherei16_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                    vuint16m1_t vs1, size_t vl);
vint64m8_t __riscv_vrgatherei16_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                    vuint16m2_t vs1, size_t vl);
// __riscv_vrgather_tum: unsigned integer overloads (8/16/32/64-bit elements).
// vm is the vboolN_t mask with N = SEW / LMUL of the data type. Every data
// type is declared twice: first with a vector vs1 of the same type as
// vd/vs2, then with a scalar size_t vs1.
vuint8mf8_t __riscv_vrgather_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                 vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vrgather_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                 size_t vs1, size_t vl);
vuint8mf4_t __riscv_vrgather_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                 vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vrgather_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                 size_t vs1, size_t vl);
vuint8mf2_t __riscv_vrgather_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                 vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vrgather_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                 size_t vs1, size_t vl);
vuint8m1_t __riscv_vrgather_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vrgather_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                size_t vs1, size_t vl);
vuint8m2_t __riscv_vrgather_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vrgather_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                size_t vs1, size_t vl);
vuint8m4_t __riscv_vrgather_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vrgather_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                size_t vs1, size_t vl);
vuint8m8_t __riscv_vrgather_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vrgather_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                size_t vs1, size_t vl);
vuint16mf4_t __riscv_vrgather_tum(vbool64_t vm, vuint16mf4_t vd,
                                  vuint16mf4_t vs2, vuint16mf4_t vs1,
                                  size_t vl);
vuint16mf4_t __riscv_vrgather_tum(vbool64_t vm, vuint16mf4_t vd,
                                  vuint16mf4_t vs2, size_t vs1, size_t vl);
vuint16mf2_t __riscv_vrgather_tum(vbool32_t vm, vuint16mf2_t vd,
                                  vuint16mf2_t vs2, vuint16mf2_t vs1,
                                  size_t vl);
vuint16mf2_t __riscv_vrgather_tum(vbool32_t vm, vuint16mf2_t vd,
                                  vuint16mf2_t vs2, size_t vs1, size_t vl);
vuint16m1_t __riscv_vrgather_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                 vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vrgather_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                 size_t vs1, size_t vl);
vuint16m2_t __riscv_vrgather_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                 vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vrgather_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                 size_t vs1, size_t vl);
vuint16m4_t __riscv_vrgather_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                 vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vrgather_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                 size_t vs1, size_t vl);
vuint16m8_t __riscv_vrgather_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                 vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vrgather_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                 size_t vs1, size_t vl);
vuint32mf2_t __riscv_vrgather_tum(vbool64_t vm, vuint32mf2_t vd,
                                  vuint32mf2_t vs2, vuint32mf2_t vs1,
                                  size_t vl);
vuint32mf2_t __riscv_vrgather_tum(vbool64_t vm, vuint32mf2_t vd,
                                  vuint32mf2_t vs2, size_t vs1, size_t vl);
vuint32m1_t __riscv_vrgather_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                 vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vrgather_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                 size_t vs1, size_t vl);
vuint32m2_t __riscv_vrgather_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                 vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vrgather_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                 size_t vs1, size_t vl);
vuint32m4_t __riscv_vrgather_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                 vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vrgather_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                 size_t vs1, size_t vl);
vuint32m8_t __riscv_vrgather_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                 vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vrgather_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                 size_t vs1, size_t vl);
vuint64m1_t __riscv_vrgather_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                 vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vrgather_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                 size_t vs1, size_t vl);
vuint64m2_t __riscv_vrgather_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                 vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vrgather_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                 size_t vs1, size_t vl);
vuint64m4_t __riscv_vrgather_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                 vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vrgather_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                 size_t vs1, size_t vl);
vuint64m8_t __riscv_vrgather_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                 vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vrgather_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                 size_t vs1, size_t vl);
// __riscv_vrgatherei16_tum: unsigned integer overloads.
// vs1 is always a vuint16 vector with LMUL = data LMUL * 16 / SEW; vm is the
// vboolN_t mask with N = SEW / LMUL of the data type.
// The 8-bit list stops at m4: no vuint8m8_t overload is declared here.
vuint8mf8_t __riscv_vrgatherei16_tum(vbool64_t vm, vuint8mf8_t vd,
                                     vuint8mf8_t vs2, vuint16mf4_t vs1,
                                     size_t vl);
vuint8mf4_t __riscv_vrgatherei16_tum(vbool32_t vm, vuint8mf4_t vd,
                                     vuint8mf4_t vs2, vuint16mf2_t vs1,
                                     size_t vl);
vuint8mf2_t __riscv_vrgatherei16_tum(vbool16_t vm, vuint8mf2_t vd,
                                     vuint8mf2_t vs2, vuint16m1_t vs1,
                                     size_t vl);
vuint8m1_t __riscv_vrgatherei16_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                    vuint16m2_t vs1, size_t vl);
vuint8m2_t __riscv_vrgatherei16_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                    vuint16m4_t vs1, size_t vl);
vuint8m4_t __riscv_vrgatherei16_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                    vuint16m8_t vs1, size_t vl);
vuint16mf4_t __riscv_vrgatherei16_tum(vbool64_t vm, vuint16mf4_t vd,
                                      vuint16mf4_t vs2, vuint16mf4_t vs1,
                                      size_t vl);
vuint16mf2_t __riscv_vrgatherei16_tum(vbool32_t vm, vuint16mf2_t vd,
                                      vuint16mf2_t vs2, vuint16mf2_t vs1,
                                      size_t vl);
vuint16m1_t __riscv_vrgatherei16_tum(vbool16_t vm, vuint16m1_t vd,
                                     vuint16m1_t vs2, vuint16m1_t vs1,
                                     size_t vl);
vuint16m2_t __riscv_vrgatherei16_tum(vbool8_t vm, vuint16m2_t vd,
                                     vuint16m2_t vs2, vuint16m2_t vs1,
                                     size_t vl);
vuint16m4_t __riscv_vrgatherei16_tum(vbool4_t vm, vuint16m4_t vd,
                                     vuint16m4_t vs2, vuint16m4_t vs1,
                                     size_t vl);
vuint16m8_t __riscv_vrgatherei16_tum(vbool2_t vm, vuint16m8_t vd,
                                     vuint16m8_t vs2, vuint16m8_t vs1,
                                     size_t vl);
vuint32mf2_t __riscv_vrgatherei16_tum(vbool64_t vm, vuint32mf2_t vd,
                                      vuint32mf2_t vs2, vuint16mf4_t vs1,
                                      size_t vl);
vuint32m1_t __riscv_vrgatherei16_tum(vbool32_t vm, vuint32m1_t vd,
                                     vuint32m1_t vs2, vuint16mf2_t vs1,
                                     size_t vl);
vuint32m2_t __riscv_vrgatherei16_tum(vbool16_t vm, vuint32m2_t vd,
                                     vuint32m2_t vs2, vuint16m1_t vs1,
                                     size_t vl);
vuint32m4_t __riscv_vrgatherei16_tum(vbool8_t vm, vuint32m4_t vd,
                                     vuint32m4_t vs2, vuint16m2_t vs1,
                                     size_t vl);
vuint32m8_t __riscv_vrgatherei16_tum(vbool4_t vm, vuint32m8_t vd,
                                     vuint32m8_t vs2, vuint16m4_t vs1,
                                     size_t vl);
vuint64m1_t __riscv_vrgatherei16_tum(vbool64_t vm, vuint64m1_t vd,
                                     vuint64m1_t vs2, vuint16mf4_t vs1,
                                     size_t vl);
vuint64m2_t __riscv_vrgatherei16_tum(vbool32_t vm, vuint64m2_t vd,
                                     vuint64m2_t vs2, vuint16mf2_t vs1,
                                     size_t vl);
vuint64m4_t __riscv_vrgatherei16_tum(vbool16_t vm, vuint64m4_t vd,
                                     vuint64m4_t vs2, vuint16m1_t vs1,
                                     size_t vl);
vuint64m8_t __riscv_vrgatherei16_tum(vbool8_t vm, vuint64m8_t vd,
                                     vuint64m8_t vs2, vuint16m2_t vs1,
                                     size_t vl);
// masked functions, _tumu policy (tail-undisturbed, mask-undisturbed)
// vrgather, _tumu suffix, floating-point data (f16, f32, f64).
// Every data type is declared twice: first with a vector index operand vs1
// (an unsigned integer vector with the same element width and LMUL as
// vd/vs2), then with a scalar size_t index vs1. In both forms vm is the
// mask, vd and vs2 share the return type, and vl is the vector length.
vfloat16mf4_t __riscv_vrgather_tumu(vbool64_t vm, vfloat16mf4_t vd,
                                    vfloat16mf4_t vs2, vuint16mf4_t vs1,
                                    size_t vl);
vfloat16mf4_t __riscv_vrgather_tumu(vbool64_t vm, vfloat16mf4_t vd,
                                    vfloat16mf4_t vs2, size_t vs1, size_t vl);
vfloat16mf2_t __riscv_vrgather_tumu(vbool32_t vm, vfloat16mf2_t vd,
                                    vfloat16mf2_t vs2, vuint16mf2_t vs1,
                                    size_t vl);
vfloat16mf2_t __riscv_vrgather_tumu(vbool32_t vm, vfloat16mf2_t vd,
                                    vfloat16mf2_t vs2, size_t vs1, size_t vl);
vfloat16m1_t __riscv_vrgather_tumu(vbool16_t vm, vfloat16m1_t vd,
                                   vfloat16m1_t vs2, vuint16m1_t vs1,
                                   size_t vl);
vfloat16m1_t __riscv_vrgather_tumu(vbool16_t vm, vfloat16m1_t vd,
                                   vfloat16m1_t vs2, size_t vs1, size_t vl);
vfloat16m2_t __riscv_vrgather_tumu(vbool8_t vm, vfloat16m2_t vd,
                                   vfloat16m2_t vs2, vuint16m2_t vs1,
                                   size_t vl);
vfloat16m2_t __riscv_vrgather_tumu(vbool8_t vm, vfloat16m2_t vd,
                                   vfloat16m2_t vs2, size_t vs1, size_t vl);
vfloat16m4_t __riscv_vrgather_tumu(vbool4_t vm, vfloat16m4_t vd,
                                   vfloat16m4_t vs2, vuint16m4_t vs1,
                                   size_t vl);
vfloat16m4_t __riscv_vrgather_tumu(vbool4_t vm, vfloat16m4_t vd,
                                   vfloat16m4_t vs2, size_t vs1, size_t vl);
vfloat16m8_t __riscv_vrgather_tumu(vbool2_t vm, vfloat16m8_t vd,
                                   vfloat16m8_t vs2, vuint16m8_t vs1,
                                   size_t vl);
vfloat16m8_t __riscv_vrgather_tumu(vbool2_t vm, vfloat16m8_t vd,
                                   vfloat16m8_t vs2, size_t vs1, size_t vl);
vfloat32mf2_t __riscv_vrgather_tumu(vbool64_t vm, vfloat32mf2_t vd,
                                    vfloat32mf2_t vs2, vuint32mf2_t vs1,
                                    size_t vl);
vfloat32mf2_t __riscv_vrgather_tumu(vbool64_t vm, vfloat32mf2_t vd,
                                    vfloat32mf2_t vs2, size_t vs1, size_t vl);
vfloat32m1_t __riscv_vrgather_tumu(vbool32_t vm, vfloat32m1_t vd,
                                   vfloat32m1_t vs2, vuint32m1_t vs1,
                                   size_t vl);
vfloat32m1_t __riscv_vrgather_tumu(vbool32_t vm, vfloat32m1_t vd,
                                   vfloat32m1_t vs2, size_t vs1, size_t vl);
vfloat32m2_t __riscv_vrgather_tumu(vbool16_t vm, vfloat32m2_t vd,
                                   vfloat32m2_t vs2, vuint32m2_t vs1,
                                   size_t vl);
vfloat32m2_t __riscv_vrgather_tumu(vbool16_t vm, vfloat32m2_t vd,
                                   vfloat32m2_t vs2, size_t vs1, size_t vl);
vfloat32m4_t __riscv_vrgather_tumu(vbool8_t vm, vfloat32m4_t vd,
                                   vfloat32m4_t vs2, vuint32m4_t vs1,
                                   size_t vl);
vfloat32m4_t __riscv_vrgather_tumu(vbool8_t vm, vfloat32m4_t vd,
                                   vfloat32m4_t vs2, size_t vs1, size_t vl);
vfloat32m8_t __riscv_vrgather_tumu(vbool4_t vm, vfloat32m8_t vd,
                                   vfloat32m8_t vs2, vuint32m8_t vs1,
                                   size_t vl);
vfloat32m8_t __riscv_vrgather_tumu(vbool4_t vm, vfloat32m8_t vd,
                                   vfloat32m8_t vs2, size_t vs1, size_t vl);
vfloat64m1_t __riscv_vrgather_tumu(vbool64_t vm, vfloat64m1_t vd,
                                   vfloat64m1_t vs2, vuint64m1_t vs1,
                                   size_t vl);
vfloat64m1_t __riscv_vrgather_tumu(vbool64_t vm, vfloat64m1_t vd,
                                   vfloat64m1_t vs2, size_t vs1, size_t vl);
vfloat64m2_t __riscv_vrgather_tumu(vbool32_t vm, vfloat64m2_t vd,
                                   vfloat64m2_t vs2, vuint64m2_t vs1,
                                   size_t vl);
vfloat64m2_t __riscv_vrgather_tumu(vbool32_t vm, vfloat64m2_t vd,
                                   vfloat64m2_t vs2, size_t vs1, size_t vl);
vfloat64m4_t __riscv_vrgather_tumu(vbool16_t vm, vfloat64m4_t vd,
                                   vfloat64m4_t vs2, vuint64m4_t vs1,
                                   size_t vl);
vfloat64m4_t __riscv_vrgather_tumu(vbool16_t vm, vfloat64m4_t vd,
                                   vfloat64m4_t vs2, size_t vs1, size_t vl);
vfloat64m8_t __riscv_vrgather_tumu(vbool8_t vm, vfloat64m8_t vd,
                                   vfloat64m8_t vs2, vuint64m8_t vs1,
                                   size_t vl);
vfloat64m8_t __riscv_vrgather_tumu(vbool8_t vm, vfloat64m8_t vd,
                                   vfloat64m8_t vs2, size_t vs1, size_t vl);
// vrgatherei16, _tumu suffix, floating-point data (f16, f32, f64).
// Only a vector-index form exists (no scalar size_t variant). The index
// operand vs1 always has 16-bit unsigned elements, so its LMUL equals the
// data LMUL for f16, is half of it for f32, and a quarter of it for f64.
vfloat16mf4_t __riscv_vrgatherei16_tumu(vbool64_t vm, vfloat16mf4_t vd,
                                        vfloat16mf4_t vs2, vuint16mf4_t vs1,
                                        size_t vl);
vfloat16mf2_t __riscv_vrgatherei16_tumu(vbool32_t vm, vfloat16mf2_t vd,
                                        vfloat16mf2_t vs2, vuint16mf2_t vs1,
                                        size_t vl);
vfloat16m1_t __riscv_vrgatherei16_tumu(vbool16_t vm, vfloat16m1_t vd,
                                       vfloat16m1_t vs2, vuint16m1_t vs1,
                                       size_t vl);
vfloat16m2_t __riscv_vrgatherei16_tumu(vbool8_t vm, vfloat16m2_t vd,
                                       vfloat16m2_t vs2, vuint16m2_t vs1,
                                       size_t vl);
vfloat16m4_t __riscv_vrgatherei16_tumu(vbool4_t vm, vfloat16m4_t vd,
                                       vfloat16m4_t vs2, vuint16m4_t vs1,
                                       size_t vl);
vfloat16m8_t __riscv_vrgatherei16_tumu(vbool2_t vm, vfloat16m8_t vd,
                                       vfloat16m8_t vs2, vuint16m8_t vs1,
                                       size_t vl);
vfloat32mf2_t __riscv_vrgatherei16_tumu(vbool64_t vm, vfloat32mf2_t vd,
                                        vfloat32mf2_t vs2, vuint16mf4_t vs1,
                                        size_t vl);
vfloat32m1_t __riscv_vrgatherei16_tumu(vbool32_t vm, vfloat32m1_t vd,
                                       vfloat32m1_t vs2, vuint16mf2_t vs1,
                                       size_t vl);
vfloat32m2_t __riscv_vrgatherei16_tumu(vbool16_t vm, vfloat32m2_t vd,
                                       vfloat32m2_t vs2, vuint16m1_t vs1,
                                       size_t vl);
vfloat32m4_t __riscv_vrgatherei16_tumu(vbool8_t vm, vfloat32m4_t vd,
                                       vfloat32m4_t vs2, vuint16m2_t vs1,
                                       size_t vl);
vfloat32m8_t __riscv_vrgatherei16_tumu(vbool4_t vm, vfloat32m8_t vd,
                                       vfloat32m8_t vs2, vuint16m4_t vs1,
                                       size_t vl);
vfloat64m1_t __riscv_vrgatherei16_tumu(vbool64_t vm, vfloat64m1_t vd,
                                       vfloat64m1_t vs2, vuint16mf4_t vs1,
                                       size_t vl);
vfloat64m2_t __riscv_vrgatherei16_tumu(vbool32_t vm, vfloat64m2_t vd,
                                       vfloat64m2_t vs2, vuint16mf2_t vs1,
                                       size_t vl);
vfloat64m4_t __riscv_vrgatherei16_tumu(vbool16_t vm, vfloat64m4_t vd,
                                       vfloat64m4_t vs2, vuint16m1_t vs1,
                                       size_t vl);
vfloat64m8_t __riscv_vrgatherei16_tumu(vbool8_t vm, vfloat64m8_t vd,
                                       vfloat64m8_t vs2, vuint16m2_t vs1,
                                       size_t vl);
// vrgather, _tumu suffix, signed integer data (i8, i16, i32, i64).
// Every data type is declared twice: first with a vector index operand vs1,
// then with a scalar size_t index vs1. The vector index is always the
// *unsigned* integer vector with the same element width and LMUL as the
// signed data (e.g. vint32m2_t data is indexed by vuint32m2_t).
vint8mf8_t __riscv_vrgather_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                 vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vrgather_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                 size_t vs1, size_t vl);
vint8mf4_t __riscv_vrgather_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                 vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vrgather_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                 size_t vs1, size_t vl);
vint8mf2_t __riscv_vrgather_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                 vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vrgather_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                 size_t vs1, size_t vl);
vint8m1_t __riscv_vrgather_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vrgather_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                size_t vs1, size_t vl);
vint8m2_t __riscv_vrgather_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vrgather_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                size_t vs1, size_t vl);
vint8m4_t __riscv_vrgather_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vrgather_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                size_t vs1, size_t vl);
vint8m8_t __riscv_vrgather_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                                vuint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vrgather_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                                size_t vs1, size_t vl);
vint16mf4_t __riscv_vrgather_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                  vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vrgather_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                  size_t vs1, size_t vl);
vint16mf2_t __riscv_vrgather_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                  vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vrgather_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                  size_t vs1, size_t vl);
vint16m1_t __riscv_vrgather_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                 vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vrgather_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                 size_t vs1, size_t vl);
vint16m2_t __riscv_vrgather_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                 vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vrgather_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                 size_t vs1, size_t vl);
vint16m4_t __riscv_vrgather_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                 vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vrgather_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                 size_t vs1, size_t vl);
vint16m8_t __riscv_vrgather_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                 vuint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vrgather_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                 size_t vs1, size_t vl);
vint32mf2_t __riscv_vrgather_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                  vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vrgather_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                  size_t vs1, size_t vl);
vint32m1_t __riscv_vrgather_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                 vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vrgather_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                 size_t vs1, size_t vl);
vint32m2_t __riscv_vrgather_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                 vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vrgather_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                 size_t vs1, size_t vl);
vint32m4_t __riscv_vrgather_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                 vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vrgather_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                 size_t vs1, size_t vl);
vint32m8_t __riscv_vrgather_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                 vuint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vrgather_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                 size_t vs1, size_t vl);
vint64m1_t __riscv_vrgather_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                 vuint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vrgather_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                 size_t vs1, size_t vl);
vint64m2_t __riscv_vrgather_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                 vuint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vrgather_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                 size_t vs1, size_t vl);
vint64m4_t __riscv_vrgather_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                 vuint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vrgather_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                 size_t vs1, size_t vl);
vint64m8_t __riscv_vrgather_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                 vuint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vrgather_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                 size_t vs1, size_t vl);
// vrgatherei16, _tumu suffix, signed integer data (i8, i16, i32, i64).
// The index operand vs1 always has 16-bit unsigned elements; its LMUL is the
// data LMUL scaled by 16/SEW (doubled for i8, equal for i16, halved for i32,
// quartered for i64). The i8 list stops at vint8m4_t, whose indices already
// use vuint16m8_t; no vint8m8_t overload is declared in this group.
vint8mf8_t __riscv_vrgatherei16_tumu(vbool64_t vm, vint8mf8_t vd,
                                     vint8mf8_t vs2, vuint16mf4_t vs1,
                                     size_t vl);
vint8mf4_t __riscv_vrgatherei16_tumu(vbool32_t vm, vint8mf4_t vd,
                                     vint8mf4_t vs2, vuint16mf2_t vs1,
                                     size_t vl);
vint8mf2_t __riscv_vrgatherei16_tumu(vbool16_t vm, vint8mf2_t vd,
                                     vint8mf2_t vs2, vuint16m1_t vs1,
                                     size_t vl);
vint8m1_t __riscv_vrgatherei16_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                    vuint16m2_t vs1, size_t vl);
vint8m2_t __riscv_vrgatherei16_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                    vuint16m4_t vs1, size_t vl);
vint8m4_t __riscv_vrgatherei16_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                    vuint16m8_t vs1, size_t vl);
vint16mf4_t __riscv_vrgatherei16_tumu(vbool64_t vm, vint16mf4_t vd,
                                      vint16mf4_t vs2, vuint16mf4_t vs1,
                                      size_t vl);
vint16mf2_t __riscv_vrgatherei16_tumu(vbool32_t vm, vint16mf2_t vd,
                                      vint16mf2_t vs2, vuint16mf2_t vs1,
                                      size_t vl);
vint16m1_t __riscv_vrgatherei16_tumu(vbool16_t vm, vint16m1_t vd,
                                     vint16m1_t vs2, vuint16m1_t vs1,
                                     size_t vl);
vint16m2_t __riscv_vrgatherei16_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                     vuint16m2_t vs1, size_t vl);
vint16m4_t __riscv_vrgatherei16_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                     vuint16m4_t vs1, size_t vl);
vint16m8_t __riscv_vrgatherei16_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                     vuint16m8_t vs1, size_t vl);
vint32mf2_t __riscv_vrgatherei16_tumu(vbool64_t vm, vint32mf2_t vd,
                                      vint32mf2_t vs2, vuint16mf4_t vs1,
                                      size_t vl);
vint32m1_t __riscv_vrgatherei16_tumu(vbool32_t vm, vint32m1_t vd,
                                     vint32m1_t vs2, vuint16mf2_t vs1,
                                     size_t vl);
vint32m2_t __riscv_vrgatherei16_tumu(vbool16_t vm, vint32m2_t vd,
                                     vint32m2_t vs2, vuint16m1_t vs1,
                                     size_t vl);
vint32m4_t __riscv_vrgatherei16_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                     vuint16m2_t vs1, size_t vl);
vint32m8_t __riscv_vrgatherei16_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                     vuint16m4_t vs1, size_t vl);
vint64m1_t __riscv_vrgatherei16_tumu(vbool64_t vm, vint64m1_t vd,
                                     vint64m1_t vs2, vuint16mf4_t vs1,
                                     size_t vl);
vint64m2_t __riscv_vrgatherei16_tumu(vbool32_t vm, vint64m2_t vd,
                                     vint64m2_t vs2, vuint16mf2_t vs1,
                                     size_t vl);
vint64m4_t __riscv_vrgatherei16_tumu(vbool16_t vm, vint64m4_t vd,
                                     vint64m4_t vs2, vuint16m1_t vs1,
                                     size_t vl);
vint64m8_t __riscv_vrgatherei16_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                     vuint16m2_t vs1, size_t vl);
// vrgather, _tumu suffix, unsigned integer data (u8, u16, u32, u64).
// Every data type is declared twice: first with a vector index operand vs1
// whose type is identical to the data type (vd, vs2 and the return value),
// then with a scalar size_t index vs1.
vuint8mf8_t __riscv_vrgather_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                  vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vrgather_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                  size_t vs1, size_t vl);
vuint8mf4_t __riscv_vrgather_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                  vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vrgather_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                  size_t vs1, size_t vl);
vuint8mf2_t __riscv_vrgather_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                  vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vrgather_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                  size_t vs1, size_t vl);
vuint8m1_t __riscv_vrgather_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                 vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vrgather_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                 size_t vs1, size_t vl);
vuint8m2_t __riscv_vrgather_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                 vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vrgather_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                 size_t vs1, size_t vl);
vuint8m4_t __riscv_vrgather_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                 vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vrgather_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                 size_t vs1, size_t vl);
vuint8m8_t __riscv_vrgather_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                 vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vrgather_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                                 size_t vs1, size_t vl);
vuint16mf4_t __riscv_vrgather_tumu(vbool64_t vm, vuint16mf4_t vd,
                                   vuint16mf4_t vs2, vuint16mf4_t vs1,
                                   size_t vl);
vuint16mf4_t __riscv_vrgather_tumu(vbool64_t vm, vuint16mf4_t vd,
                                   vuint16mf4_t vs2, size_t vs1, size_t vl);
vuint16mf2_t __riscv_vrgather_tumu(vbool32_t vm, vuint16mf2_t vd,
                                   vuint16mf2_t vs2, vuint16mf2_t vs1,
                                   size_t vl);
vuint16mf2_t __riscv_vrgather_tumu(vbool32_t vm, vuint16mf2_t vd,
                                   vuint16mf2_t vs2, size_t vs1, size_t vl);
vuint16m1_t __riscv_vrgather_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                  vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vrgather_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                  size_t vs1, size_t vl);
vuint16m2_t __riscv_vrgather_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                  vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vrgather_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                  size_t vs1, size_t vl);
vuint16m4_t __riscv_vrgather_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                  vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vrgather_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                  size_t vs1, size_t vl);
vuint16m8_t __riscv_vrgather_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                  vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vrgather_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                  size_t vs1, size_t vl);
vuint32mf2_t __riscv_vrgather_tumu(vbool64_t vm, vuint32mf2_t vd,
                                   vuint32mf2_t vs2, vuint32mf2_t vs1,
                                   size_t vl);
vuint32mf2_t __riscv_vrgather_tumu(vbool64_t vm, vuint32mf2_t vd,
                                   vuint32mf2_t vs2, size_t vs1, size_t vl);
vuint32m1_t __riscv_vrgather_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                  vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vrgather_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                  size_t vs1, size_t vl);
vuint32m2_t __riscv_vrgather_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                  vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vrgather_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                  size_t vs1, size_t vl);
vuint32m4_t __riscv_vrgather_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                  vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vrgather_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                  size_t vs1, size_t vl);
vuint32m8_t __riscv_vrgather_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                  vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vrgather_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                  size_t vs1, size_t vl);
vuint64m1_t __riscv_vrgather_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                  vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vrgather_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                  size_t vs1, size_t vl);
vuint64m2_t __riscv_vrgather_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                  vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vrgather_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                  size_t vs1, size_t vl);
vuint64m4_t __riscv_vrgather_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                  vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vrgather_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                  size_t vs1, size_t vl);
vuint64m8_t __riscv_vrgather_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                  vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vrgather_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                  size_t vs1, size_t vl);
// vrgatherei16, _tumu suffix, unsigned integer data (u8, u16, u32, u64).
// The index operand vs1 always has 16-bit unsigned elements; its LMUL is the
// data LMUL scaled by 16/SEW (doubled for u8, equal for u16, halved for u32,
// quartered for u64). The u8 list stops at vuint8m4_t, whose indices already
// use vuint16m8_t; no vuint8m8_t overload is declared in this group.
vuint8mf8_t __riscv_vrgatherei16_tumu(vbool64_t vm, vuint8mf8_t vd,
                                      vuint8mf8_t vs2, vuint16mf4_t vs1,
                                      size_t vl);
vuint8mf4_t __riscv_vrgatherei16_tumu(vbool32_t vm, vuint8mf4_t vd,
                                      vuint8mf4_t vs2, vuint16mf2_t vs1,
                                      size_t vl);
vuint8mf2_t __riscv_vrgatherei16_tumu(vbool16_t vm, vuint8mf2_t vd,
                                      vuint8mf2_t vs2, vuint16m1_t vs1,
                                      size_t vl);
vuint8m1_t __riscv_vrgatherei16_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                     vuint16m2_t vs1, size_t vl);
vuint8m2_t __riscv_vrgatherei16_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                     vuint16m4_t vs1, size_t vl);
vuint8m4_t __riscv_vrgatherei16_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                     vuint16m8_t vs1, size_t vl);
vuint16mf4_t __riscv_vrgatherei16_tumu(vbool64_t vm, vuint16mf4_t vd,
                                       vuint16mf4_t vs2, vuint16mf4_t vs1,
                                       size_t vl);
vuint16mf2_t __riscv_vrgatherei16_tumu(vbool32_t vm, vuint16mf2_t vd,
                                       vuint16mf2_t vs2, vuint16mf2_t vs1,
                                       size_t vl);
vuint16m1_t __riscv_vrgatherei16_tumu(vbool16_t vm, vuint16m1_t vd,
                                      vuint16m1_t vs2, vuint16m1_t vs1,
                                      size_t vl);
vuint16m2_t __riscv_vrgatherei16_tumu(vbool8_t vm, vuint16m2_t vd,
                                      vuint16m2_t vs2, vuint16m2_t vs1,
                                      size_t vl);
vuint16m4_t __riscv_vrgatherei16_tumu(vbool4_t vm, vuint16m4_t vd,
                                      vuint16m4_t vs2, vuint16m4_t vs1,
                                      size_t vl);
vuint16m8_t __riscv_vrgatherei16_tumu(vbool2_t vm, vuint16m8_t vd,
                                      vuint16m8_t vs2, vuint16m8_t vs1,
                                      size_t vl);
vuint32mf2_t __riscv_vrgatherei16_tumu(vbool64_t vm, vuint32mf2_t vd,
                                       vuint32mf2_t vs2, vuint16mf4_t vs1,
                                       size_t vl);
vuint32m1_t __riscv_vrgatherei16_tumu(vbool32_t vm, vuint32m1_t vd,
                                      vuint32m1_t vs2, vuint16mf2_t vs1,
                                      size_t vl);
vuint32m2_t __riscv_vrgatherei16_tumu(vbool16_t vm, vuint32m2_t vd,
                                      vuint32m2_t vs2, vuint16m1_t vs1,
                                      size_t vl);
vuint32m4_t __riscv_vrgatherei16_tumu(vbool8_t vm, vuint32m4_t vd,
                                      vuint32m4_t vs2, vuint16m2_t vs1,
                                      size_t vl);
vuint32m8_t __riscv_vrgatherei16_tumu(vbool4_t vm, vuint32m8_t vd,
                                      vuint32m8_t vs2, vuint16m4_t vs1,
                                      size_t vl);
vuint64m1_t __riscv_vrgatherei16_tumu(vbool64_t vm, vuint64m1_t vd,
                                      vuint64m1_t vs2, vuint16mf4_t vs1,
                                      size_t vl);
vuint64m2_t __riscv_vrgatherei16_tumu(vbool32_t vm, vuint64m2_t vd,
                                      vuint64m2_t vs2, vuint16mf2_t vs1,
                                      size_t vl);
vuint64m4_t __riscv_vrgatherei16_tumu(vbool16_t vm, vuint64m4_t vd,
                                      vuint64m4_t vs2, vuint16m1_t vs1,
                                      size_t vl);
vuint64m8_t __riscv_vrgatherei16_tumu(vbool8_t vm, vuint64m8_t vd,
                                      vuint64m8_t vs2, vuint16m2_t vs1,
                                      size_t vl);
// masked functions
//
// __riscv_vrgather_mu: floating-point overloads.
// Every data type is listed twice: first with a vector index operand (vs1 is
// an unsigned-integer vector with the same element width and LMUL as the
// data), then with a scalar size_t index operand.
// vm is the mask. Its vboolN_t type follows the data type, with N equal to
// the element width divided by LMUL (e.g. vfloat16mf4_t -> vbool64_t,
// vfloat32m8_t -> vbool4_t). The return type matches vd and vs2.
// NOTE(review): per the RVV specification vrgather is expected to read vs2 at
// the positions given by vs1, and "_mu" to keep masked-off elements from vd
// -- confirm against the specification before relying on this wording.
vfloat16mf4_t __riscv_vrgather_mu(vbool64_t vm, vfloat16mf4_t vd,
                                  vfloat16mf4_t vs2, vuint16mf4_t vs1,
                                  size_t vl);
vfloat16mf4_t __riscv_vrgather_mu(vbool64_t vm, vfloat16mf4_t vd,
                                  vfloat16mf4_t vs2, size_t vs1, size_t vl);
vfloat16mf2_t __riscv_vrgather_mu(vbool32_t vm, vfloat16mf2_t vd,
                                  vfloat16mf2_t vs2, vuint16mf2_t vs1,
                                  size_t vl);
vfloat16mf2_t __riscv_vrgather_mu(vbool32_t vm, vfloat16mf2_t vd,
                                  vfloat16mf2_t vs2, size_t vs1, size_t vl);
vfloat16m1_t __riscv_vrgather_mu(vbool16_t vm, vfloat16m1_t vd,
                                 vfloat16m1_t vs2, vuint16m1_t vs1, size_t vl);
vfloat16m1_t __riscv_vrgather_mu(vbool16_t vm, vfloat16m1_t vd,
                                 vfloat16m1_t vs2, size_t vs1, size_t vl);
vfloat16m2_t __riscv_vrgather_mu(vbool8_t vm, vfloat16m2_t vd, vfloat16m2_t vs2,
                                 vuint16m2_t vs1, size_t vl);
vfloat16m2_t __riscv_vrgather_mu(vbool8_t vm, vfloat16m2_t vd, vfloat16m2_t vs2,
                                 size_t vs1, size_t vl);
vfloat16m4_t __riscv_vrgather_mu(vbool4_t vm, vfloat16m4_t vd, vfloat16m4_t vs2,
                                 vuint16m4_t vs1, size_t vl);
vfloat16m4_t __riscv_vrgather_mu(vbool4_t vm, vfloat16m4_t vd, vfloat16m4_t vs2,
                                 size_t vs1, size_t vl);
vfloat16m8_t __riscv_vrgather_mu(vbool2_t vm, vfloat16m8_t vd, vfloat16m8_t vs2,
                                 vuint16m8_t vs1, size_t vl);
vfloat16m8_t __riscv_vrgather_mu(vbool2_t vm, vfloat16m8_t vd, vfloat16m8_t vs2,
                                 size_t vs1, size_t vl);
vfloat32mf2_t __riscv_vrgather_mu(vbool64_t vm, vfloat32mf2_t vd,
                                  vfloat32mf2_t vs2, vuint32mf2_t vs1,
                                  size_t vl);
vfloat32mf2_t __riscv_vrgather_mu(vbool64_t vm, vfloat32mf2_t vd,
                                  vfloat32mf2_t vs2, size_t vs1, size_t vl);
vfloat32m1_t __riscv_vrgather_mu(vbool32_t vm, vfloat32m1_t vd,
                                 vfloat32m1_t vs2, vuint32m1_t vs1, size_t vl);
vfloat32m1_t __riscv_vrgather_mu(vbool32_t vm, vfloat32m1_t vd,
                                 vfloat32m1_t vs2, size_t vs1, size_t vl);
vfloat32m2_t __riscv_vrgather_mu(vbool16_t vm, vfloat32m2_t vd,
                                 vfloat32m2_t vs2, vuint32m2_t vs1, size_t vl);
vfloat32m2_t __riscv_vrgather_mu(vbool16_t vm, vfloat32m2_t vd,
                                 vfloat32m2_t vs2, size_t vs1, size_t vl);
vfloat32m4_t __riscv_vrgather_mu(vbool8_t vm, vfloat32m4_t vd, vfloat32m4_t vs2,
                                 vuint32m4_t vs1, size_t vl);
vfloat32m4_t __riscv_vrgather_mu(vbool8_t vm, vfloat32m4_t vd, vfloat32m4_t vs2,
                                 size_t vs1, size_t vl);
vfloat32m8_t __riscv_vrgather_mu(vbool4_t vm, vfloat32m8_t vd, vfloat32m8_t vs2,
                                 vuint32m8_t vs1, size_t vl);
vfloat32m8_t __riscv_vrgather_mu(vbool4_t vm, vfloat32m8_t vd, vfloat32m8_t vs2,
                                 size_t vs1, size_t vl);
vfloat64m1_t __riscv_vrgather_mu(vbool64_t vm, vfloat64m1_t vd,
                                 vfloat64m1_t vs2, vuint64m1_t vs1, size_t vl);
vfloat64m1_t __riscv_vrgather_mu(vbool64_t vm, vfloat64m1_t vd,
                                 vfloat64m1_t vs2, size_t vs1, size_t vl);
vfloat64m2_t __riscv_vrgather_mu(vbool32_t vm, vfloat64m2_t vd,
                                 vfloat64m2_t vs2, vuint64m2_t vs1, size_t vl);
vfloat64m2_t __riscv_vrgather_mu(vbool32_t vm, vfloat64m2_t vd,
                                 vfloat64m2_t vs2, size_t vs1, size_t vl);
vfloat64m4_t __riscv_vrgather_mu(vbool16_t vm, vfloat64m4_t vd,
                                 vfloat64m4_t vs2, vuint64m4_t vs1, size_t vl);
vfloat64m4_t __riscv_vrgather_mu(vbool16_t vm, vfloat64m4_t vd,
                                 vfloat64m4_t vs2, size_t vs1, size_t vl);
vfloat64m8_t __riscv_vrgather_mu(vbool8_t vm, vfloat64m8_t vd, vfloat64m8_t vs2,
                                 vuint64m8_t vs1, size_t vl);
vfloat64m8_t __riscv_vrgather_mu(vbool8_t vm, vfloat64m8_t vd, vfloat64m8_t vs2,
                                 size_t vs1, size_t vl);
// __riscv_vrgatherei16_mu: floating-point overloads.
// Unlike __riscv_vrgather_mu, the index operand vs1 is always a vuint16
// vector and there is no scalar-index form. The LMUL of vs1 tracks the data
// type: equal for 16-bit data, one step smaller for 32-bit data (vfloat32m1_t
// -> vuint16mf2_t) and two steps smaller for 64-bit data (vfloat64m1_t ->
// vuint16mf4_t).
// vm is the mask, vd the destination operand, vs2 the source vector and vl
// the vector length; the return type matches vd and vs2.
// NOTE(review): "_mu" presumably keeps masked-off elements from vd -- confirm
// against the intrinsics policy documentation.
vfloat16mf4_t __riscv_vrgatherei16_mu(vbool64_t vm, vfloat16mf4_t vd,
                                      vfloat16mf4_t vs2, vuint16mf4_t vs1,
                                      size_t vl);
vfloat16mf2_t __riscv_vrgatherei16_mu(vbool32_t vm, vfloat16mf2_t vd,
                                      vfloat16mf2_t vs2, vuint16mf2_t vs1,
                                      size_t vl);
vfloat16m1_t __riscv_vrgatherei16_mu(vbool16_t vm, vfloat16m1_t vd,
                                     vfloat16m1_t vs2, vuint16m1_t vs1,
                                     size_t vl);
vfloat16m2_t __riscv_vrgatherei16_mu(vbool8_t vm, vfloat16m2_t vd,
                                     vfloat16m2_t vs2, vuint16m2_t vs1,
                                     size_t vl);
vfloat16m4_t __riscv_vrgatherei16_mu(vbool4_t vm, vfloat16m4_t vd,
                                     vfloat16m4_t vs2, vuint16m4_t vs1,
                                     size_t vl);
vfloat16m8_t __riscv_vrgatherei16_mu(vbool2_t vm, vfloat16m8_t vd,
                                     vfloat16m8_t vs2, vuint16m8_t vs1,
                                     size_t vl);
vfloat32mf2_t __riscv_vrgatherei16_mu(vbool64_t vm, vfloat32mf2_t vd,
                                      vfloat32mf2_t vs2, vuint16mf4_t vs1,
                                      size_t vl);
vfloat32m1_t __riscv_vrgatherei16_mu(vbool32_t vm, vfloat32m1_t vd,
                                     vfloat32m1_t vs2, vuint16mf2_t vs1,
                                     size_t vl);
vfloat32m2_t __riscv_vrgatherei16_mu(vbool16_t vm, vfloat32m2_t vd,
                                     vfloat32m2_t vs2, vuint16m1_t vs1,
                                     size_t vl);
vfloat32m4_t __riscv_vrgatherei16_mu(vbool8_t vm, vfloat32m4_t vd,
                                     vfloat32m4_t vs2, vuint16m2_t vs1,
                                     size_t vl);
vfloat32m8_t __riscv_vrgatherei16_mu(vbool4_t vm, vfloat32m8_t vd,
                                     vfloat32m8_t vs2, vuint16m4_t vs1,
                                     size_t vl);
vfloat64m1_t __riscv_vrgatherei16_mu(vbool64_t vm, vfloat64m1_t vd,
                                     vfloat64m1_t vs2, vuint16mf4_t vs1,
                                     size_t vl);
vfloat64m2_t __riscv_vrgatherei16_mu(vbool32_t vm, vfloat64m2_t vd,
                                     vfloat64m2_t vs2, vuint16mf2_t vs1,
                                     size_t vl);
vfloat64m4_t __riscv_vrgatherei16_mu(vbool16_t vm, vfloat64m4_t vd,
                                     vfloat64m4_t vs2, vuint16m1_t vs1,
                                     size_t vl);
vfloat64m8_t __riscv_vrgatherei16_mu(vbool8_t vm, vfloat64m8_t vd,
                                     vfloat64m8_t vs2, vuint16m2_t vs1,
                                     size_t vl);
// __riscv_vrgather_mu: signed-integer overloads (8-, 16-, 32- and 64-bit
// elements).
// Every data type is listed twice: first with a vector index operand, then
// with a scalar size_t index operand. The vector index vs1 is the unsigned
// type with the same element width and LMUL as the data (vint8mf8_t ->
// vuint8mf8_t, vint64m8_t -> vuint64m8_t).
// vm is the mask; its vboolN_t type has N equal to the element width divided
// by LMUL (vint8m8_t -> vbool1_t, vint64m1_t -> vbool64_t). The return type
// matches vd and vs2.
// NOTE(review): gather semantics and the meaning of "_mu" (masked-off
// elements taken from vd) are not visible in these prototypes -- confirm
// against the RVV specification.
vint8mf8_t __riscv_vrgather_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                               vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vrgather_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                               size_t vs1, size_t vl);
vint8mf4_t __riscv_vrgather_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                               vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vrgather_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                               size_t vs1, size_t vl);
vint8mf2_t __riscv_vrgather_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                               vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vrgather_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                               size_t vs1, size_t vl);
vint8m1_t __riscv_vrgather_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                              vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vrgather_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                              size_t vs1, size_t vl);
vint8m2_t __riscv_vrgather_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                              vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vrgather_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                              size_t vs1, size_t vl);
vint8m4_t __riscv_vrgather_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                              vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vrgather_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                              size_t vs1, size_t vl);
vint8m8_t __riscv_vrgather_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                              vuint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vrgather_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                              size_t vs1, size_t vl);
vint16mf4_t __riscv_vrgather_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vrgather_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                size_t vs1, size_t vl);
vint16mf2_t __riscv_vrgather_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vrgather_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                size_t vs1, size_t vl);
vint16m1_t __riscv_vrgather_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                               vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vrgather_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                               size_t vs1, size_t vl);
vint16m2_t __riscv_vrgather_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                               vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vrgather_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                               size_t vs1, size_t vl);
vint16m4_t __riscv_vrgather_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                               vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vrgather_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                               size_t vs1, size_t vl);
vint16m8_t __riscv_vrgather_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                               vuint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vrgather_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                               size_t vs1, size_t vl);
vint32mf2_t __riscv_vrgather_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vrgather_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                size_t vs1, size_t vl);
vint32m1_t __riscv_vrgather_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                               vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vrgather_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                               size_t vs1, size_t vl);
vint32m2_t __riscv_vrgather_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                               vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vrgather_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                               size_t vs1, size_t vl);
vint32m4_t __riscv_vrgather_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                               vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vrgather_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                               size_t vs1, size_t vl);
vint32m8_t __riscv_vrgather_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                               vuint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vrgather_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                               size_t vs1, size_t vl);
vint64m1_t __riscv_vrgather_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                               vuint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vrgather_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                               size_t vs1, size_t vl);
vint64m2_t __riscv_vrgather_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                               vuint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vrgather_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                               size_t vs1, size_t vl);
vint64m4_t __riscv_vrgather_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                               vuint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vrgather_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                               size_t vs1, size_t vl);
vint64m8_t __riscv_vrgather_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                               vuint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vrgather_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                               size_t vs1, size_t vl);
// __riscv_vrgatherei16_mu: signed-integer overloads.
// The index operand vs1 is always a vuint16 vector and there is no
// scalar-index form. The LMUL of vs1 tracks the data type: one step larger
// for 8-bit data (vint8m1_t -> vuint16m2_t), equal for 16-bit data, one step
// smaller for 32-bit data and two steps smaller for 64-bit data.
// The 8-bit overloads stop at vint8m4_t (index type vuint16m8_t); this list
// has no vint8m8_t entry.
// NOTE(review): presumably because vint8m8_t would need a 16-bit index type
// with LMUL 16, which does not exist -- confirm against the RVV
// specification.
// vm is the mask, vd the destination operand, vs2 the source vector and vl
// the vector length; the return type matches vd and vs2.
vint8mf8_t __riscv_vrgatherei16_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                   vuint16mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vrgatherei16_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                   vuint16mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vrgatherei16_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                   vuint16m1_t vs1, size_t vl);
vint8m1_t __riscv_vrgatherei16_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                                  vuint16m2_t vs1, size_t vl);
vint8m2_t __riscv_vrgatherei16_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                                  vuint16m4_t vs1, size_t vl);
vint8m4_t __riscv_vrgatherei16_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                                  vuint16m8_t vs1, size_t vl);
vint16mf4_t __riscv_vrgatherei16_mu(vbool64_t vm, vint16mf4_t vd,
                                    vint16mf4_t vs2, vuint16mf4_t vs1,
                                    size_t vl);
vint16mf2_t __riscv_vrgatherei16_mu(vbool32_t vm, vint16mf2_t vd,
                                    vint16mf2_t vs2, vuint16mf2_t vs1,
                                    size_t vl);
vint16m1_t __riscv_vrgatherei16_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                   vuint16m1_t vs1, size_t vl);
vint16m2_t __riscv_vrgatherei16_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                   vuint16m2_t vs1, size_t vl);
vint16m4_t __riscv_vrgatherei16_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                   vuint16m4_t vs1, size_t vl);
vint16m8_t __riscv_vrgatherei16_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                   vuint16m8_t vs1, size_t vl);
vint32mf2_t __riscv_vrgatherei16_mu(vbool64_t vm, vint32mf2_t vd,
                                    vint32mf2_t vs2, vuint16mf4_t vs1,
                                    size_t vl);
vint32m1_t __riscv_vrgatherei16_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                   vuint16mf2_t vs1, size_t vl);
vint32m2_t __riscv_vrgatherei16_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                   vuint16m1_t vs1, size_t vl);
vint32m4_t __riscv_vrgatherei16_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                   vuint16m2_t vs1, size_t vl);
vint32m8_t __riscv_vrgatherei16_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                   vuint16m4_t vs1, size_t vl);
vint64m1_t __riscv_vrgatherei16_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                   vuint16mf4_t vs1, size_t vl);
vint64m2_t __riscv_vrgatherei16_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                   vuint16mf2_t vs1, size_t vl);
vint64m4_t __riscv_vrgatherei16_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                   vuint16m1_t vs1, size_t vl);
vint64m8_t __riscv_vrgatherei16_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                   vuint16m2_t vs1, size_t vl);
// __riscv_vrgather_mu: unsigned-integer overloads (8-, 16-, 32- and 64-bit
// elements).
// Every data type is listed twice: first with a vector index operand, then
// with a scalar size_t index operand. In the vector form vs1 has exactly the
// same type as the data operands vd and vs2.
// vm is the mask; its vboolN_t type has N equal to the element width divided
// by LMUL (vuint8m8_t -> vbool1_t, vuint64m1_t -> vbool64_t). The return type
// matches vd and vs2.
// NOTE(review): gather semantics and the meaning of "_mu" (masked-off
// elements taken from vd) are not visible in these prototypes -- confirm
// against the RVV specification.
vuint8mf8_t __riscv_vrgather_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vrgather_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                size_t vs1, size_t vl);
vuint8mf4_t __riscv_vrgather_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vrgather_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                size_t vs1, size_t vl);
vuint8mf2_t __riscv_vrgather_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vrgather_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                size_t vs1, size_t vl);
vuint8m1_t __riscv_vrgather_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                               vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vrgather_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                               size_t vs1, size_t vl);
vuint8m2_t __riscv_vrgather_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                               vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vrgather_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                               size_t vs1, size_t vl);
vuint8m4_t __riscv_vrgather_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                               vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vrgather_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                               size_t vs1, size_t vl);
vuint8m8_t __riscv_vrgather_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                               vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vrgather_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                               size_t vs1, size_t vl);
vuint16mf4_t __riscv_vrgather_mu(vbool64_t vm, vuint16mf4_t vd,
                                 vuint16mf4_t vs2, vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vrgather_mu(vbool64_t vm, vuint16mf4_t vd,
                                 vuint16mf4_t vs2, size_t vs1, size_t vl);
vuint16mf2_t __riscv_vrgather_mu(vbool32_t vm, vuint16mf2_t vd,
                                 vuint16mf2_t vs2, vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vrgather_mu(vbool32_t vm, vuint16mf2_t vd,
                                 vuint16mf2_t vs2, size_t vs1, size_t vl);
vuint16m1_t __riscv_vrgather_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vrgather_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                size_t vs1, size_t vl);
vuint16m2_t __riscv_vrgather_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vrgather_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                size_t vs1, size_t vl);
vuint16m4_t __riscv_vrgather_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vrgather_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                size_t vs1, size_t vl);
vuint16m8_t __riscv_vrgather_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vrgather_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                size_t vs1, size_t vl);
vuint32mf2_t __riscv_vrgather_mu(vbool64_t vm, vuint32mf2_t vd,
                                 vuint32mf2_t vs2, vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vrgather_mu(vbool64_t vm, vuint32mf2_t vd,
                                 vuint32mf2_t vs2, size_t vs1, size_t vl);
vuint32m1_t __riscv_vrgather_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vrgather_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                size_t vs1, size_t vl);
vuint32m2_t __riscv_vrgather_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vrgather_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                size_t vs1, size_t vl);
vuint32m4_t __riscv_vrgather_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vrgather_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                size_t vs1, size_t vl);
vuint32m8_t __riscv_vrgather_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vrgather_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                size_t vs1, size_t vl);
vuint64m1_t __riscv_vrgather_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vrgather_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                size_t vs1, size_t vl);
vuint64m2_t __riscv_vrgather_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vrgather_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                size_t vs1, size_t vl);
vuint64m4_t __riscv_vrgather_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vrgather_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                size_t vs1, size_t vl);
vuint64m8_t __riscv_vrgather_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vrgather_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                size_t vs1, size_t vl);
// __riscv_vrgatherei16_mu: unsigned-integer overloads.
// The index operand vs1 is always a vuint16 vector and there is no
// scalar-index form. The LMUL of vs1 tracks the data type: one step larger
// for 8-bit data (vuint8m1_t -> vuint16m2_t), equal for 16-bit data, one step
// smaller for 32-bit data and two steps smaller for 64-bit data.
// The 8-bit overloads stop at vuint8m4_t (index type vuint16m8_t); this list
// has no vuint8m8_t entry.
// NOTE(review): presumably because vuint8m8_t would need a 16-bit index type
// with LMUL 16, which does not exist -- confirm against the RVV
// specification.
// vm is the mask, vd the destination operand, vs2 the source vector and vl
// the vector length; the return type matches vd and vs2.
vuint8mf8_t __riscv_vrgatherei16_mu(vbool64_t vm, vuint8mf8_t vd,
                                    vuint8mf8_t vs2, vuint16mf4_t vs1,
                                    size_t vl);
vuint8mf4_t __riscv_vrgatherei16_mu(vbool32_t vm, vuint8mf4_t vd,
                                    vuint8mf4_t vs2, vuint16mf2_t vs1,
                                    size_t vl);
vuint8mf2_t __riscv_vrgatherei16_mu(vbool16_t vm, vuint8mf2_t vd,
                                    vuint8mf2_t vs2, vuint16m1_t vs1,
                                    size_t vl);
vuint8m1_t __riscv_vrgatherei16_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                                   vuint16m2_t vs1, size_t vl);
vuint8m2_t __riscv_vrgatherei16_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                                   vuint16m4_t vs1, size_t vl);
vuint8m4_t __riscv_vrgatherei16_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                                   vuint16m8_t vs1, size_t vl);
vuint16mf4_t __riscv_vrgatherei16_mu(vbool64_t vm, vuint16mf4_t vd,
                                     vuint16mf4_t vs2, vuint16mf4_t vs1,
                                     size_t vl);
vuint16mf2_t __riscv_vrgatherei16_mu(vbool32_t vm, vuint16mf2_t vd,
                                     vuint16mf2_t vs2, vuint16mf2_t vs1,
                                     size_t vl);
vuint16m1_t __riscv_vrgatherei16_mu(vbool16_t vm, vuint16m1_t vd,
                                    vuint16m1_t vs2, vuint16m1_t vs1,
                                    size_t vl);
vuint16m2_t __riscv_vrgatherei16_mu(vbool8_t vm, vuint16m2_t vd,
                                    vuint16m2_t vs2, vuint16m2_t vs1,
                                    size_t vl);
vuint16m4_t __riscv_vrgatherei16_mu(vbool4_t vm, vuint16m4_t vd,
                                    vuint16m4_t vs2, vuint16m4_t vs1,
                                    size_t vl);
vuint16m8_t __riscv_vrgatherei16_mu(vbool2_t vm, vuint16m8_t vd,
                                    vuint16m8_t vs2, vuint16m8_t vs1,
                                    size_t vl);
vuint32mf2_t __riscv_vrgatherei16_mu(vbool64_t vm, vuint32mf2_t vd,
                                     vuint32mf2_t vs2, vuint16mf4_t vs1,
                                     size_t vl);
vuint32m1_t __riscv_vrgatherei16_mu(vbool32_t vm, vuint32m1_t vd,
                                    vuint32m1_t vs2, vuint16mf2_t vs1,
                                    size_t vl);
vuint32m2_t __riscv_vrgatherei16_mu(vbool16_t vm, vuint32m2_t vd,
                                    vuint32m2_t vs2, vuint16m1_t vs1,
                                    size_t vl);
vuint32m4_t __riscv_vrgatherei16_mu(vbool8_t vm, vuint32m4_t vd,
                                    vuint32m4_t vs2, vuint16m2_t vs1,
                                    size_t vl);
vuint32m8_t __riscv_vrgatherei16_mu(vbool4_t vm, vuint32m8_t vd,
                                    vuint32m8_t vs2, vuint16m4_t vs1,
                                    size_t vl);
vuint64m1_t __riscv_vrgatherei16_mu(vbool64_t vm, vuint64m1_t vd,
                                    vuint64m1_t vs2, vuint16mf4_t vs1,
                                    size_t vl);
vuint64m2_t __riscv_vrgatherei16_mu(vbool32_t vm, vuint64m2_t vd,
                                    vuint64m2_t vs2, vuint16mf2_t vs1,
                                    size_t vl);
vuint64m4_t __riscv_vrgatherei16_mu(vbool16_t vm, vuint64m4_t vd,
                                    vuint64m4_t vs2, vuint16m1_t vs1,
                                    size_t vl);
vuint64m8_t __riscv_vrgatherei16_mu(vbool8_t vm, vuint64m8_t vd,
                                    vuint64m8_t vs2, vuint16m2_t vs1,
                                    size_t vl);

Vector Compress Intrinsics

// __riscv_vcompress_tu: floating-point overloads.
// Each overload takes a destination operand vd, a source vector vs2 of the
// same type, a mask-typed operand vs1 and the vector length vl; the return
// type matches vd and vs2. There is no separate vm parameter.
// The vboolN_t type of vs1 follows the data type, with N equal to the element
// width divided by LMUL (vfloat16mf4_t -> vbool64_t, vfloat32m8_t ->
// vbool4_t).
// NOTE(review): per the RVV specification vcompress is expected to pack the
// elements of vs2 selected by vs1 into the low elements of the result, with
// "_tu" keeping the remaining (tail) elements from vd -- confirm against the
// specification.
vfloat16mf4_t __riscv_vcompress_tu(vfloat16mf4_t vd, vfloat16mf4_t vs2,
                                   vbool64_t vs1, size_t vl);
vfloat16mf2_t __riscv_vcompress_tu(vfloat16mf2_t vd, vfloat16mf2_t vs2,
                                   vbool32_t vs1, size_t vl);
vfloat16m1_t __riscv_vcompress_tu(vfloat16m1_t vd, vfloat16m1_t vs2,
                                  vbool16_t vs1, size_t vl);
vfloat16m2_t __riscv_vcompress_tu(vfloat16m2_t vd, vfloat16m2_t vs2,
                                  vbool8_t vs1, size_t vl);
vfloat16m4_t __riscv_vcompress_tu(vfloat16m4_t vd, vfloat16m4_t vs2,
                                  vbool4_t vs1, size_t vl);
vfloat16m8_t __riscv_vcompress_tu(vfloat16m8_t vd, vfloat16m8_t vs2,
                                  vbool2_t vs1, size_t vl);
vfloat32mf2_t __riscv_vcompress_tu(vfloat32mf2_t vd, vfloat32mf2_t vs2,
                                   vbool64_t vs1, size_t vl);
vfloat32m1_t __riscv_vcompress_tu(vfloat32m1_t vd, vfloat32m1_t vs2,
                                  vbool32_t vs1, size_t vl);
vfloat32m2_t __riscv_vcompress_tu(vfloat32m2_t vd, vfloat32m2_t vs2,
                                  vbool16_t vs1, size_t vl);
vfloat32m4_t __riscv_vcompress_tu(vfloat32m4_t vd, vfloat32m4_t vs2,
                                  vbool8_t vs1, size_t vl);
vfloat32m8_t __riscv_vcompress_tu(vfloat32m8_t vd, vfloat32m8_t vs2,
                                  vbool4_t vs1, size_t vl);
vfloat64m1_t __riscv_vcompress_tu(vfloat64m1_t vd, vfloat64m1_t vs2,
                                  vbool64_t vs1, size_t vl);
vfloat64m2_t __riscv_vcompress_tu(vfloat64m2_t vd, vfloat64m2_t vs2,
                                  vbool32_t vs1, size_t vl);
vfloat64m4_t __riscv_vcompress_tu(vfloat64m4_t vd, vfloat64m4_t vs2,
                                  vbool16_t vs1, size_t vl);
vfloat64m8_t __riscv_vcompress_tu(vfloat64m8_t vd, vfloat64m8_t vs2,
                                  vbool8_t vs1, size_t vl);
// __riscv_vcompress_tu: signed-integer overloads (8-, 16-, 32- and 64-bit
// elements).
// Each overload takes a destination operand vd, a source vector vs2 of the
// same type, a mask-typed operand vs1 and the vector length vl; the return
// type matches vd and vs2. There is no separate vm parameter.
// The vboolN_t type of vs1 has N equal to the element width divided by LMUL
// (vint8m8_t -> vbool1_t, vint64m1_t -> vbool64_t).
// NOTE(review): compress semantics and the meaning of "_tu" (tail elements
// taken from vd) are not visible in these prototypes -- confirm against the
// RVV specification.
vint8mf8_t __riscv_vcompress_tu(vint8mf8_t vd, vint8mf8_t vs2, vbool64_t vs1,
                                size_t vl);
vint8mf4_t __riscv_vcompress_tu(vint8mf4_t vd, vint8mf4_t vs2, vbool32_t vs1,
                                size_t vl);
vint8mf2_t __riscv_vcompress_tu(vint8mf2_t vd, vint8mf2_t vs2, vbool16_t vs1,
                                size_t vl);
vint8m1_t __riscv_vcompress_tu(vint8m1_t vd, vint8m1_t vs2, vbool8_t vs1,
                               size_t vl);
vint8m2_t __riscv_vcompress_tu(vint8m2_t vd, vint8m2_t vs2, vbool4_t vs1,
                               size_t vl);
vint8m4_t __riscv_vcompress_tu(vint8m4_t vd, vint8m4_t vs2, vbool2_t vs1,
                               size_t vl);
vint8m8_t __riscv_vcompress_tu(vint8m8_t vd, vint8m8_t vs2, vbool1_t vs1,
                               size_t vl);
vint16mf4_t __riscv_vcompress_tu(vint16mf4_t vd, vint16mf4_t vs2, vbool64_t vs1,
                                 size_t vl);
vint16mf2_t __riscv_vcompress_tu(vint16mf2_t vd, vint16mf2_t vs2, vbool32_t vs1,
                                 size_t vl);
vint16m1_t __riscv_vcompress_tu(vint16m1_t vd, vint16m1_t vs2, vbool16_t vs1,
                                size_t vl);
vint16m2_t __riscv_vcompress_tu(vint16m2_t vd, vint16m2_t vs2, vbool8_t vs1,
                                size_t vl);
vint16m4_t __riscv_vcompress_tu(vint16m4_t vd, vint16m4_t vs2, vbool4_t vs1,
                                size_t vl);
vint16m8_t __riscv_vcompress_tu(vint16m8_t vd, vint16m8_t vs2, vbool2_t vs1,
                                size_t vl);
vint32mf2_t __riscv_vcompress_tu(vint32mf2_t vd, vint32mf2_t vs2, vbool64_t vs1,
                                 size_t vl);
vint32m1_t __riscv_vcompress_tu(vint32m1_t vd, vint32m1_t vs2, vbool32_t vs1,
                                size_t vl);
vint32m2_t __riscv_vcompress_tu(vint32m2_t vd, vint32m2_t vs2, vbool16_t vs1,
                                size_t vl);
vint32m4_t __riscv_vcompress_tu(vint32m4_t vd, vint32m4_t vs2, vbool8_t vs1,
                                size_t vl);
vint32m8_t __riscv_vcompress_tu(vint32m8_t vd, vint32m8_t vs2, vbool4_t vs1,
                                size_t vl);
vint64m1_t __riscv_vcompress_tu(vint64m1_t vd, vint64m1_t vs2, vbool64_t vs1,
                                size_t vl);
vint64m2_t __riscv_vcompress_tu(vint64m2_t vd, vint64m2_t vs2, vbool32_t vs1,
                                size_t vl);
vint64m4_t __riscv_vcompress_tu(vint64m4_t vd, vint64m4_t vs2, vbool16_t vs1,
                                size_t vl);
vint64m8_t __riscv_vcompress_tu(vint64m8_t vd, vint64m8_t vs2, vbool8_t vs1,
                                size_t vl);
// __riscv_vcompress_tu: unsigned-integer overloads (8-, 16-, 32- and 64-bit
// elements).
// Each overload takes a destination operand vd, a source vector vs2 of the
// same type, a mask-typed operand vs1 and the vector length vl; the return
// type matches vd and vs2. There is no separate vm parameter.
// The vboolN_t type of vs1 has N equal to the element width divided by LMUL
// (vuint8m8_t -> vbool1_t, vuint64m1_t -> vbool64_t).
// NOTE(review): compress semantics and the meaning of "_tu" (tail elements
// taken from vd) are not visible in these prototypes -- confirm against the
// RVV specification.
vuint8mf8_t __riscv_vcompress_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vbool64_t vs1,
                                 size_t vl);
vuint8mf4_t __riscv_vcompress_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vbool32_t vs1,
                                 size_t vl);
vuint8mf2_t __riscv_vcompress_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vbool16_t vs1,
                                 size_t vl);
vuint8m1_t __riscv_vcompress_tu(vuint8m1_t vd, vuint8m1_t vs2, vbool8_t vs1,
                                size_t vl);
vuint8m2_t __riscv_vcompress_tu(vuint8m2_t vd, vuint8m2_t vs2, vbool4_t vs1,
                                size_t vl);
vuint8m4_t __riscv_vcompress_tu(vuint8m4_t vd, vuint8m4_t vs2, vbool2_t vs1,
                                size_t vl);
vuint8m8_t __riscv_vcompress_tu(vuint8m8_t vd, vuint8m8_t vs2, vbool1_t vs1,
                                size_t vl);
vuint16mf4_t __riscv_vcompress_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                                  vbool64_t vs1, size_t vl);
vuint16mf2_t __riscv_vcompress_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                                  vbool32_t vs1, size_t vl);
vuint16m1_t __riscv_vcompress_tu(vuint16m1_t vd, vuint16m1_t vs2, vbool16_t vs1,
                                 size_t vl);
vuint16m2_t __riscv_vcompress_tu(vuint16m2_t vd, vuint16m2_t vs2, vbool8_t vs1,
                                 size_t vl);
vuint16m4_t __riscv_vcompress_tu(vuint16m4_t vd, vuint16m4_t vs2, vbool4_t vs1,
                                 size_t vl);
vuint16m8_t __riscv_vcompress_tu(vuint16m8_t vd, vuint16m8_t vs2, vbool2_t vs1,
                                 size_t vl);
vuint32mf2_t __riscv_vcompress_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                                  vbool64_t vs1, size_t vl);
vuint32m1_t __riscv_vcompress_tu(vuint32m1_t vd, vuint32m1_t vs2, vbool32_t vs1,
                                 size_t vl);
vuint32m2_t __riscv_vcompress_tu(vuint32m2_t vd, vuint32m2_t vs2, vbool16_t vs1,
                                 size_t vl);
vuint32m4_t __riscv_vcompress_tu(vuint32m4_t vd, vuint32m4_t vs2, vbool8_t vs1,
                                 size_t vl);
vuint32m8_t __riscv_vcompress_tu(vuint32m8_t vd, vuint32m8_t vs2, vbool4_t vs1,
                                 size_t vl);
vuint64m1_t __riscv_vcompress_tu(vuint64m1_t vd, vuint64m1_t vs2, vbool64_t vs1,
                                 size_t vl);
vuint64m2_t __riscv_vcompress_tu(vuint64m2_t vd, vuint64m2_t vs2, vbool32_t vs1,
                                 size_t vl);
vuint64m4_t __riscv_vcompress_tu(vuint64m4_t vd, vuint64m4_t vs2, vbool16_t vs1,
                                 size_t vl);
vuint64m8_t __riscv_vcompress_tu(vuint64m8_t vd, vuint64m8_t vs2, vbool8_t vs1,
                                 size_t vl);