Vector Integer Arithmetic Intrinsics

Vector Single-Width Integer Add and Subtract Intrinsics

// __riscv_vadd_tu -- signed-integer overloads.
// Each vector type (int8/int16/int32/int64 elements, fractional and whole
// register-group suffixes mf8..m8) gets two overloads:
//   * vector-vector: (vd, vs2, vs1, vl), all three vectors of the same type;
//   * vector-scalar: (vd, vs2, rs1, vl), rs1 being the matching intN_t scalar.
// The return type always equals the type of `vd`; `vl` is a size_t.
// NOTE(review): by RVV naming convention `_tu` presumably selects the
// tail-undisturbed policy, with `vd` supplying the preserved tail elements and
// `vl` the number of elements processed -- confirm against the RVV spec.
vint8mf8_t __riscv_vadd_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                           size_t vl);
vint8mf8_t __riscv_vadd_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                           size_t vl);
vint8mf4_t __riscv_vadd_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                           size_t vl);
vint8mf4_t __riscv_vadd_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                           size_t vl);
vint8mf2_t __riscv_vadd_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                           size_t vl);
vint8mf2_t __riscv_vadd_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                           size_t vl);
vint8m1_t __riscv_vadd_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                          size_t vl);
vint8m1_t __riscv_vadd_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, size_t vl);
vint8m2_t __riscv_vadd_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                          size_t vl);
vint8m2_t __riscv_vadd_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, size_t vl);
vint8m4_t __riscv_vadd_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                          size_t vl);
vint8m4_t __riscv_vadd_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, size_t vl);
vint8m8_t __riscv_vadd_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                          size_t vl);
vint8m8_t __riscv_vadd_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, size_t vl);
vint16mf4_t __riscv_vadd_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                            size_t vl);
vint16mf4_t __riscv_vadd_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                            size_t vl);
vint16mf2_t __riscv_vadd_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                            size_t vl);
vint16mf2_t __riscv_vadd_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                            size_t vl);
vint16m1_t __riscv_vadd_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                           size_t vl);
vint16m1_t __riscv_vadd_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                           size_t vl);
vint16m2_t __riscv_vadd_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                           size_t vl);
vint16m2_t __riscv_vadd_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                           size_t vl);
vint16m4_t __riscv_vadd_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                           size_t vl);
vint16m4_t __riscv_vadd_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                           size_t vl);
vint16m8_t __riscv_vadd_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                           size_t vl);
vint16m8_t __riscv_vadd_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                           size_t vl);
vint32mf2_t __riscv_vadd_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                            size_t vl);
vint32mf2_t __riscv_vadd_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                            size_t vl);
vint32m1_t __riscv_vadd_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                           size_t vl);
vint32m1_t __riscv_vadd_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                           size_t vl);
vint32m2_t __riscv_vadd_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                           size_t vl);
vint32m2_t __riscv_vadd_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                           size_t vl);
vint32m4_t __riscv_vadd_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                           size_t vl);
vint32m4_t __riscv_vadd_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                           size_t vl);
vint32m8_t __riscv_vadd_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                           size_t vl);
vint32m8_t __riscv_vadd_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                           size_t vl);
vint64m1_t __riscv_vadd_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                           size_t vl);
vint64m1_t __riscv_vadd_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                           size_t vl);
vint64m2_t __riscv_vadd_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                           size_t vl);
vint64m2_t __riscv_vadd_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                           size_t vl);
vint64m4_t __riscv_vadd_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                           size_t vl);
vint64m4_t __riscv_vadd_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                           size_t vl);
vint64m8_t __riscv_vadd_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                           size_t vl);
vint64m8_t __riscv_vadd_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                           size_t vl);
// __riscv_vsub_tu -- signed-integer overloads.
// For every signed vector type there is a vector-vector overload
// (vd, vs2, vs1, vl) and a vector-scalar overload (vd, vs2, rs1, vl) whose
// scalar `rs1` has the element type of the vectors. The result type matches
// `vd`.
// NOTE(review): operand order is presumably vs2 - vs1 (resp. vs2 - rs1), as in
// the underlying vsub instruction, and `_tu` presumably means tail-undisturbed
// with `vd` as the pass-through operand -- confirm against the RVV spec.
vint8mf8_t __riscv_vsub_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                           size_t vl);
vint8mf8_t __riscv_vsub_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                           size_t vl);
vint8mf4_t __riscv_vsub_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                           size_t vl);
vint8mf4_t __riscv_vsub_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                           size_t vl);
vint8mf2_t __riscv_vsub_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                           size_t vl);
vint8mf2_t __riscv_vsub_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                           size_t vl);
vint8m1_t __riscv_vsub_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                          size_t vl);
vint8m1_t __riscv_vsub_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, size_t vl);
vint8m2_t __riscv_vsub_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                          size_t vl);
vint8m2_t __riscv_vsub_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, size_t vl);
vint8m4_t __riscv_vsub_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                          size_t vl);
vint8m4_t __riscv_vsub_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, size_t vl);
vint8m8_t __riscv_vsub_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                          size_t vl);
vint8m8_t __riscv_vsub_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, size_t vl);
vint16mf4_t __riscv_vsub_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                            size_t vl);
vint16mf4_t __riscv_vsub_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                            size_t vl);
vint16mf2_t __riscv_vsub_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                            size_t vl);
vint16mf2_t __riscv_vsub_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                            size_t vl);
vint16m1_t __riscv_vsub_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                           size_t vl);
vint16m1_t __riscv_vsub_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                           size_t vl);
vint16m2_t __riscv_vsub_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                           size_t vl);
vint16m2_t __riscv_vsub_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                           size_t vl);
vint16m4_t __riscv_vsub_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                           size_t vl);
vint16m4_t __riscv_vsub_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                           size_t vl);
vint16m8_t __riscv_vsub_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                           size_t vl);
vint16m8_t __riscv_vsub_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                           size_t vl);
vint32mf2_t __riscv_vsub_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                            size_t vl);
vint32mf2_t __riscv_vsub_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                            size_t vl);
vint32m1_t __riscv_vsub_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                           size_t vl);
vint32m1_t __riscv_vsub_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                           size_t vl);
vint32m2_t __riscv_vsub_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                           size_t vl);
vint32m2_t __riscv_vsub_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                           size_t vl);
vint32m4_t __riscv_vsub_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                           size_t vl);
vint32m4_t __riscv_vsub_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                           size_t vl);
vint32m8_t __riscv_vsub_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                           size_t vl);
vint32m8_t __riscv_vsub_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                           size_t vl);
vint64m1_t __riscv_vsub_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                           size_t vl);
vint64m1_t __riscv_vsub_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                           size_t vl);
vint64m2_t __riscv_vsub_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                           size_t vl);
vint64m2_t __riscv_vsub_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                           size_t vl);
vint64m4_t __riscv_vsub_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                           size_t vl);
vint64m4_t __riscv_vsub_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                           size_t vl);
vint64m8_t __riscv_vsub_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                           size_t vl);
vint64m8_t __riscv_vsub_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                           size_t vl);
// __riscv_vrsub_tu -- signed-integer overloads.
// Only a vector-scalar form (vd, vs2, rs1, vl) is declared here; unlike
// vadd/vsub this listing has no vector-vector overload. `rs1` is the intN_t
// scalar matching the vector element width and the result type matches `vd`.
// NOTE(review): presumably "reverse subtract", i.e. rs1 - vs2 per element,
// under the tail-undisturbed policy -- confirm against the RVV spec.
vint8mf8_t __riscv_vrsub_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                            size_t vl);
vint8mf4_t __riscv_vrsub_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                            size_t vl);
vint8mf2_t __riscv_vrsub_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                            size_t vl);
vint8m1_t __riscv_vrsub_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, size_t vl);
vint8m2_t __riscv_vrsub_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, size_t vl);
vint8m4_t __riscv_vrsub_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, size_t vl);
vint8m8_t __riscv_vrsub_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, size_t vl);
vint16mf4_t __riscv_vrsub_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                             size_t vl);
vint16mf2_t __riscv_vrsub_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                             size_t vl);
vint16m1_t __riscv_vrsub_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                            size_t vl);
vint16m2_t __riscv_vrsub_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                            size_t vl);
vint16m4_t __riscv_vrsub_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                            size_t vl);
vint16m8_t __riscv_vrsub_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                            size_t vl);
vint32mf2_t __riscv_vrsub_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                             size_t vl);
vint32m1_t __riscv_vrsub_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                            size_t vl);
vint32m2_t __riscv_vrsub_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                            size_t vl);
vint32m4_t __riscv_vrsub_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                            size_t vl);
vint32m8_t __riscv_vrsub_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                            size_t vl);
vint64m1_t __riscv_vrsub_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                            size_t vl);
vint64m2_t __riscv_vrsub_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                            size_t vl);
vint64m4_t __riscv_vrsub_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                            size_t vl);
vint64m8_t __riscv_vrsub_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                            size_t vl);
// __riscv_vneg_tu -- signed-integer overloads only (no unsigned variants are
// declared in this listing).
// Unary form: (vd, vs, vl) with a single source vector `vs` of the same type
// as `vd` and the return value.
// NOTE(review): presumably element-wise negation of `vs` under the
// tail-undisturbed policy, with `vd` providing the preserved tail elements --
// confirm against the RVV spec.
vint8mf8_t __riscv_vneg_tu(vint8mf8_t vd, vint8mf8_t vs, size_t vl);
vint8mf4_t __riscv_vneg_tu(vint8mf4_t vd, vint8mf4_t vs, size_t vl);
vint8mf2_t __riscv_vneg_tu(vint8mf2_t vd, vint8mf2_t vs, size_t vl);
vint8m1_t __riscv_vneg_tu(vint8m1_t vd, vint8m1_t vs, size_t vl);
vint8m2_t __riscv_vneg_tu(vint8m2_t vd, vint8m2_t vs, size_t vl);
vint8m4_t __riscv_vneg_tu(vint8m4_t vd, vint8m4_t vs, size_t vl);
vint8m8_t __riscv_vneg_tu(vint8m8_t vd, vint8m8_t vs, size_t vl);
vint16mf4_t __riscv_vneg_tu(vint16mf4_t vd, vint16mf4_t vs, size_t vl);
vint16mf2_t __riscv_vneg_tu(vint16mf2_t vd, vint16mf2_t vs, size_t vl);
vint16m1_t __riscv_vneg_tu(vint16m1_t vd, vint16m1_t vs, size_t vl);
vint16m2_t __riscv_vneg_tu(vint16m2_t vd, vint16m2_t vs, size_t vl);
vint16m4_t __riscv_vneg_tu(vint16m4_t vd, vint16m4_t vs, size_t vl);
vint16m8_t __riscv_vneg_tu(vint16m8_t vd, vint16m8_t vs, size_t vl);
vint32mf2_t __riscv_vneg_tu(vint32mf2_t vd, vint32mf2_t vs, size_t vl);
vint32m1_t __riscv_vneg_tu(vint32m1_t vd, vint32m1_t vs, size_t vl);
vint32m2_t __riscv_vneg_tu(vint32m2_t vd, vint32m2_t vs, size_t vl);
vint32m4_t __riscv_vneg_tu(vint32m4_t vd, vint32m4_t vs, size_t vl);
vint32m8_t __riscv_vneg_tu(vint32m8_t vd, vint32m8_t vs, size_t vl);
vint64m1_t __riscv_vneg_tu(vint64m1_t vd, vint64m1_t vs, size_t vl);
vint64m2_t __riscv_vneg_tu(vint64m2_t vd, vint64m2_t vs, size_t vl);
vint64m4_t __riscv_vneg_tu(vint64m4_t vd, vint64m4_t vs, size_t vl);
vint64m8_t __riscv_vneg_tu(vint64m8_t vd, vint64m8_t vs, size_t vl);
// __riscv_vadd_tu -- unsigned-integer overloads.
// Same shape as the signed overloads of this name: per vuintN type a
// vector-vector form (vd, vs2, vs1, vl) and a vector-scalar form
// (vd, vs2, rs1, vl) where `rs1` is the matching uintN_t scalar. The return
// type always equals the type of `vd`.
// NOTE(review): `_tu` presumably selects the tail-undisturbed policy with
// `vd` as the pass-through operand -- confirm against the RVV spec.
vuint8mf8_t __riscv_vadd_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                            size_t vl);
vuint8mf8_t __riscv_vadd_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                            size_t vl);
vuint8mf4_t __riscv_vadd_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                            size_t vl);
vuint8mf4_t __riscv_vadd_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                            size_t vl);
vuint8mf2_t __riscv_vadd_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                            size_t vl);
vuint8mf2_t __riscv_vadd_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m1_t __riscv_vadd_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                           size_t vl);
vuint8m1_t __riscv_vadd_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m2_t __riscv_vadd_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                           size_t vl);
vuint8m2_t __riscv_vadd_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m4_t __riscv_vadd_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                           size_t vl);
vuint8m4_t __riscv_vadd_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m8_t __riscv_vadd_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                           size_t vl);
vuint8m8_t __riscv_vadd_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                           size_t vl);
vuint16mf4_t __riscv_vadd_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vadd_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                             size_t vl);
vuint16mf2_t __riscv_vadd_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vadd_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m1_t __riscv_vadd_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                            size_t vl);
vuint16m1_t __riscv_vadd_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m2_t __riscv_vadd_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                            size_t vl);
vuint16m2_t __riscv_vadd_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m4_t __riscv_vadd_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                            size_t vl);
vuint16m4_t __riscv_vadd_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m8_t __riscv_vadd_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                            size_t vl);
vuint16m8_t __riscv_vadd_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                            size_t vl);
vuint32mf2_t __riscv_vadd_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vadd_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m1_t __riscv_vadd_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                            size_t vl);
vuint32m1_t __riscv_vadd_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m2_t __riscv_vadd_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                            size_t vl);
vuint32m2_t __riscv_vadd_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m4_t __riscv_vadd_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                            size_t vl);
vuint32m4_t __riscv_vadd_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m8_t __riscv_vadd_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                            size_t vl);
vuint32m8_t __riscv_vadd_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                            size_t vl);
vuint64m1_t __riscv_vadd_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                            size_t vl);
vuint64m1_t __riscv_vadd_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m2_t __riscv_vadd_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                            size_t vl);
vuint64m2_t __riscv_vadd_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m4_t __riscv_vadd_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                            size_t vl);
vuint64m4_t __riscv_vadd_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m8_t __riscv_vadd_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                            size_t vl);
vuint64m8_t __riscv_vadd_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                            size_t vl);
// __riscv_vsub_tu -- unsigned-integer overloads.
// Per vuintN type: a vector-vector form (vd, vs2, vs1, vl) and a
// vector-scalar form (vd, vs2, rs1, vl) with `rs1` of the matching uintN_t
// type. The return type always equals the type of `vd`.
// NOTE(review): operand order is presumably vs2 - vs1 (resp. vs2 - rs1) and
// `_tu` presumably means tail-undisturbed with `vd` as the pass-through
// operand -- confirm against the RVV spec.
vuint8mf8_t __riscv_vsub_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                            size_t vl);
vuint8mf8_t __riscv_vsub_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                            size_t vl);
vuint8mf4_t __riscv_vsub_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                            size_t vl);
vuint8mf4_t __riscv_vsub_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                            size_t vl);
vuint8mf2_t __riscv_vsub_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                            size_t vl);
vuint8mf2_t __riscv_vsub_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m1_t __riscv_vsub_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                           size_t vl);
vuint8m1_t __riscv_vsub_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m2_t __riscv_vsub_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                           size_t vl);
vuint8m2_t __riscv_vsub_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m4_t __riscv_vsub_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                           size_t vl);
vuint8m4_t __riscv_vsub_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m8_t __riscv_vsub_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                           size_t vl);
vuint8m8_t __riscv_vsub_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                           size_t vl);
vuint16mf4_t __riscv_vsub_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vsub_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                             size_t vl);
vuint16mf2_t __riscv_vsub_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vsub_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m1_t __riscv_vsub_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                            size_t vl);
vuint16m1_t __riscv_vsub_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m2_t __riscv_vsub_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                            size_t vl);
vuint16m2_t __riscv_vsub_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m4_t __riscv_vsub_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                            size_t vl);
vuint16m4_t __riscv_vsub_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m8_t __riscv_vsub_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                            size_t vl);
vuint16m8_t __riscv_vsub_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                            size_t vl);
vuint32mf2_t __riscv_vsub_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vsub_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m1_t __riscv_vsub_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                            size_t vl);
vuint32m1_t __riscv_vsub_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m2_t __riscv_vsub_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                            size_t vl);
vuint32m2_t __riscv_vsub_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m4_t __riscv_vsub_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                            size_t vl);
vuint32m4_t __riscv_vsub_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m8_t __riscv_vsub_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                            size_t vl);
vuint32m8_t __riscv_vsub_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                            size_t vl);
vuint64m1_t __riscv_vsub_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                            size_t vl);
vuint64m1_t __riscv_vsub_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m2_t __riscv_vsub_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                            size_t vl);
vuint64m2_t __riscv_vsub_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m4_t __riscv_vsub_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                            size_t vl);
vuint64m4_t __riscv_vsub_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m8_t __riscv_vsub_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                            size_t vl);
vuint64m8_t __riscv_vsub_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                            size_t vl);
// __riscv_vrsub_tu -- unsigned-integer overloads.
// Only the vector-scalar form (vd, vs2, rs1, vl) is declared, with `rs1` of
// the uintN_t type matching the vector element width. The return type always
// equals the type of `vd`.
// NOTE(review): presumably "reverse subtract", i.e. rs1 - vs2 per element,
// under the tail-undisturbed policy -- confirm against the RVV spec.
vuint8mf8_t __riscv_vrsub_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                             size_t vl);
vuint8mf4_t __riscv_vrsub_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                             size_t vl);
vuint8mf2_t __riscv_vrsub_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                             size_t vl);
vuint8m1_t __riscv_vrsub_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m2_t __riscv_vrsub_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m4_t __riscv_vrsub_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m8_t __riscv_vrsub_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                            size_t vl);
vuint16mf4_t __riscv_vrsub_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                              size_t vl);
vuint16mf2_t __riscv_vrsub_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                              size_t vl);
vuint16m1_t __riscv_vrsub_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m2_t __riscv_vrsub_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m4_t __riscv_vrsub_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m8_t __riscv_vrsub_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                             size_t vl);
vuint32mf2_t __riscv_vrsub_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                              size_t vl);
vuint32m1_t __riscv_vrsub_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m2_t __riscv_vrsub_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m4_t __riscv_vrsub_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m8_t __riscv_vrsub_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                             size_t vl);
vuint64m1_t __riscv_vrsub_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m2_t __riscv_vrsub_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m4_t __riscv_vrsub_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m8_t __riscv_vrsub_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                             size_t vl);
// masked functions, _tum policy (tail undisturbed, mask agnostic)
// __riscv_vadd_tum, signed element types: two overloads per vector type
// (int8 mf8..m8, int16 mf4..m8, int32 mf2..m8, int64 m1..m8).
//   vm        - mask operand; its vboolN_t type follows the vector type
//               (N = element width / LMUL, e.g. vint8mf8_t -> vbool64_t)
//   vd        - vector of the result type, passed ahead of the sources
//   vs2       - first source vector
//   vs1 / rs1 - second source: a vector of the same type (first overload of
//               each pair) or a scalar of the element type (second overload)
//   vl        - vector length argument
// NOTE(review): "_tum" is understood to be the tail-undisturbed,
// mask-agnostic policy of the RVV intrinsics naming scheme, with vd
// supplying the undisturbed elements - confirm against the intrinsics spec.
vint8mf8_t __riscv_vadd_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vadd_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            int8_t rs1, size_t vl);
vint8mf4_t __riscv_vadd_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vadd_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            int8_t rs1, size_t vl);
vint8mf2_t __riscv_vadd_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vadd_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint8m1_t __riscv_vadd_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vadd_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                           size_t vl);
vint8m2_t __riscv_vadd_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vadd_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                           size_t vl);
vint8m4_t __riscv_vadd_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vadd_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                           size_t vl);
vint8m8_t __riscv_vadd_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vadd_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vadd_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vadd_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint16mf2_t __riscv_vadd_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vadd_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint16m1_t __riscv_vadd_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vadd_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint16m2_t __riscv_vadd_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vadd_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint16m4_t __riscv_vadd_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vadd_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint16m8_t __riscv_vadd_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vadd_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            int16_t rs1, size_t vl);
vint32mf2_t __riscv_vadd_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vadd_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint32m1_t __riscv_vadd_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vadd_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint32m2_t __riscv_vadd_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vadd_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint32m4_t __riscv_vadd_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vadd_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint32m8_t __riscv_vadd_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vadd_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            int32_t rs1, size_t vl);
vint64m1_t __riscv_vadd_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vadd_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            int64_t rs1, size_t vl);
vint64m2_t __riscv_vadd_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vadd_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            int64_t rs1, size_t vl);
vint64m4_t __riscv_vadd_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vadd_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            int64_t rs1, size_t vl);
vint64m8_t __riscv_vadd_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vadd_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            int64_t rs1, size_t vl);
// __riscv_vsub_tum, signed element types: two overloads per vector type
// (int8 mf8..m8, int16 mf4..m8, int32 mf2..m8, int64 m1..m8).
//   vm        - mask operand; vboolN_t with N = element width / LMUL
//   vd        - vector of the result type, passed ahead of the sources
//   vs2       - first source vector
//   vs1 / rs1 - second source: a vector of the same type (first overload of
//               each pair) or a scalar of the element type (second overload)
//   vl        - vector length argument
// NOTE(review): operand order is taken to be vs2 - vs1 (or vs2 - rs1) as for
// the RVV vsub instruction, under the tail-undisturbed, mask-agnostic
// policy - confirm against the intrinsics spec.
vint8mf8_t __riscv_vsub_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vsub_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            int8_t rs1, size_t vl);
vint8mf4_t __riscv_vsub_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vsub_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            int8_t rs1, size_t vl);
vint8mf2_t __riscv_vsub_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vsub_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint8m1_t __riscv_vsub_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vsub_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                           size_t vl);
vint8m2_t __riscv_vsub_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vsub_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                           size_t vl);
vint8m4_t __riscv_vsub_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vsub_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                           size_t vl);
vint8m8_t __riscv_vsub_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vsub_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vsub_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vsub_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint16mf2_t __riscv_vsub_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vsub_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint16m1_t __riscv_vsub_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vsub_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint16m2_t __riscv_vsub_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vsub_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint16m4_t __riscv_vsub_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vsub_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint16m8_t __riscv_vsub_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vsub_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            int16_t rs1, size_t vl);
vint32mf2_t __riscv_vsub_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vsub_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint32m1_t __riscv_vsub_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vsub_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint32m2_t __riscv_vsub_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vsub_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint32m4_t __riscv_vsub_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vsub_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint32m8_t __riscv_vsub_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vsub_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            int32_t rs1, size_t vl);
vint64m1_t __riscv_vsub_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vsub_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            int64_t rs1, size_t vl);
vint64m2_t __riscv_vsub_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vsub_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            int64_t rs1, size_t vl);
vint64m4_t __riscv_vsub_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vsub_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            int64_t rs1, size_t vl);
vint64m8_t __riscv_vsub_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vsub_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            int64_t rs1, size_t vl);
// __riscv_vrsub_tum, signed element types: one overload per vector type
// (int8 mf8..m8, int16 mf4..m8, int32 mf2..m8, int64 m1..m8). Only the
// vector-scalar form (vs2, rs1) is listed; there is no vector-vector overload.
//   vm  - mask operand; vboolN_t with N = element width / LMUL
//   vd  - vector of the result type, passed ahead of the sources
//   vs2 - source vector
//   rs1 - scalar of the element type
//   vl  - vector length argument
// NOTE(review): vrsub is understood to be the reversed subtraction
// (rs1 - vs2[i]) of the RVV specification, under the tail-undisturbed,
// mask-agnostic policy - confirm against the spec.
vint8mf8_t __riscv_vrsub_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             int8_t rs1, size_t vl);
vint8mf4_t __riscv_vrsub_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             int8_t rs1, size_t vl);
vint8mf2_t __riscv_vrsub_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             int8_t rs1, size_t vl);
vint8m1_t __riscv_vrsub_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            int8_t rs1, size_t vl);
vint8m2_t __riscv_vrsub_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            int8_t rs1, size_t vl);
vint8m4_t __riscv_vrsub_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            int8_t rs1, size_t vl);
vint8m8_t __riscv_vrsub_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            int8_t rs1, size_t vl);
vint16mf4_t __riscv_vrsub_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              int16_t rs1, size_t vl);
vint16mf2_t __riscv_vrsub_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              int16_t rs1, size_t vl);
vint16m1_t __riscv_vrsub_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             int16_t rs1, size_t vl);
vint16m2_t __riscv_vrsub_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             int16_t rs1, size_t vl);
vint16m4_t __riscv_vrsub_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             int16_t rs1, size_t vl);
vint16m8_t __riscv_vrsub_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             int16_t rs1, size_t vl);
vint32mf2_t __riscv_vrsub_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              int32_t rs1, size_t vl);
vint32m1_t __riscv_vrsub_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             int32_t rs1, size_t vl);
vint32m2_t __riscv_vrsub_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             int32_t rs1, size_t vl);
vint32m4_t __riscv_vrsub_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             int32_t rs1, size_t vl);
vint32m8_t __riscv_vrsub_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             int32_t rs1, size_t vl);
vint64m1_t __riscv_vrsub_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             int64_t rs1, size_t vl);
vint64m2_t __riscv_vrsub_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             int64_t rs1, size_t vl);
vint64m4_t __riscv_vrsub_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             int64_t rs1, size_t vl);
vint64m8_t __riscv_vrsub_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             int64_t rs1, size_t vl);
// __riscv_vneg_tum, signed element types only: one unary overload per vector
// type (int8 mf8..m8, int16 mf4..m8, int32 mf2..m8, int64 m1..m8).
//   vm - mask operand; vboolN_t with N = element width / LMUL
//   vd - vector of the result type, passed ahead of the source
//   vs - the single source vector
//   vl - vector length argument
// NOTE(review): presumably element-wise negation of vs under the
// tail-undisturbed, mask-agnostic policy - confirm against the intrinsics
// spec.
vint8mf8_t __riscv_vneg_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs,
                            size_t vl);
vint8mf4_t __riscv_vneg_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs,
                            size_t vl);
vint8mf2_t __riscv_vneg_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs,
                            size_t vl);
vint8m1_t __riscv_vneg_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs, size_t vl);
vint8m2_t __riscv_vneg_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs, size_t vl);
vint8m4_t __riscv_vneg_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs, size_t vl);
vint8m8_t __riscv_vneg_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs, size_t vl);
vint16mf4_t __riscv_vneg_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs,
                             size_t vl);
vint16mf2_t __riscv_vneg_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs,
                             size_t vl);
vint16m1_t __riscv_vneg_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs,
                            size_t vl);
vint16m2_t __riscv_vneg_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs,
                            size_t vl);
vint16m4_t __riscv_vneg_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs,
                            size_t vl);
vint16m8_t __riscv_vneg_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs,
                            size_t vl);
vint32mf2_t __riscv_vneg_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs,
                             size_t vl);
vint32m1_t __riscv_vneg_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs,
                            size_t vl);
vint32m2_t __riscv_vneg_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs,
                            size_t vl);
vint32m4_t __riscv_vneg_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs,
                            size_t vl);
vint32m8_t __riscv_vneg_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs,
                            size_t vl);
vint64m1_t __riscv_vneg_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs,
                            size_t vl);
vint64m2_t __riscv_vneg_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs,
                            size_t vl);
vint64m4_t __riscv_vneg_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs,
                            size_t vl);
vint64m8_t __riscv_vneg_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs,
                            size_t vl);
// __riscv_vadd_tum, unsigned element types: two overloads per vector type
// (uint8 mf8..m8, uint16 mf4..m8, uint32 mf2..m8, uint64 m1..m8).
//   vm        - mask operand; vboolN_t with N = element width / LMUL
//   vd        - vector of the result type, passed ahead of the sources
//   vs2       - first source vector
//   vs1 / rs1 - second source: a vector of the same type (first overload of
//               each pair) or an unsigned scalar of the element type (second)
//   vl        - vector length argument
// NOTE(review): "_tum" is understood to be the tail-undisturbed,
// mask-agnostic policy of the RVV intrinsics naming scheme - confirm against
// the intrinsics spec.
vuint8mf8_t __riscv_vadd_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vadd_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vadd_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vadd_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vadd_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vadd_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vadd_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vadd_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vadd_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vadd_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vadd_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vadd_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vadd_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vadd_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vadd_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vadd_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vadd_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vadd_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vadd_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vadd_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vadd_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vadd_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vadd_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vadd_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vadd_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vadd_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vadd_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vadd_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vadd_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vadd_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vadd_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vadd_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vadd_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vadd_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vadd_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vadd_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vadd_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vadd_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vadd_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vadd_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vadd_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vadd_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vadd_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vadd_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             uint64_t rs1, size_t vl);
// __riscv_vsub_tum, unsigned element types: two overloads per vector type
// (uint8 mf8..m8, uint16 mf4..m8, uint32 mf2..m8, uint64 m1..m8).
//   vm        - mask operand; vboolN_t with N = element width / LMUL
//   vd        - vector of the result type, passed ahead of the sources
//   vs2       - first source vector
//   vs1 / rs1 - second source: a vector of the same type (first overload of
//               each pair) or an unsigned scalar of the element type (second)
//   vl        - vector length argument
// NOTE(review): operand order is taken to be vs2 - vs1 (or vs2 - rs1) as for
// the RVV vsub instruction, under the tail-undisturbed, mask-agnostic
// policy - confirm against the intrinsics spec.
vuint8mf8_t __riscv_vsub_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vsub_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vsub_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vsub_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vsub_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vsub_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vsub_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vsub_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vsub_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vsub_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vsub_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vsub_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vsub_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vsub_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vsub_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vsub_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vsub_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vsub_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vsub_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vsub_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vsub_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vsub_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vsub_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vsub_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vsub_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vsub_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vsub_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vsub_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vsub_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vsub_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vsub_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vsub_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vsub_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vsub_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vsub_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vsub_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vsub_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vsub_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vsub_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vsub_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vsub_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vsub_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vsub_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vsub_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             uint64_t rs1, size_t vl);
// __riscv_vrsub_tum, unsigned element types: one overload per vector type
// (uint8 mf8..m8, uint16 mf4..m8, uint32 mf2..m8, uint64 m1..m8). Only the
// vector-scalar form (vs2, rs1) is listed; there is no vector-vector overload.
//   vm  - mask operand; vboolN_t with N = element width / LMUL
//   vd  - vector of the result type, passed ahead of the sources
//   vs2 - source vector
//   rs1 - unsigned scalar of the element type
//   vl  - vector length argument
// NOTE(review): vrsub is understood to be the reversed subtraction
// (rs1 - vs2[i]) of the RVV specification, under the tail-undisturbed,
// mask-agnostic policy - confirm against the spec.
vuint8mf8_t __riscv_vrsub_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vrsub_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vrsub_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vrsub_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vrsub_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vrsub_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vrsub_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vrsub_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vrsub_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vrsub_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vrsub_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vrsub_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vrsub_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vrsub_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vrsub_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vrsub_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vrsub_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vrsub_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vrsub_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vrsub_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vrsub_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vrsub_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              uint64_t rs1, size_t vl);
// masked functions (_tumu policy: tail undisturbed, mask undisturbed)
// __riscv_vadd_tumu overloads for signed integer vectors (int8 through int64
// elements, type suffixes mf8 through m8). Each vector type is declared
// twice: a vector-vector form whose last operand is vs1, and a vector-scalar
// form whose last operand is rs1 of the matching intN_t type. Argument order
// is always: mask vm, vd, vs2, vs1/rs1, vector length vl; vd, vs2 and the
// return value share one vector type.
vint8mf8_t __riscv_vadd_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vadd_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             int8_t rs1, size_t vl);
vint8mf4_t __riscv_vadd_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vadd_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             int8_t rs1, size_t vl);
vint8mf2_t __riscv_vadd_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vadd_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             int8_t rs1, size_t vl);
vint8m1_t __riscv_vadd_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vadd_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            int8_t rs1, size_t vl);
vint8m2_t __riscv_vadd_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vadd_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            int8_t rs1, size_t vl);
vint8m4_t __riscv_vadd_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vadd_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            int8_t rs1, size_t vl);
vint8m8_t __riscv_vadd_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vadd_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            int8_t rs1, size_t vl);
vint16mf4_t __riscv_vadd_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vadd_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              int16_t rs1, size_t vl);
vint16mf2_t __riscv_vadd_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vadd_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              int16_t rs1, size_t vl);
vint16m1_t __riscv_vadd_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vadd_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             int16_t rs1, size_t vl);
vint16m2_t __riscv_vadd_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vadd_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             int16_t rs1, size_t vl);
vint16m4_t __riscv_vadd_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vadd_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             int16_t rs1, size_t vl);
vint16m8_t __riscv_vadd_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vadd_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             int16_t rs1, size_t vl);
vint32mf2_t __riscv_vadd_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vadd_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              int32_t rs1, size_t vl);
vint32m1_t __riscv_vadd_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vadd_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             int32_t rs1, size_t vl);
vint32m2_t __riscv_vadd_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vadd_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             int32_t rs1, size_t vl);
vint32m4_t __riscv_vadd_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vadd_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             int32_t rs1, size_t vl);
vint32m8_t __riscv_vadd_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vadd_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             int32_t rs1, size_t vl);
vint64m1_t __riscv_vadd_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vadd_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             int64_t rs1, size_t vl);
vint64m2_t __riscv_vadd_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vadd_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             int64_t rs1, size_t vl);
vint64m4_t __riscv_vadd_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vadd_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             int64_t rs1, size_t vl);
vint64m8_t __riscv_vadd_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vadd_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             int64_t rs1, size_t vl);
// __riscv_vsub_tumu overloads for signed integer vectors (int8 through int64
// elements, type suffixes mf8 through m8). Each vector type has a
// vector-vector overload (last operand vs1) and a vector-scalar overload
// (last operand rs1 of the matching intN_t type). Argument order is: mask vm,
// vd, vs2, vs1/rs1, vector length vl; vd, vs2 and the return value share one
// vector type.
vint8mf8_t __riscv_vsub_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vsub_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             int8_t rs1, size_t vl);
vint8mf4_t __riscv_vsub_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vsub_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             int8_t rs1, size_t vl);
vint8mf2_t __riscv_vsub_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vsub_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             int8_t rs1, size_t vl);
vint8m1_t __riscv_vsub_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vsub_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            int8_t rs1, size_t vl);
vint8m2_t __riscv_vsub_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vsub_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            int8_t rs1, size_t vl);
vint8m4_t __riscv_vsub_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vsub_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            int8_t rs1, size_t vl);
vint8m8_t __riscv_vsub_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vsub_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            int8_t rs1, size_t vl);
vint16mf4_t __riscv_vsub_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vsub_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              int16_t rs1, size_t vl);
vint16mf2_t __riscv_vsub_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vsub_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              int16_t rs1, size_t vl);
vint16m1_t __riscv_vsub_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vsub_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             int16_t rs1, size_t vl);
vint16m2_t __riscv_vsub_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vsub_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             int16_t rs1, size_t vl);
vint16m4_t __riscv_vsub_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vsub_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             int16_t rs1, size_t vl);
vint16m8_t __riscv_vsub_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vsub_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             int16_t rs1, size_t vl);
vint32mf2_t __riscv_vsub_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vsub_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              int32_t rs1, size_t vl);
vint32m1_t __riscv_vsub_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vsub_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             int32_t rs1, size_t vl);
vint32m2_t __riscv_vsub_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vsub_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             int32_t rs1, size_t vl);
vint32m4_t __riscv_vsub_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vsub_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             int32_t rs1, size_t vl);
vint32m8_t __riscv_vsub_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vsub_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             int32_t rs1, size_t vl);
vint64m1_t __riscv_vsub_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vsub_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             int64_t rs1, size_t vl);
vint64m2_t __riscv_vsub_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vsub_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             int64_t rs1, size_t vl);
vint64m4_t __riscv_vsub_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vsub_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             int64_t rs1, size_t vl);
vint64m8_t __riscv_vsub_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vsub_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             int64_t rs1, size_t vl);
// __riscv_vrsub_tumu overloads for signed integer vectors (int8 through int64
// elements, type suffixes mf8 through m8). Unlike the vadd/vsub groups, only
// a vector-scalar overload is declared per vector type: mask vm, vd, vs2,
// scalar rs1 of the matching intN_t type, and vector length vl.
vint8mf8_t __riscv_vrsub_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                              int8_t rs1, size_t vl);
vint8mf4_t __riscv_vrsub_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                              int8_t rs1, size_t vl);
vint8mf2_t __riscv_vrsub_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                              int8_t rs1, size_t vl);
vint8m1_t __riscv_vrsub_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                             int8_t rs1, size_t vl);
vint8m2_t __riscv_vrsub_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                             int8_t rs1, size_t vl);
vint8m4_t __riscv_vrsub_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                             int8_t rs1, size_t vl);
vint8m8_t __riscv_vrsub_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                             int8_t rs1, size_t vl);
vint16mf4_t __riscv_vrsub_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                               int16_t rs1, size_t vl);
vint16mf2_t __riscv_vrsub_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                               int16_t rs1, size_t vl);
vint16m1_t __riscv_vrsub_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                              int16_t rs1, size_t vl);
vint16m2_t __riscv_vrsub_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                              int16_t rs1, size_t vl);
vint16m4_t __riscv_vrsub_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                              int16_t rs1, size_t vl);
vint16m8_t __riscv_vrsub_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                              int16_t rs1, size_t vl);
vint32mf2_t __riscv_vrsub_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                               int32_t rs1, size_t vl);
vint32m1_t __riscv_vrsub_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                              int32_t rs1, size_t vl);
vint32m2_t __riscv_vrsub_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                              int32_t rs1, size_t vl);
vint32m4_t __riscv_vrsub_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                              int32_t rs1, size_t vl);
vint32m8_t __riscv_vrsub_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                              int32_t rs1, size_t vl);
vint64m1_t __riscv_vrsub_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                              int64_t rs1, size_t vl);
vint64m2_t __riscv_vrsub_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                              int64_t rs1, size_t vl);
vint64m4_t __riscv_vrsub_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                              int64_t rs1, size_t vl);
vint64m8_t __riscv_vrsub_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                              int64_t rs1, size_t vl);
// __riscv_vneg_tumu overloads for signed integer vectors (int8 through int64
// elements, type suffixes mf8 through m8). These are single-source: each
// takes the mask vm, vd, one vector operand vs, and the vector length vl.
// There is no scalar form, and this group declares no unsigned variants.
vint8mf8_t __riscv_vneg_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs,
                             size_t vl);
vint8mf4_t __riscv_vneg_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs,
                             size_t vl);
vint8mf2_t __riscv_vneg_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs,
                             size_t vl);
vint8m1_t __riscv_vneg_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs, size_t vl);
vint8m2_t __riscv_vneg_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs, size_t vl);
vint8m4_t __riscv_vneg_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs, size_t vl);
vint8m8_t __riscv_vneg_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs, size_t vl);
vint16mf4_t __riscv_vneg_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs,
                              size_t vl);
vint16mf2_t __riscv_vneg_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs,
                              size_t vl);
vint16m1_t __riscv_vneg_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs,
                             size_t vl);
vint16m2_t __riscv_vneg_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs,
                             size_t vl);
vint16m4_t __riscv_vneg_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs,
                             size_t vl);
vint16m8_t __riscv_vneg_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs,
                             size_t vl);
vint32mf2_t __riscv_vneg_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs,
                              size_t vl);
vint32m1_t __riscv_vneg_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs,
                             size_t vl);
vint32m2_t __riscv_vneg_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs,
                             size_t vl);
vint32m4_t __riscv_vneg_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs,
                             size_t vl);
vint32m8_t __riscv_vneg_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs,
                             size_t vl);
vint64m1_t __riscv_vneg_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs,
                             size_t vl);
vint64m2_t __riscv_vneg_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs,
                             size_t vl);
vint64m4_t __riscv_vneg_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs,
                             size_t vl);
vint64m8_t __riscv_vneg_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs,
                             size_t vl);
// __riscv_vadd_tumu overloads for unsigned integer vectors (uint8 through
// uint64 elements, type suffixes mf8 through m8). Mirrors the signed group:
// each vector type has a vector-vector overload (vs1) and a vector-scalar
// overload (rs1 of the matching uintN_t type), with the argument order
// mask vm, vd, vs2, vs1/rs1, vector length vl.
vuint8mf8_t __riscv_vadd_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vadd_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vadd_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vadd_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vadd_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vadd_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vadd_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vadd_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vadd_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vadd_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vadd_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vadd_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vadd_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vadd_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vadd_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vadd_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vadd_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vadd_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vadd_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vadd_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vadd_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vadd_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vadd_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vadd_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vadd_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vadd_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vadd_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vadd_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vadd_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vadd_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vadd_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vadd_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vadd_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vadd_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vadd_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vadd_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vadd_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vadd_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vadd_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vadd_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vadd_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vadd_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vadd_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vadd_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              uint64_t rs1, size_t vl);
// __riscv_vsub_tumu overloads for unsigned integer vectors (uint8 through
// uint64 elements, type suffixes mf8 through m8). Each vector type has a
// vector-vector overload (vs1) and a vector-scalar overload (rs1 of the
// matching uintN_t type), with the argument order mask vm, vd, vs2, vs1/rs1,
// vector length vl.
vuint8mf8_t __riscv_vsub_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vsub_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vsub_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vsub_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vsub_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vsub_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vsub_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vsub_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vsub_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vsub_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vsub_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vsub_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vsub_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vsub_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vsub_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vsub_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vsub_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vsub_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vsub_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vsub_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vsub_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vsub_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vsub_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vsub_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vsub_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vsub_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vsub_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vsub_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vsub_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vsub_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vsub_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vsub_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vsub_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vsub_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vsub_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vsub_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vsub_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vsub_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vsub_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vsub_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vsub_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vsub_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vsub_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vsub_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              uint64_t rs1, size_t vl);
// __riscv_vrsub_tumu overloads for unsigned integer vectors (uint8 through
// uint64 elements, type suffixes mf8 through m8). Only a vector-scalar
// overload is declared per vector type: mask vm, vd, vs2, scalar rs1 of the
// matching uintN_t type, and vector length vl.
vuint8mf8_t __riscv_vrsub_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                               uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vrsub_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                               uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vrsub_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                               uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vrsub_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vrsub_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vrsub_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vrsub_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                              uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vrsub_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                                uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vrsub_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                                uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vrsub_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vrsub_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vrsub_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vrsub_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                               uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vrsub_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                                uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vrsub_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vrsub_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vrsub_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vrsub_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                               uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vrsub_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vrsub_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vrsub_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vrsub_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                               uint64_t rs1, size_t vl);
// masked functions with the mask-undisturbed, tail-agnostic policy (_mu)
vint8mf8_t __riscv_vadd_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vadd_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vint8mf4_t __riscv_vadd_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vadd_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vint8mf2_t __riscv_vadd_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vadd_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vint8m1_t __riscv_vadd_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vadd_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vint8m2_t __riscv_vadd_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vadd_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vint8m4_t __riscv_vadd_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vadd_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vint8m8_t __riscv_vadd_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vadd_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
vint16mf4_t __riscv_vadd_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vadd_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            int16_t rs1, size_t vl);
vint16mf2_t __riscv_vadd_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vadd_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            int16_t rs1, size_t vl);
vint16m1_t __riscv_vadd_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vadd_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vint16m2_t __riscv_vadd_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vadd_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           int16_t rs1, size_t vl);
vint16m4_t __riscv_vadd_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vadd_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           int16_t rs1, size_t vl);
vint16m8_t __riscv_vadd_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vadd_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           int16_t rs1, size_t vl);
vint32mf2_t __riscv_vadd_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vadd_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            int32_t rs1, size_t vl);
vint32m1_t __riscv_vadd_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vadd_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vint32m2_t __riscv_vadd_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vadd_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vint32m4_t __riscv_vadd_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vadd_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           int32_t rs1, size_t vl);
vint32m8_t __riscv_vadd_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vadd_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           int32_t rs1, size_t vl);
vint64m1_t __riscv_vadd_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vadd_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vint64m2_t __riscv_vadd_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vadd_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vint64m4_t __riscv_vadd_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vadd_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vint64m8_t __riscv_vadd_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vadd_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           int64_t rs1, size_t vl);
vint8mf8_t __riscv_vsub_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vsub_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vint8mf4_t __riscv_vsub_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vsub_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vint8mf2_t __riscv_vsub_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vsub_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vint8m1_t __riscv_vsub_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vsub_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vint8m2_t __riscv_vsub_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vsub_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vint8m4_t __riscv_vsub_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vsub_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vint8m8_t __riscv_vsub_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vsub_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
vint16mf4_t __riscv_vsub_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vsub_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            int16_t rs1, size_t vl);
vint16mf2_t __riscv_vsub_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vsub_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            int16_t rs1, size_t vl);
vint16m1_t __riscv_vsub_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vsub_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vint16m2_t __riscv_vsub_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vsub_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           int16_t rs1, size_t vl);
vint16m4_t __riscv_vsub_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vsub_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           int16_t rs1, size_t vl);
vint16m8_t __riscv_vsub_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vsub_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           int16_t rs1, size_t vl);
vint32mf2_t __riscv_vsub_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vsub_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            int32_t rs1, size_t vl);
vint32m1_t __riscv_vsub_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vsub_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vint32m2_t __riscv_vsub_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vsub_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vint32m4_t __riscv_vsub_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vsub_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           int32_t rs1, size_t vl);
vint32m8_t __riscv_vsub_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vsub_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           int32_t rs1, size_t vl);
vint64m1_t __riscv_vsub_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vsub_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vint64m2_t __riscv_vsub_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vsub_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vint64m4_t __riscv_vsub_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vsub_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vint64m8_t __riscv_vsub_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vsub_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           int64_t rs1, size_t vl);
vint8mf8_t __riscv_vrsub_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            int8_t rs1, size_t vl);
vint8mf4_t __riscv_vrsub_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            int8_t rs1, size_t vl);
vint8mf2_t __riscv_vrsub_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint8m1_t __riscv_vrsub_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                           size_t vl);
vint8m2_t __riscv_vrsub_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                           size_t vl);
vint8m4_t __riscv_vrsub_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                           size_t vl);
vint8m8_t __riscv_vrsub_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vrsub_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint16mf2_t __riscv_vrsub_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint16m1_t __riscv_vrsub_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint16m2_t __riscv_vrsub_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint16m4_t __riscv_vrsub_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint16m8_t __riscv_vrsub_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            int16_t rs1, size_t vl);
vint32mf2_t __riscv_vrsub_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint32m1_t __riscv_vrsub_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint32m2_t __riscv_vrsub_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint32m4_t __riscv_vrsub_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint32m8_t __riscv_vrsub_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            int32_t rs1, size_t vl);
vint64m1_t __riscv_vrsub_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            int64_t rs1, size_t vl);
vint64m2_t __riscv_vrsub_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            int64_t rs1, size_t vl);
vint64m4_t __riscv_vrsub_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            int64_t rs1, size_t vl);
vint64m8_t __riscv_vrsub_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            int64_t rs1, size_t vl);
vint8mf8_t __riscv_vneg_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs,
                           size_t vl);
vint8mf4_t __riscv_vneg_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs,
                           size_t vl);
vint8mf2_t __riscv_vneg_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs,
                           size_t vl);
vint8m1_t __riscv_vneg_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs, size_t vl);
vint8m2_t __riscv_vneg_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs, size_t vl);
vint8m4_t __riscv_vneg_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs, size_t vl);
vint8m8_t __riscv_vneg_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs, size_t vl);
vint16mf4_t __riscv_vneg_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs,
                            size_t vl);
vint16mf2_t __riscv_vneg_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs,
                            size_t vl);
vint16m1_t __riscv_vneg_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs,
                           size_t vl);
vint16m2_t __riscv_vneg_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs,
                           size_t vl);
vint16m4_t __riscv_vneg_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs,
                           size_t vl);
vint16m8_t __riscv_vneg_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs,
                           size_t vl);
vint32mf2_t __riscv_vneg_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs,
                            size_t vl);
vint32m1_t __riscv_vneg_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs,
                           size_t vl);
vint32m2_t __riscv_vneg_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs,
                           size_t vl);
vint32m4_t __riscv_vneg_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs,
                           size_t vl);
vint32m8_t __riscv_vneg_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs,
                           size_t vl);
vint64m1_t __riscv_vneg_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs,
                           size_t vl);
vint64m2_t __riscv_vneg_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs,
                           size_t vl);
vint64m4_t __riscv_vneg_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs,
                           size_t vl);
vint64m8_t __riscv_vneg_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs,
                           size_t vl);
vuint8mf8_t __riscv_vadd_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vadd_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vadd_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vadd_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vadd_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vadd_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vadd_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vadd_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vadd_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vadd_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vadd_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vadd_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vadd_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vadd_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vadd_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vadd_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vadd_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vadd_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vadd_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vadd_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vadd_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vadd_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vadd_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vadd_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vadd_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vadd_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vadd_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vadd_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vadd_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vadd_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vadd_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vadd_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vadd_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vadd_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vadd_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vadd_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vadd_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vadd_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vadd_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vadd_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vadd_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vadd_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vadd_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vadd_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            uint64_t rs1, size_t vl);
vuint8mf8_t __riscv_vsub_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vsub_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vsub_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vsub_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vsub_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vsub_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vsub_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vsub_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vsub_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vsub_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vsub_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vsub_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vsub_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vsub_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vsub_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vsub_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vsub_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vsub_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vsub_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vsub_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vsub_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vsub_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vsub_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vsub_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vsub_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vsub_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vsub_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vsub_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vsub_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vsub_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vsub_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vsub_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vsub_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vsub_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vsub_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vsub_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vsub_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vsub_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vsub_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vsub_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vsub_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vsub_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vsub_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vsub_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            uint64_t rs1, size_t vl);
vuint8mf8_t __riscv_vrsub_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vrsub_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vrsub_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vrsub_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vrsub_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vrsub_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vrsub_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vrsub_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vrsub_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vrsub_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vrsub_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vrsub_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vrsub_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vrsub_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vrsub_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vrsub_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vrsub_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vrsub_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vrsub_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vrsub_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vrsub_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vrsub_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             uint64_t rs1, size_t vl);

Vector Widening Integer Add/Subtract Intrinsics

// vwadd, `_tu` forms on signed types: int8 sources -> int16 result. No mask
// operand. The suffix selects the operand shapes:
//   _vv: vs2 and vs1 are both narrow (int8) vectors.
//   _vx: vs2 is a narrow vector, rs1 is an int8_t scalar.
//   _wv: vs2 already has the wide result type, vs1 is a narrow vector.
//   _wx: vs2 already has the wide result type, rs1 is an int8_t scalar.
// `vd` always has the return type. Narrow/wide types pair as mf8->mf4,
// mf4->mf2, mf2->m1, m1->m2, m2->m4, m4->m8.
// NOTE(review): `_tu` presumably means tail-undisturbed with `vd` supplying the
// preserved elements -- confirm against the intrinsics policy documentation.
vint16mf4_t __riscv_vwadd_vv_tu(vint16mf4_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                                size_t vl);
vint16mf4_t __riscv_vwadd_vx_tu(vint16mf4_t vd, vint8mf8_t vs2, int8_t rs1,
                                size_t vl);
vint16mf4_t __riscv_vwadd_wv_tu(vint16mf4_t vd, vint16mf4_t vs2, vint8mf8_t vs1,
                                size_t vl);
vint16mf4_t __riscv_vwadd_wx_tu(vint16mf4_t vd, vint16mf4_t vs2, int8_t rs1,
                                size_t vl);
vint16mf2_t __riscv_vwadd_vv_tu(vint16mf2_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                                size_t vl);
vint16mf2_t __riscv_vwadd_vx_tu(vint16mf2_t vd, vint8mf4_t vs2, int8_t rs1,
                                size_t vl);
vint16mf2_t __riscv_vwadd_wv_tu(vint16mf2_t vd, vint16mf2_t vs2, vint8mf4_t vs1,
                                size_t vl);
vint16mf2_t __riscv_vwadd_wx_tu(vint16mf2_t vd, vint16mf2_t vs2, int8_t rs1,
                                size_t vl);
vint16m1_t __riscv_vwadd_vv_tu(vint16m1_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                               size_t vl);
vint16m1_t __riscv_vwadd_vx_tu(vint16m1_t vd, vint8mf2_t vs2, int8_t rs1,
                               size_t vl);
vint16m1_t __riscv_vwadd_wv_tu(vint16m1_t vd, vint16m1_t vs2, vint8mf2_t vs1,
                               size_t vl);
vint16m1_t __riscv_vwadd_wx_tu(vint16m1_t vd, vint16m1_t vs2, int8_t rs1,
                               size_t vl);
vint16m2_t __riscv_vwadd_vv_tu(vint16m2_t vd, vint8m1_t vs2, vint8m1_t vs1,
                               size_t vl);
vint16m2_t __riscv_vwadd_vx_tu(vint16m2_t vd, vint8m1_t vs2, int8_t rs1,
                               size_t vl);
vint16m2_t __riscv_vwadd_wv_tu(vint16m2_t vd, vint16m2_t vs2, vint8m1_t vs1,
                               size_t vl);
vint16m2_t __riscv_vwadd_wx_tu(vint16m2_t vd, vint16m2_t vs2, int8_t rs1,
                               size_t vl);
vint16m4_t __riscv_vwadd_vv_tu(vint16m4_t vd, vint8m2_t vs2, vint8m2_t vs1,
                               size_t vl);
vint16m4_t __riscv_vwadd_vx_tu(vint16m4_t vd, vint8m2_t vs2, int8_t rs1,
                               size_t vl);
vint16m4_t __riscv_vwadd_wv_tu(vint16m4_t vd, vint16m4_t vs2, vint8m2_t vs1,
                               size_t vl);
vint16m4_t __riscv_vwadd_wx_tu(vint16m4_t vd, vint16m4_t vs2, int8_t rs1,
                               size_t vl);
vint16m8_t __riscv_vwadd_vv_tu(vint16m8_t vd, vint8m4_t vs2, vint8m4_t vs1,
                               size_t vl);
vint16m8_t __riscv_vwadd_vx_tu(vint16m8_t vd, vint8m4_t vs2, int8_t rs1,
                               size_t vl);
vint16m8_t __riscv_vwadd_wv_tu(vint16m8_t vd, vint16m8_t vs2, vint8m4_t vs1,
                               size_t vl);
vint16m8_t __riscv_vwadd_wx_tu(vint16m8_t vd, vint16m8_t vs2, int8_t rs1,
                               size_t vl);
// vwadd, `_tu` forms on signed types: int16 sources -> int32 result.
// _vv/_vx take a narrow (int16) vs2; _wv/_wx take vs2 at the int32 result
// type. The second source is a narrow vector (vs1) or an int16_t scalar (rs1).
// Narrow/wide types pair as mf4->mf2, mf2->m1, m1->m2, m2->m4, m4->m8.
vint32mf2_t __riscv_vwadd_vv_tu(vint32mf2_t vd, vint16mf4_t vs2,
                                vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwadd_vx_tu(vint32mf2_t vd, vint16mf4_t vs2, int16_t rs1,
                                size_t vl);
vint32mf2_t __riscv_vwadd_wv_tu(vint32mf2_t vd, vint32mf2_t vs2,
                                vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwadd_wx_tu(vint32mf2_t vd, vint32mf2_t vs2, int16_t rs1,
                                size_t vl);
vint32m1_t __riscv_vwadd_vv_tu(vint32m1_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                               size_t vl);
vint32m1_t __riscv_vwadd_vx_tu(vint32m1_t vd, vint16mf2_t vs2, int16_t rs1,
                               size_t vl);
vint32m1_t __riscv_vwadd_wv_tu(vint32m1_t vd, vint32m1_t vs2, vint16mf2_t vs1,
                               size_t vl);
vint32m1_t __riscv_vwadd_wx_tu(vint32m1_t vd, vint32m1_t vs2, int16_t rs1,
                               size_t vl);
vint32m2_t __riscv_vwadd_vv_tu(vint32m2_t vd, vint16m1_t vs2, vint16m1_t vs1,
                               size_t vl);
vint32m2_t __riscv_vwadd_vx_tu(vint32m2_t vd, vint16m1_t vs2, int16_t rs1,
                               size_t vl);
vint32m2_t __riscv_vwadd_wv_tu(vint32m2_t vd, vint32m2_t vs2, vint16m1_t vs1,
                               size_t vl);
vint32m2_t __riscv_vwadd_wx_tu(vint32m2_t vd, vint32m2_t vs2, int16_t rs1,
                               size_t vl);
vint32m4_t __riscv_vwadd_vv_tu(vint32m4_t vd, vint16m2_t vs2, vint16m2_t vs1,
                               size_t vl);
vint32m4_t __riscv_vwadd_vx_tu(vint32m4_t vd, vint16m2_t vs2, int16_t rs1,
                               size_t vl);
vint32m4_t __riscv_vwadd_wv_tu(vint32m4_t vd, vint32m4_t vs2, vint16m2_t vs1,
                               size_t vl);
vint32m4_t __riscv_vwadd_wx_tu(vint32m4_t vd, vint32m4_t vs2, int16_t rs1,
                               size_t vl);
vint32m8_t __riscv_vwadd_vv_tu(vint32m8_t vd, vint16m4_t vs2, vint16m4_t vs1,
                               size_t vl);
vint32m8_t __riscv_vwadd_vx_tu(vint32m8_t vd, vint16m4_t vs2, int16_t rs1,
                               size_t vl);
vint32m8_t __riscv_vwadd_wv_tu(vint32m8_t vd, vint32m8_t vs2, vint16m4_t vs1,
                               size_t vl);
vint32m8_t __riscv_vwadd_wx_tu(vint32m8_t vd, vint32m8_t vs2, int16_t rs1,
                               size_t vl);
// vwadd, `_tu` forms on signed types: int32 sources -> int64 result.
// _vv/_vx take a narrow (int32) vs2; _wv/_wx take vs2 at the int64 result
// type. The second source is a narrow vector (vs1) or an int32_t scalar (rs1).
// Narrow/wide types pair as mf2->m1, m1->m2, m2->m4, m4->m8.
vint64m1_t __riscv_vwadd_vv_tu(vint64m1_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                               size_t vl);
vint64m1_t __riscv_vwadd_vx_tu(vint64m1_t vd, vint32mf2_t vs2, int32_t rs1,
                               size_t vl);
vint64m1_t __riscv_vwadd_wv_tu(vint64m1_t vd, vint64m1_t vs2, vint32mf2_t vs1,
                               size_t vl);
vint64m1_t __riscv_vwadd_wx_tu(vint64m1_t vd, vint64m1_t vs2, int32_t rs1,
                               size_t vl);
vint64m2_t __riscv_vwadd_vv_tu(vint64m2_t vd, vint32m1_t vs2, vint32m1_t vs1,
                               size_t vl);
vint64m2_t __riscv_vwadd_vx_tu(vint64m2_t vd, vint32m1_t vs2, int32_t rs1,
                               size_t vl);
vint64m2_t __riscv_vwadd_wv_tu(vint64m2_t vd, vint64m2_t vs2, vint32m1_t vs1,
                               size_t vl);
vint64m2_t __riscv_vwadd_wx_tu(vint64m2_t vd, vint64m2_t vs2, int32_t rs1,
                               size_t vl);
vint64m4_t __riscv_vwadd_vv_tu(vint64m4_t vd, vint32m2_t vs2, vint32m2_t vs1,
                               size_t vl);
vint64m4_t __riscv_vwadd_vx_tu(vint64m4_t vd, vint32m2_t vs2, int32_t rs1,
                               size_t vl);
vint64m4_t __riscv_vwadd_wv_tu(vint64m4_t vd, vint64m4_t vs2, vint32m2_t vs1,
                               size_t vl);
vint64m4_t __riscv_vwadd_wx_tu(vint64m4_t vd, vint64m4_t vs2, int32_t rs1,
                               size_t vl);
vint64m8_t __riscv_vwadd_vv_tu(vint64m8_t vd, vint32m4_t vs2, vint32m4_t vs1,
                               size_t vl);
vint64m8_t __riscv_vwadd_vx_tu(vint64m8_t vd, vint32m4_t vs2, int32_t rs1,
                               size_t vl);
vint64m8_t __riscv_vwadd_wv_tu(vint64m8_t vd, vint64m8_t vs2, vint32m4_t vs1,
                               size_t vl);
vint64m8_t __riscv_vwadd_wx_tu(vint64m8_t vd, vint64m8_t vs2, int32_t rs1,
                               size_t vl);
// vwsub, `_tu` forms on signed types: int8 sources -> int16 result. No mask
// operand. The suffix selects the operand shapes:
//   _vv: vs2 and vs1 are both narrow (int8) vectors.
//   _vx: vs2 is a narrow vector, rs1 is an int8_t scalar.
//   _wv: vs2 already has the wide result type, vs1 is a narrow vector.
//   _wx: vs2 already has the wide result type, rs1 is an int8_t scalar.
// `vd` always has the return type. Narrow/wide types pair as mf8->mf4,
// mf4->mf2, mf2->m1, m1->m2, m2->m4, m4->m8.
// NOTE(review): `_tu` presumably means tail-undisturbed with `vd` supplying the
// preserved elements -- confirm against the intrinsics policy documentation.
vint16mf4_t __riscv_vwsub_vv_tu(vint16mf4_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                                size_t vl);
vint16mf4_t __riscv_vwsub_vx_tu(vint16mf4_t vd, vint8mf8_t vs2, int8_t rs1,
                                size_t vl);
vint16mf4_t __riscv_vwsub_wv_tu(vint16mf4_t vd, vint16mf4_t vs2, vint8mf8_t vs1,
                                size_t vl);
vint16mf4_t __riscv_vwsub_wx_tu(vint16mf4_t vd, vint16mf4_t vs2, int8_t rs1,
                                size_t vl);
vint16mf2_t __riscv_vwsub_vv_tu(vint16mf2_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                                size_t vl);
vint16mf2_t __riscv_vwsub_vx_tu(vint16mf2_t vd, vint8mf4_t vs2, int8_t rs1,
                                size_t vl);
vint16mf2_t __riscv_vwsub_wv_tu(vint16mf2_t vd, vint16mf2_t vs2, vint8mf4_t vs1,
                                size_t vl);
vint16mf2_t __riscv_vwsub_wx_tu(vint16mf2_t vd, vint16mf2_t vs2, int8_t rs1,
                                size_t vl);
vint16m1_t __riscv_vwsub_vv_tu(vint16m1_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                               size_t vl);
vint16m1_t __riscv_vwsub_vx_tu(vint16m1_t vd, vint8mf2_t vs2, int8_t rs1,
                               size_t vl);
vint16m1_t __riscv_vwsub_wv_tu(vint16m1_t vd, vint16m1_t vs2, vint8mf2_t vs1,
                               size_t vl);
vint16m1_t __riscv_vwsub_wx_tu(vint16m1_t vd, vint16m1_t vs2, int8_t rs1,
                               size_t vl);
vint16m2_t __riscv_vwsub_vv_tu(vint16m2_t vd, vint8m1_t vs2, vint8m1_t vs1,
                               size_t vl);
vint16m2_t __riscv_vwsub_vx_tu(vint16m2_t vd, vint8m1_t vs2, int8_t rs1,
                               size_t vl);
vint16m2_t __riscv_vwsub_wv_tu(vint16m2_t vd, vint16m2_t vs2, vint8m1_t vs1,
                               size_t vl);
vint16m2_t __riscv_vwsub_wx_tu(vint16m2_t vd, vint16m2_t vs2, int8_t rs1,
                               size_t vl);
vint16m4_t __riscv_vwsub_vv_tu(vint16m4_t vd, vint8m2_t vs2, vint8m2_t vs1,
                               size_t vl);
vint16m4_t __riscv_vwsub_vx_tu(vint16m4_t vd, vint8m2_t vs2, int8_t rs1,
                               size_t vl);
vint16m4_t __riscv_vwsub_wv_tu(vint16m4_t vd, vint16m4_t vs2, vint8m2_t vs1,
                               size_t vl);
vint16m4_t __riscv_vwsub_wx_tu(vint16m4_t vd, vint16m4_t vs2, int8_t rs1,
                               size_t vl);
vint16m8_t __riscv_vwsub_vv_tu(vint16m8_t vd, vint8m4_t vs2, vint8m4_t vs1,
                               size_t vl);
vint16m8_t __riscv_vwsub_vx_tu(vint16m8_t vd, vint8m4_t vs2, int8_t rs1,
                               size_t vl);
vint16m8_t __riscv_vwsub_wv_tu(vint16m8_t vd, vint16m8_t vs2, vint8m4_t vs1,
                               size_t vl);
vint16m8_t __riscv_vwsub_wx_tu(vint16m8_t vd, vint16m8_t vs2, int8_t rs1,
                               size_t vl);
// vwsub, `_tu` forms on signed types: int16 sources -> int32 result.
// _vv/_vx take a narrow (int16) vs2; _wv/_wx take vs2 at the int32 result
// type. The second source is a narrow vector (vs1) or an int16_t scalar (rs1).
// Narrow/wide types pair as mf4->mf2, mf2->m1, m1->m2, m2->m4, m4->m8.
vint32mf2_t __riscv_vwsub_vv_tu(vint32mf2_t vd, vint16mf4_t vs2,
                                vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwsub_vx_tu(vint32mf2_t vd, vint16mf4_t vs2, int16_t rs1,
                                size_t vl);
vint32mf2_t __riscv_vwsub_wv_tu(vint32mf2_t vd, vint32mf2_t vs2,
                                vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwsub_wx_tu(vint32mf2_t vd, vint32mf2_t vs2, int16_t rs1,
                                size_t vl);
vint32m1_t __riscv_vwsub_vv_tu(vint32m1_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                               size_t vl);
vint32m1_t __riscv_vwsub_vx_tu(vint32m1_t vd, vint16mf2_t vs2, int16_t rs1,
                               size_t vl);
vint32m1_t __riscv_vwsub_wv_tu(vint32m1_t vd, vint32m1_t vs2, vint16mf2_t vs1,
                               size_t vl);
vint32m1_t __riscv_vwsub_wx_tu(vint32m1_t vd, vint32m1_t vs2, int16_t rs1,
                               size_t vl);
vint32m2_t __riscv_vwsub_vv_tu(vint32m2_t vd, vint16m1_t vs2, vint16m1_t vs1,
                               size_t vl);
vint32m2_t __riscv_vwsub_vx_tu(vint32m2_t vd, vint16m1_t vs2, int16_t rs1,
                               size_t vl);
vint32m2_t __riscv_vwsub_wv_tu(vint32m2_t vd, vint32m2_t vs2, vint16m1_t vs1,
                               size_t vl);
vint32m2_t __riscv_vwsub_wx_tu(vint32m2_t vd, vint32m2_t vs2, int16_t rs1,
                               size_t vl);
vint32m4_t __riscv_vwsub_vv_tu(vint32m4_t vd, vint16m2_t vs2, vint16m2_t vs1,
                               size_t vl);
vint32m4_t __riscv_vwsub_vx_tu(vint32m4_t vd, vint16m2_t vs2, int16_t rs1,
                               size_t vl);
vint32m4_t __riscv_vwsub_wv_tu(vint32m4_t vd, vint32m4_t vs2, vint16m2_t vs1,
                               size_t vl);
vint32m4_t __riscv_vwsub_wx_tu(vint32m4_t vd, vint32m4_t vs2, int16_t rs1,
                               size_t vl);
vint32m8_t __riscv_vwsub_vv_tu(vint32m8_t vd, vint16m4_t vs2, vint16m4_t vs1,
                               size_t vl);
vint32m8_t __riscv_vwsub_vx_tu(vint32m8_t vd, vint16m4_t vs2, int16_t rs1,
                               size_t vl);
vint32m8_t __riscv_vwsub_wv_tu(vint32m8_t vd, vint32m8_t vs2, vint16m4_t vs1,
                               size_t vl);
vint32m8_t __riscv_vwsub_wx_tu(vint32m8_t vd, vint32m8_t vs2, int16_t rs1,
                               size_t vl);
// vwsub, `_tu` forms on signed types: int32 sources -> int64 result.
// _vv/_vx take a narrow (int32) vs2; _wv/_wx take vs2 at the int64 result
// type. The second source is a narrow vector (vs1) or an int32_t scalar (rs1).
// Narrow/wide types pair as mf2->m1, m1->m2, m2->m4, m4->m8.
vint64m1_t __riscv_vwsub_vv_tu(vint64m1_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                               size_t vl);
vint64m1_t __riscv_vwsub_vx_tu(vint64m1_t vd, vint32mf2_t vs2, int32_t rs1,
                               size_t vl);
vint64m1_t __riscv_vwsub_wv_tu(vint64m1_t vd, vint64m1_t vs2, vint32mf2_t vs1,
                               size_t vl);
vint64m1_t __riscv_vwsub_wx_tu(vint64m1_t vd, vint64m1_t vs2, int32_t rs1,
                               size_t vl);
vint64m2_t __riscv_vwsub_vv_tu(vint64m2_t vd, vint32m1_t vs2, vint32m1_t vs1,
                               size_t vl);
vint64m2_t __riscv_vwsub_vx_tu(vint64m2_t vd, vint32m1_t vs2, int32_t rs1,
                               size_t vl);
vint64m2_t __riscv_vwsub_wv_tu(vint64m2_t vd, vint64m2_t vs2, vint32m1_t vs1,
                               size_t vl);
vint64m2_t __riscv_vwsub_wx_tu(vint64m2_t vd, vint64m2_t vs2, int32_t rs1,
                               size_t vl);
vint64m4_t __riscv_vwsub_vv_tu(vint64m4_t vd, vint32m2_t vs2, vint32m2_t vs1,
                               size_t vl);
vint64m4_t __riscv_vwsub_vx_tu(vint64m4_t vd, vint32m2_t vs2, int32_t rs1,
                               size_t vl);
vint64m4_t __riscv_vwsub_wv_tu(vint64m4_t vd, vint64m4_t vs2, vint32m2_t vs1,
                               size_t vl);
vint64m4_t __riscv_vwsub_wx_tu(vint64m4_t vd, vint64m4_t vs2, int32_t rs1,
                               size_t vl);
vint64m8_t __riscv_vwsub_vv_tu(vint64m8_t vd, vint32m4_t vs2, vint32m4_t vs1,
                               size_t vl);
vint64m8_t __riscv_vwsub_vx_tu(vint64m8_t vd, vint32m4_t vs2, int32_t rs1,
                               size_t vl);
vint64m8_t __riscv_vwsub_wv_tu(vint64m8_t vd, vint64m8_t vs2, vint32m4_t vs1,
                               size_t vl);
vint64m8_t __riscv_vwsub_wx_tu(vint64m8_t vd, vint64m8_t vs2, int32_t rs1,
                               size_t vl);
// vwaddu, `_tu` forms on unsigned types: uint8 sources -> uint16 result. No
// mask operand. The suffix selects the operand shapes:
//   _vv: vs2 and vs1 are both narrow (uint8) vectors.
//   _vx: vs2 is a narrow vector, rs1 is a uint8_t scalar.
//   _wv: vs2 already has the wide result type, vs1 is a narrow vector.
//   _wx: vs2 already has the wide result type, rs1 is a uint8_t scalar.
// `vd` always has the return type. Narrow/wide types pair as mf8->mf4,
// mf4->mf2, mf2->m1, m1->m2, m2->m4, m4->m8.
// NOTE(review): `_tu` presumably means tail-undisturbed with `vd` supplying the
// preserved elements -- confirm against the intrinsics policy documentation.
vuint16mf4_t __riscv_vwaddu_vv_tu(vuint16mf4_t vd, vuint8mf8_t vs2,
                                  vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwaddu_vx_tu(vuint16mf4_t vd, vuint8mf8_t vs2, uint8_t rs1,
                                  size_t vl);
vuint16mf4_t __riscv_vwaddu_wv_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                                  vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwaddu_wx_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwaddu_vv_tu(vuint16mf2_t vd, vuint8mf4_t vs2,
                                  vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwaddu_vx_tu(vuint16mf2_t vd, vuint8mf4_t vs2, uint8_t rs1,
                                  size_t vl);
vuint16mf2_t __riscv_vwaddu_wv_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                                  vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwaddu_wx_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwaddu_vv_tu(vuint16m1_t vd, vuint8mf2_t vs2,
                                 vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwaddu_vx_tu(vuint16m1_t vd, vuint8mf2_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m1_t __riscv_vwaddu_wv_tu(vuint16m1_t vd, vuint16m1_t vs2,
                                 vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwaddu_wx_tu(vuint16m1_t vd, vuint16m1_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m2_t __riscv_vwaddu_vv_tu(vuint16m2_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                                 size_t vl);
vuint16m2_t __riscv_vwaddu_vx_tu(vuint16m2_t vd, vuint8m1_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m2_t __riscv_vwaddu_wv_tu(vuint16m2_t vd, vuint16m2_t vs2,
                                 vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwaddu_wx_tu(vuint16m2_t vd, vuint16m2_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m4_t __riscv_vwaddu_vv_tu(vuint16m4_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                                 size_t vl);
vuint16m4_t __riscv_vwaddu_vx_tu(vuint16m4_t vd, vuint8m2_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m4_t __riscv_vwaddu_wv_tu(vuint16m4_t vd, vuint16m4_t vs2,
                                 vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwaddu_wx_tu(vuint16m4_t vd, vuint16m4_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m8_t __riscv_vwaddu_vv_tu(vuint16m8_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                                 size_t vl);
vuint16m8_t __riscv_vwaddu_vx_tu(vuint16m8_t vd, vuint8m4_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m8_t __riscv_vwaddu_wv_tu(vuint16m8_t vd, vuint16m8_t vs2,
                                 vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwaddu_wx_tu(vuint16m8_t vd, vuint16m8_t vs2, uint8_t rs1,
                                 size_t vl);
// vwaddu, `_tu` forms on unsigned types: uint16 sources -> uint32 result.
// _vv/_vx take a narrow (uint16) vs2; _wv/_wx take vs2 at the uint32 result
// type. The second source is a narrow vector (vs1) or a uint16_t scalar (rs1).
// Narrow/wide types pair as mf4->mf2, mf2->m1, m1->m2, m2->m4, m4->m8.
vuint32mf2_t __riscv_vwaddu_vv_tu(vuint32mf2_t vd, vuint16mf4_t vs2,
                                  vuint16mf4_t vs1, size_t vl);
vuint32mf2_t __riscv_vwaddu_vx_tu(vuint32mf2_t vd, vuint16mf4_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vwaddu_wv_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                                  vuint16mf4_t vs1, size_t vl);
vuint32mf2_t __riscv_vwaddu_wx_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwaddu_vv_tu(vuint32m1_t vd, vuint16mf2_t vs2,
                                 vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwaddu_vx_tu(vuint32m1_t vd, vuint16mf2_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m1_t __riscv_vwaddu_wv_tu(vuint32m1_t vd, vuint32m1_t vs2,
                                 vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwaddu_wx_tu(vuint32m1_t vd, vuint32m1_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m2_t __riscv_vwaddu_vv_tu(vuint32m2_t vd, vuint16m1_t vs2,
                                 vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwaddu_vx_tu(vuint32m2_t vd, vuint16m1_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m2_t __riscv_vwaddu_wv_tu(vuint32m2_t vd, vuint32m2_t vs2,
                                 vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwaddu_wx_tu(vuint32m2_t vd, vuint32m2_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m4_t __riscv_vwaddu_vv_tu(vuint32m4_t vd, vuint16m2_t vs2,
                                 vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwaddu_vx_tu(vuint32m4_t vd, vuint16m2_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m4_t __riscv_vwaddu_wv_tu(vuint32m4_t vd, vuint32m4_t vs2,
                                 vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwaddu_wx_tu(vuint32m4_t vd, vuint32m4_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m8_t __riscv_vwaddu_vv_tu(vuint32m8_t vd, vuint16m4_t vs2,
                                 vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwaddu_vx_tu(vuint32m8_t vd, vuint16m4_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m8_t __riscv_vwaddu_wv_tu(vuint32m8_t vd, vuint32m8_t vs2,
                                 vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwaddu_wx_tu(vuint32m8_t vd, vuint32m8_t vs2, uint16_t rs1,
                                 size_t vl);
// vwaddu, `_tu` forms on unsigned types: uint32 sources -> uint64 result.
// _vv/_vx take a narrow (uint32) vs2; _wv/_wx take vs2 at the uint64 result
// type. The second source is a narrow vector (vs1) or a uint32_t scalar (rs1).
// Narrow/wide types pair as mf2->m1, m1->m2, m2->m4, m4->m8.
vuint64m1_t __riscv_vwaddu_vv_tu(vuint64m1_t vd, vuint32mf2_t vs2,
                                 vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwaddu_vx_tu(vuint64m1_t vd, vuint32mf2_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m1_t __riscv_vwaddu_wv_tu(vuint64m1_t vd, vuint64m1_t vs2,
                                 vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwaddu_wx_tu(vuint64m1_t vd, vuint64m1_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m2_t __riscv_vwaddu_vv_tu(vuint64m2_t vd, vuint32m1_t vs2,
                                 vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwaddu_vx_tu(vuint64m2_t vd, vuint32m1_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m2_t __riscv_vwaddu_wv_tu(vuint64m2_t vd, vuint64m2_t vs2,
                                 vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwaddu_wx_tu(vuint64m2_t vd, vuint64m2_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m4_t __riscv_vwaddu_vv_tu(vuint64m4_t vd, vuint32m2_t vs2,
                                 vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwaddu_vx_tu(vuint64m4_t vd, vuint32m2_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m4_t __riscv_vwaddu_wv_tu(vuint64m4_t vd, vuint64m4_t vs2,
                                 vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwaddu_wx_tu(vuint64m4_t vd, vuint64m4_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m8_t __riscv_vwaddu_vv_tu(vuint64m8_t vd, vuint32m4_t vs2,
                                 vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwaddu_vx_tu(vuint64m8_t vd, vuint32m4_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m8_t __riscv_vwaddu_wv_tu(vuint64m8_t vd, vuint64m8_t vs2,
                                 vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwaddu_wx_tu(vuint64m8_t vd, vuint64m8_t vs2, uint32_t rs1,
                                 size_t vl);
// vwsubu, `_tu` forms on unsigned types: uint8 sources -> uint16 result. No
// mask operand. The suffix selects the operand shapes:
//   _vv: vs2 and vs1 are both narrow (uint8) vectors.
//   _vx: vs2 is a narrow vector, rs1 is a uint8_t scalar.
//   _wv: vs2 already has the wide result type, vs1 is a narrow vector.
//   _wx: vs2 already has the wide result type, rs1 is a uint8_t scalar.
// `vd` always has the return type. Narrow/wide types pair as mf8->mf4,
// mf4->mf2, mf2->m1, m1->m2, m2->m4, m4->m8.
// NOTE(review): `_tu` presumably means tail-undisturbed with `vd` supplying the
// preserved elements -- confirm against the intrinsics policy documentation.
vuint16mf4_t __riscv_vwsubu_vv_tu(vuint16mf4_t vd, vuint8mf8_t vs2,
                                  vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwsubu_vx_tu(vuint16mf4_t vd, vuint8mf8_t vs2, uint8_t rs1,
                                  size_t vl);
vuint16mf4_t __riscv_vwsubu_wv_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                                  vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwsubu_wx_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwsubu_vv_tu(vuint16mf2_t vd, vuint8mf4_t vs2,
                                  vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwsubu_vx_tu(vuint16mf2_t vd, vuint8mf4_t vs2, uint8_t rs1,
                                  size_t vl);
vuint16mf2_t __riscv_vwsubu_wv_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                                  vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwsubu_wx_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwsubu_vv_tu(vuint16m1_t vd, vuint8mf2_t vs2,
                                 vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwsubu_vx_tu(vuint16m1_t vd, vuint8mf2_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m1_t __riscv_vwsubu_wv_tu(vuint16m1_t vd, vuint16m1_t vs2,
                                 vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwsubu_wx_tu(vuint16m1_t vd, vuint16m1_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m2_t __riscv_vwsubu_vv_tu(vuint16m2_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                                 size_t vl);
vuint16m2_t __riscv_vwsubu_vx_tu(vuint16m2_t vd, vuint8m1_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m2_t __riscv_vwsubu_wv_tu(vuint16m2_t vd, vuint16m2_t vs2,
                                 vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwsubu_wx_tu(vuint16m2_t vd, vuint16m2_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m4_t __riscv_vwsubu_vv_tu(vuint16m4_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                                 size_t vl);
vuint16m4_t __riscv_vwsubu_vx_tu(vuint16m4_t vd, vuint8m2_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m4_t __riscv_vwsubu_wv_tu(vuint16m4_t vd, vuint16m4_t vs2,
                                 vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwsubu_wx_tu(vuint16m4_t vd, vuint16m4_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m8_t __riscv_vwsubu_vv_tu(vuint16m8_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                                 size_t vl);
vuint16m8_t __riscv_vwsubu_vx_tu(vuint16m8_t vd, vuint8m4_t vs2, uint8_t rs1,
                                 size_t vl);
vuint16m8_t __riscv_vwsubu_wv_tu(vuint16m8_t vd, vuint16m8_t vs2,
                                 vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwsubu_wx_tu(vuint16m8_t vd, vuint16m8_t vs2, uint8_t rs1,
                                 size_t vl);
// vwsubu, `_tu` forms on unsigned types: uint16 sources -> uint32 result.
// _vv/_vx take a narrow (uint16) vs2; _wv/_wx take vs2 at the uint32 result
// type. The second source is a narrow vector (vs1) or a uint16_t scalar (rs1).
// Narrow/wide types pair as mf4->mf2, mf2->m1, m1->m2, m2->m4, m4->m8.
vuint32mf2_t __riscv_vwsubu_vv_tu(vuint32mf2_t vd, vuint16mf4_t vs2,
                                  vuint16mf4_t vs1, size_t vl);
vuint32mf2_t __riscv_vwsubu_vx_tu(vuint32mf2_t vd, vuint16mf4_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vwsubu_wv_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                                  vuint16mf4_t vs1, size_t vl);
vuint32mf2_t __riscv_vwsubu_wx_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwsubu_vv_tu(vuint32m1_t vd, vuint16mf2_t vs2,
                                 vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwsubu_vx_tu(vuint32m1_t vd, vuint16mf2_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m1_t __riscv_vwsubu_wv_tu(vuint32m1_t vd, vuint32m1_t vs2,
                                 vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwsubu_wx_tu(vuint32m1_t vd, vuint32m1_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m2_t __riscv_vwsubu_vv_tu(vuint32m2_t vd, vuint16m1_t vs2,
                                 vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwsubu_vx_tu(vuint32m2_t vd, vuint16m1_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m2_t __riscv_vwsubu_wv_tu(vuint32m2_t vd, vuint32m2_t vs2,
                                 vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwsubu_wx_tu(vuint32m2_t vd, vuint32m2_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m4_t __riscv_vwsubu_vv_tu(vuint32m4_t vd, vuint16m2_t vs2,
                                 vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwsubu_vx_tu(vuint32m4_t vd, vuint16m2_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m4_t __riscv_vwsubu_wv_tu(vuint32m4_t vd, vuint32m4_t vs2,
                                 vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwsubu_wx_tu(vuint32m4_t vd, vuint32m4_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m8_t __riscv_vwsubu_vv_tu(vuint32m8_t vd, vuint16m4_t vs2,
                                 vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwsubu_vx_tu(vuint32m8_t vd, vuint16m4_t vs2, uint16_t rs1,
                                 size_t vl);
vuint32m8_t __riscv_vwsubu_wv_tu(vuint32m8_t vd, vuint32m8_t vs2,
                                 vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwsubu_wx_tu(vuint32m8_t vd, vuint32m8_t vs2, uint16_t rs1,
                                 size_t vl);
// vwsubu, `_tu` forms on unsigned types: uint32 sources -> uint64 result.
// _vv/_vx take a narrow (uint32) vs2; _wv/_wx take vs2 at the uint64 result
// type. The second source is a narrow vector (vs1) or a uint32_t scalar (rs1).
// Narrow/wide types pair as mf2->m1, m1->m2, m2->m4, m4->m8.
vuint64m1_t __riscv_vwsubu_vv_tu(vuint64m1_t vd, vuint32mf2_t vs2,
                                 vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwsubu_vx_tu(vuint64m1_t vd, vuint32mf2_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m1_t __riscv_vwsubu_wv_tu(vuint64m1_t vd, vuint64m1_t vs2,
                                 vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwsubu_wx_tu(vuint64m1_t vd, vuint64m1_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m2_t __riscv_vwsubu_vv_tu(vuint64m2_t vd, vuint32m1_t vs2,
                                 vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwsubu_vx_tu(vuint64m2_t vd, vuint32m1_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m2_t __riscv_vwsubu_wv_tu(vuint64m2_t vd, vuint64m2_t vs2,
                                 vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwsubu_wx_tu(vuint64m2_t vd, vuint64m2_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m4_t __riscv_vwsubu_vv_tu(vuint64m4_t vd, vuint32m2_t vs2,
                                 vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwsubu_vx_tu(vuint64m4_t vd, vuint32m2_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m4_t __riscv_vwsubu_wv_tu(vuint64m4_t vd, vuint64m4_t vs2,
                                 vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwsubu_wx_tu(vuint64m4_t vd, vuint64m4_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m8_t __riscv_vwsubu_vv_tu(vuint64m8_t vd, vuint32m4_t vs2,
                                 vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwsubu_vx_tu(vuint64m8_t vd, vuint32m4_t vs2, uint32_t rs1,
                                 size_t vl);
vuint64m8_t __riscv_vwsubu_wv_tu(vuint64m8_t vd, vuint64m8_t vs2,
                                 vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwsubu_wx_tu(vuint64m8_t vd, vuint64m8_t vs2, uint32_t rs1,
                                 size_t vl);
// masked functions (_tum: tail-undisturbed, mask-agnostic)
// __riscv_vwadd_{vv,vx,wv,wx}_tum: signed widening add, masked by `vm`.
// The result (and `vd`) element type is twice as wide as the narrow source:
//   _vv: vs2 and vs1 are both narrow vectors
//   _vx: vs2 is a narrow vector, rs1 is a narrow scalar
//   _wv: vs2 already has the result type, vs1 is a narrow vector
//   _wx: vs2 already has the result type, rs1 is a narrow scalar
// The vboolN_t mask type follows the result type's SEW/LMUL ratio
// (e.g. vint16mf4_t -> 16 / (1/4) = 64 -> vbool64_t).
vint16mf4_t __riscv_vwadd_vv_tum(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                 vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwadd_vx_tum(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                 int8_t rs1, size_t vl);
vint16mf4_t __riscv_vwadd_wv_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                 vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwadd_wx_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                 int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwadd_vv_tum(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                 vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwadd_vx_tum(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                 int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwadd_wv_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                 vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwadd_wx_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                 int8_t rs1, size_t vl);
vint16m1_t __riscv_vwadd_vv_tum(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwadd_vx_tum(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                int8_t rs1, size_t vl);
vint16m1_t __riscv_vwadd_wv_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwadd_wx_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                int8_t rs1, size_t vl);
vint16m2_t __riscv_vwadd_vv_tum(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwadd_vx_tum(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                int8_t rs1, size_t vl);
vint16m2_t __riscv_vwadd_wv_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwadd_wx_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                int8_t rs1, size_t vl);
vint16m4_t __riscv_vwadd_vv_tum(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwadd_vx_tum(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                int8_t rs1, size_t vl);
vint16m4_t __riscv_vwadd_wv_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwadd_wx_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                int8_t rs1, size_t vl);
vint16m8_t __riscv_vwadd_vv_tum(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwadd_vx_tum(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                int8_t rs1, size_t vl);
vint16m8_t __riscv_vwadd_wv_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwadd_wx_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                int8_t rs1, size_t vl);
vint32mf2_t __riscv_vwadd_vv_tum(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                 vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwadd_vx_tum(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                 int16_t rs1, size_t vl);
vint32mf2_t __riscv_vwadd_wv_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                 vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwadd_wx_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                 int16_t rs1, size_t vl);
vint32m1_t __riscv_vwadd_vv_tum(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwadd_vx_tum(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                int16_t rs1, size_t vl);
vint32m1_t __riscv_vwadd_wv_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwadd_wx_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                int16_t rs1, size_t vl);
vint32m2_t __riscv_vwadd_vv_tum(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwadd_vx_tum(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                int16_t rs1, size_t vl);
vint32m2_t __riscv_vwadd_wv_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwadd_wx_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                int16_t rs1, size_t vl);
vint32m4_t __riscv_vwadd_vv_tum(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwadd_vx_tum(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                int16_t rs1, size_t vl);
vint32m4_t __riscv_vwadd_wv_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwadd_wx_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                int16_t rs1, size_t vl);
vint32m8_t __riscv_vwadd_vv_tum(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwadd_vx_tum(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                int16_t rs1, size_t vl);
vint32m8_t __riscv_vwadd_wv_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwadd_wx_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                int16_t rs1, size_t vl);
vint64m1_t __riscv_vwadd_vv_tum(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwadd_vx_tum(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                int32_t rs1, size_t vl);
vint64m1_t __riscv_vwadd_wv_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwadd_wx_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                int32_t rs1, size_t vl);
vint64m2_t __riscv_vwadd_vv_tum(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwadd_vx_tum(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                int32_t rs1, size_t vl);
vint64m2_t __riscv_vwadd_wv_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwadd_wx_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                int32_t rs1, size_t vl);
vint64m4_t __riscv_vwadd_vv_tum(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwadd_vx_tum(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                int32_t rs1, size_t vl);
vint64m4_t __riscv_vwadd_wv_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwadd_wx_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                int32_t rs1, size_t vl);
vint64m8_t __riscv_vwadd_vv_tum(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwadd_vx_tum(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                int32_t rs1, size_t vl);
vint64m8_t __riscv_vwadd_wv_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwadd_wx_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                int32_t rs1, size_t vl);
// __riscv_vwsub_{vv,vx,wv,wx}_tum: signed widening subtract, masked by `vm`.
// Per the RVV ISA operand order the subtrahend is vs1/rs1 (vs2 - vs1).
// The result (and `vd`) element type is twice as wide as the narrow source:
//   _vv: vs2 and vs1 are both narrow vectors
//   _vx: vs2 is a narrow vector, rs1 is a narrow scalar
//   _wv: vs2 already has the result type, vs1 is a narrow vector
//   _wx: vs2 already has the result type, rs1 is a narrow scalar
vint16mf4_t __riscv_vwsub_vv_tum(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                 vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwsub_vx_tum(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                 int8_t rs1, size_t vl);
vint16mf4_t __riscv_vwsub_wv_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                 vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwsub_wx_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                 int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwsub_vv_tum(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                 vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwsub_vx_tum(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                 int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwsub_wv_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                 vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwsub_wx_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                 int8_t rs1, size_t vl);
vint16m1_t __riscv_vwsub_vv_tum(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwsub_vx_tum(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                int8_t rs1, size_t vl);
vint16m1_t __riscv_vwsub_wv_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwsub_wx_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                int8_t rs1, size_t vl);
vint16m2_t __riscv_vwsub_vv_tum(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwsub_vx_tum(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                int8_t rs1, size_t vl);
vint16m2_t __riscv_vwsub_wv_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwsub_wx_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                int8_t rs1, size_t vl);
vint16m4_t __riscv_vwsub_vv_tum(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwsub_vx_tum(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                int8_t rs1, size_t vl);
vint16m4_t __riscv_vwsub_wv_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwsub_wx_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                int8_t rs1, size_t vl);
vint16m8_t __riscv_vwsub_vv_tum(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwsub_vx_tum(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                int8_t rs1, size_t vl);
vint16m8_t __riscv_vwsub_wv_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwsub_wx_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                int8_t rs1, size_t vl);
vint32mf2_t __riscv_vwsub_vv_tum(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                 vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwsub_vx_tum(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                 int16_t rs1, size_t vl);
vint32mf2_t __riscv_vwsub_wv_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                 vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwsub_wx_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                 int16_t rs1, size_t vl);
vint32m1_t __riscv_vwsub_vv_tum(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwsub_vx_tum(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                int16_t rs1, size_t vl);
vint32m1_t __riscv_vwsub_wv_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwsub_wx_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                int16_t rs1, size_t vl);
vint32m2_t __riscv_vwsub_vv_tum(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwsub_vx_tum(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                int16_t rs1, size_t vl);
vint32m2_t __riscv_vwsub_wv_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwsub_wx_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                int16_t rs1, size_t vl);
vint32m4_t __riscv_vwsub_vv_tum(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwsub_vx_tum(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                int16_t rs1, size_t vl);
vint32m4_t __riscv_vwsub_wv_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwsub_wx_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                int16_t rs1, size_t vl);
vint32m8_t __riscv_vwsub_vv_tum(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwsub_vx_tum(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                int16_t rs1, size_t vl);
vint32m8_t __riscv_vwsub_wv_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwsub_wx_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                int16_t rs1, size_t vl);
vint64m1_t __riscv_vwsub_vv_tum(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwsub_vx_tum(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                int32_t rs1, size_t vl);
vint64m1_t __riscv_vwsub_wv_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwsub_wx_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                int32_t rs1, size_t vl);
vint64m2_t __riscv_vwsub_vv_tum(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwsub_vx_tum(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                int32_t rs1, size_t vl);
vint64m2_t __riscv_vwsub_wv_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwsub_wx_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                int32_t rs1, size_t vl);
vint64m4_t __riscv_vwsub_vv_tum(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwsub_vx_tum(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                int32_t rs1, size_t vl);
vint64m4_t __riscv_vwsub_wv_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwsub_wx_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                int32_t rs1, size_t vl);
vint64m8_t __riscv_vwsub_vv_tum(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwsub_vx_tum(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                int32_t rs1, size_t vl);
vint64m8_t __riscv_vwsub_wv_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwsub_wx_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                int32_t rs1, size_t vl);
// __riscv_vwaddu_{vv,vx,wv,wx}_tum: unsigned widening add, masked by `vm`.
// Same operand forms as the signed variants, with unsigned vector and
// scalar types throughout:
//   _vv: vs2 and vs1 are both narrow vectors
//   _vx: vs2 is a narrow vector, rs1 is a narrow scalar
//   _wv: vs2 already has the result type, vs1 is a narrow vector
//   _wx: vs2 already has the result type, rs1 is a narrow scalar
vuint16mf4_t __riscv_vwaddu_vv_tum(vbool64_t vm, vuint16mf4_t vd,
                                   vuint8mf8_t vs2, vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwaddu_vx_tum(vbool64_t vm, vuint16mf4_t vd,
                                   vuint8mf8_t vs2, uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vwaddu_wv_tum(vbool64_t vm, vuint16mf4_t vd,
                                   vuint16mf4_t vs2, vuint8mf8_t vs1,
                                   size_t vl);
vuint16mf4_t __riscv_vwaddu_wx_tum(vbool64_t vm, vuint16mf4_t vd,
                                   vuint16mf4_t vs2, uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwaddu_vv_tum(vbool32_t vm, vuint16mf2_t vd,
                                   vuint8mf4_t vs2, vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwaddu_vx_tum(vbool32_t vm, vuint16mf2_t vd,
                                   vuint8mf4_t vs2, uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwaddu_wv_tum(vbool32_t vm, vuint16mf2_t vd,
                                   vuint16mf2_t vs2, vuint8mf4_t vs1,
                                   size_t vl);
vuint16mf2_t __riscv_vwaddu_wx_tum(vbool32_t vm, vuint16mf2_t vd,
                                   vuint16mf2_t vs2, uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwaddu_vv_tum(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                  vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwaddu_vx_tum(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwaddu_wv_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                  vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwaddu_wx_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwaddu_vv_tum(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                  vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwaddu_vx_tum(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwaddu_wv_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                  vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwaddu_wx_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwaddu_vv_tum(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                  vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwaddu_vx_tum(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwaddu_wv_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                  vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwaddu_wx_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwaddu_vv_tum(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                  vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwaddu_vx_tum(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwaddu_wv_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                  vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwaddu_wx_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                  uint8_t rs1, size_t vl);
vuint32mf2_t __riscv_vwaddu_vv_tum(vbool64_t vm, vuint32mf2_t vd,
                                   vuint16mf4_t vs2, vuint16mf4_t vs1,
                                   size_t vl);
vuint32mf2_t __riscv_vwaddu_vx_tum(vbool64_t vm, vuint32mf2_t vd,
                                   vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vwaddu_wv_tum(vbool64_t vm, vuint32mf2_t vd,
                                   vuint32mf2_t vs2, vuint16mf4_t vs1,
                                   size_t vl);
vuint32mf2_t __riscv_vwaddu_wx_tum(vbool64_t vm, vuint32mf2_t vd,
                                   vuint32mf2_t vs2, uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwaddu_vv_tum(vbool32_t vm, vuint32m1_t vd,
                                  vuint16mf2_t vs2, vuint16mf2_t vs1,
                                  size_t vl);
vuint32m1_t __riscv_vwaddu_vx_tum(vbool32_t vm, vuint32m1_t vd,
                                  vuint16mf2_t vs2, uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwaddu_wv_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                  vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwaddu_wx_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwaddu_vv_tum(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                  vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwaddu_vx_tum(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwaddu_wv_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                  vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwaddu_wx_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwaddu_vv_tum(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                  vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwaddu_vx_tum(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwaddu_wv_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                  vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwaddu_wx_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwaddu_vv_tum(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                  vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwaddu_vx_tum(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwaddu_wv_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                  vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwaddu_wx_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                  uint16_t rs1, size_t vl);
vuint64m1_t __riscv_vwaddu_vv_tum(vbool64_t vm, vuint64m1_t vd,
                                  vuint32mf2_t vs2, vuint32mf2_t vs1,
                                  size_t vl);
vuint64m1_t __riscv_vwaddu_vx_tum(vbool64_t vm, vuint64m1_t vd,
                                  vuint32mf2_t vs2, uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vwaddu_wv_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                  vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwaddu_wx_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwaddu_vv_tum(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                  vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwaddu_vx_tum(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwaddu_wv_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                  vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwaddu_wx_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwaddu_vv_tum(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                  vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwaddu_vx_tum(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwaddu_wv_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                  vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwaddu_wx_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwaddu_vv_tum(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                  vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwaddu_vx_tum(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwaddu_wv_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                  vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwaddu_wx_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                  uint32_t rs1, size_t vl);
// __riscv_vwsubu_{vv,vx,wv,wx}_tum: unsigned widening subtract, masked by
// `vm`. Per the RVV ISA operand order the subtrahend is vs1/rs1 (vs2 - vs1).
// The result (and `vd`) element type is twice as wide as the narrow source:
//   _vv: vs2 and vs1 are both narrow vectors
//   _vx: vs2 is a narrow vector, rs1 is a narrow scalar
//   _wv: vs2 already has the result type, vs1 is a narrow vector
//   _wx: vs2 already has the result type, rs1 is a narrow scalar
vuint16mf4_t __riscv_vwsubu_vv_tum(vbool64_t vm, vuint16mf4_t vd,
                                   vuint8mf8_t vs2, vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwsubu_vx_tum(vbool64_t vm, vuint16mf4_t vd,
                                   vuint8mf8_t vs2, uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vwsubu_wv_tum(vbool64_t vm, vuint16mf4_t vd,
                                   vuint16mf4_t vs2, vuint8mf8_t vs1,
                                   size_t vl);
vuint16mf4_t __riscv_vwsubu_wx_tum(vbool64_t vm, vuint16mf4_t vd,
                                   vuint16mf4_t vs2, uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwsubu_vv_tum(vbool32_t vm, vuint16mf2_t vd,
                                   vuint8mf4_t vs2, vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwsubu_vx_tum(vbool32_t vm, vuint16mf2_t vd,
                                   vuint8mf4_t vs2, uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwsubu_wv_tum(vbool32_t vm, vuint16mf2_t vd,
                                   vuint16mf2_t vs2, vuint8mf4_t vs1,
                                   size_t vl);
vuint16mf2_t __riscv_vwsubu_wx_tum(vbool32_t vm, vuint16mf2_t vd,
                                   vuint16mf2_t vs2, uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwsubu_vv_tum(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                  vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwsubu_vx_tum(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwsubu_wv_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                  vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwsubu_wx_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwsubu_vv_tum(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                  vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwsubu_vx_tum(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwsubu_wv_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                  vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwsubu_wx_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwsubu_vv_tum(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                  vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwsubu_vx_tum(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwsubu_wv_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                  vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwsubu_wx_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwsubu_vv_tum(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                  vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwsubu_vx_tum(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                  uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwsubu_wv_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                  vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwsubu_wx_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                  uint8_t rs1, size_t vl);
vuint32mf2_t __riscv_vwsubu_vv_tum(vbool64_t vm, vuint32mf2_t vd,
                                   vuint16mf4_t vs2, vuint16mf4_t vs1,
                                   size_t vl);
vuint32mf2_t __riscv_vwsubu_vx_tum(vbool64_t vm, vuint32mf2_t vd,
                                   vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vwsubu_wv_tum(vbool64_t vm, vuint32mf2_t vd,
                                   vuint32mf2_t vs2, vuint16mf4_t vs1,
                                   size_t vl);
vuint32mf2_t __riscv_vwsubu_wx_tum(vbool64_t vm, vuint32mf2_t vd,
                                   vuint32mf2_t vs2, uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwsubu_vv_tum(vbool32_t vm, vuint32m1_t vd,
                                  vuint16mf2_t vs2, vuint16mf2_t vs1,
                                  size_t vl);
vuint32m1_t __riscv_vwsubu_vx_tum(vbool32_t vm, vuint32m1_t vd,
                                  vuint16mf2_t vs2, uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwsubu_wv_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                  vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwsubu_wx_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwsubu_vv_tum(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                  vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwsubu_vx_tum(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwsubu_wv_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                  vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwsubu_wx_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwsubu_vv_tum(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                  vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwsubu_vx_tum(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwsubu_wv_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                  vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwsubu_wx_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwsubu_vv_tum(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                  vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwsubu_vx_tum(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                  uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwsubu_wv_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                  vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwsubu_wx_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                  uint16_t rs1, size_t vl);
vuint64m1_t __riscv_vwsubu_vv_tum(vbool64_t vm, vuint64m1_t vd,
                                  vuint32mf2_t vs2, vuint32mf2_t vs1,
                                  size_t vl);
vuint64m1_t __riscv_vwsubu_vx_tum(vbool64_t vm, vuint64m1_t vd,
                                  vuint32mf2_t vs2, uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vwsubu_wv_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                  vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwsubu_wx_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwsubu_vv_tum(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                  vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwsubu_vx_tum(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwsubu_wv_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                  vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwsubu_wx_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwsubu_vv_tum(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                  vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwsubu_vx_tum(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwsubu_wv_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                  vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwsubu_wx_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwsubu_vv_tum(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                  vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwsubu_vx_tum(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                  uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwsubu_wv_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                  vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwsubu_wx_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                  uint32_t rs1, size_t vl);
// masked functions (_tumu: tail undisturbed, mask undisturbed)
vint16mf4_t __riscv_vwadd_vv_tumu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                  vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwadd_vx_tumu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                  int8_t rs1, size_t vl);
vint16mf4_t __riscv_vwadd_wv_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                  vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwadd_wx_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                  int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwadd_vv_tumu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                  vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwadd_vx_tumu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                  int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwadd_wv_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                  vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwadd_wx_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                  int8_t rs1, size_t vl);
vint16m1_t __riscv_vwadd_vv_tumu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                 vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwadd_vx_tumu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                 int8_t rs1, size_t vl);
vint16m1_t __riscv_vwadd_wv_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                 vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwadd_wx_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                 int8_t rs1, size_t vl);
vint16m2_t __riscv_vwadd_vv_tumu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                 vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwadd_vx_tumu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                 int8_t rs1, size_t vl);
vint16m2_t __riscv_vwadd_wv_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                 vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwadd_wx_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                 int8_t rs1, size_t vl);
vint16m4_t __riscv_vwadd_vv_tumu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                 vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwadd_vx_tumu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                 int8_t rs1, size_t vl);
vint16m4_t __riscv_vwadd_wv_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                 vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwadd_wx_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                 int8_t rs1, size_t vl);
vint16m8_t __riscv_vwadd_vv_tumu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                 vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwadd_vx_tumu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                 int8_t rs1, size_t vl);
vint16m8_t __riscv_vwadd_wv_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                 vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwadd_wx_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                 int8_t rs1, size_t vl);
vint32mf2_t __riscv_vwadd_vv_tumu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                  vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwadd_vx_tumu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                  int16_t rs1, size_t vl);
vint32mf2_t __riscv_vwadd_wv_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                  vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwadd_wx_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                  int16_t rs1, size_t vl);
vint32m1_t __riscv_vwadd_vv_tumu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                 vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwadd_vx_tumu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                 int16_t rs1, size_t vl);
vint32m1_t __riscv_vwadd_wv_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                 vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwadd_wx_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                 int16_t rs1, size_t vl);
vint32m2_t __riscv_vwadd_vv_tumu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                 vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwadd_vx_tumu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                 int16_t rs1, size_t vl);
vint32m2_t __riscv_vwadd_wv_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                 vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwadd_wx_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                 int16_t rs1, size_t vl);
vint32m4_t __riscv_vwadd_vv_tumu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                 vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwadd_vx_tumu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                 int16_t rs1, size_t vl);
vint32m4_t __riscv_vwadd_wv_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                 vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwadd_wx_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                 int16_t rs1, size_t vl);
vint32m8_t __riscv_vwadd_vv_tumu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                 vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwadd_vx_tumu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                 int16_t rs1, size_t vl);
vint32m8_t __riscv_vwadd_wv_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                 vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwadd_wx_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                 int16_t rs1, size_t vl);
vint64m1_t __riscv_vwadd_vv_tumu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                 vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwadd_vx_tumu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                 int32_t rs1, size_t vl);
vint64m1_t __riscv_vwadd_wv_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                 vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwadd_wx_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                 int32_t rs1, size_t vl);
vint64m2_t __riscv_vwadd_vv_tumu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                 vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwadd_vx_tumu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                 int32_t rs1, size_t vl);
vint64m2_t __riscv_vwadd_wv_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                 vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwadd_wx_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                 int32_t rs1, size_t vl);
vint64m4_t __riscv_vwadd_vv_tumu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                 vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwadd_vx_tumu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                 int32_t rs1, size_t vl);
vint64m4_t __riscv_vwadd_wv_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                 vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwadd_wx_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                 int32_t rs1, size_t vl);
vint64m8_t __riscv_vwadd_vv_tumu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                 vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwadd_vx_tumu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                 int32_t rs1, size_t vl);
vint64m8_t __riscv_vwadd_wv_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                 vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwadd_wx_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                 int32_t rs1, size_t vl);
vint16mf4_t __riscv_vwsub_vv_tumu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                  vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwsub_vx_tumu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                  int8_t rs1, size_t vl);
vint16mf4_t __riscv_vwsub_wv_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                  vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwsub_wx_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                  int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwsub_vv_tumu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                  vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwsub_vx_tumu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                  int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwsub_wv_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                  vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwsub_wx_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                  int8_t rs1, size_t vl);
vint16m1_t __riscv_vwsub_vv_tumu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                 vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwsub_vx_tumu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                 int8_t rs1, size_t vl);
vint16m1_t __riscv_vwsub_wv_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                 vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwsub_wx_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                 int8_t rs1, size_t vl);
vint16m2_t __riscv_vwsub_vv_tumu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                 vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwsub_vx_tumu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                 int8_t rs1, size_t vl);
vint16m2_t __riscv_vwsub_wv_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                 vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwsub_wx_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                 int8_t rs1, size_t vl);
vint16m4_t __riscv_vwsub_vv_tumu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                 vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwsub_vx_tumu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                 int8_t rs1, size_t vl);
vint16m4_t __riscv_vwsub_wv_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                 vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwsub_wx_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                 int8_t rs1, size_t vl);
vint16m8_t __riscv_vwsub_vv_tumu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                 vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwsub_vx_tumu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                 int8_t rs1, size_t vl);
vint16m8_t __riscv_vwsub_wv_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                 vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwsub_wx_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                 int8_t rs1, size_t vl);
vint32mf2_t __riscv_vwsub_vv_tumu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                  vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwsub_vx_tumu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                  int16_t rs1, size_t vl);
vint32mf2_t __riscv_vwsub_wv_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                  vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwsub_wx_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                  int16_t rs1, size_t vl);
vint32m1_t __riscv_vwsub_vv_tumu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                 vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwsub_vx_tumu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                 int16_t rs1, size_t vl);
vint32m1_t __riscv_vwsub_wv_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                 vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwsub_wx_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                 int16_t rs1, size_t vl);
vint32m2_t __riscv_vwsub_vv_tumu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                 vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwsub_vx_tumu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                 int16_t rs1, size_t vl);
vint32m2_t __riscv_vwsub_wv_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                 vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwsub_wx_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                 int16_t rs1, size_t vl);
vint32m4_t __riscv_vwsub_vv_tumu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                 vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwsub_vx_tumu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                 int16_t rs1, size_t vl);
vint32m4_t __riscv_vwsub_wv_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                 vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwsub_wx_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                 int16_t rs1, size_t vl);
vint32m8_t __riscv_vwsub_vv_tumu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                 vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwsub_vx_tumu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                 int16_t rs1, size_t vl);
vint32m8_t __riscv_vwsub_wv_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                 vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwsub_wx_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                 int16_t rs1, size_t vl);
vint64m1_t __riscv_vwsub_vv_tumu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                 vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwsub_vx_tumu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                 int32_t rs1, size_t vl);
vint64m1_t __riscv_vwsub_wv_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                 vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwsub_wx_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                 int32_t rs1, size_t vl);
vint64m2_t __riscv_vwsub_vv_tumu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                 vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwsub_vx_tumu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                 int32_t rs1, size_t vl);
vint64m2_t __riscv_vwsub_wv_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                 vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwsub_wx_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                 int32_t rs1, size_t vl);
vint64m4_t __riscv_vwsub_vv_tumu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                 vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwsub_vx_tumu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                 int32_t rs1, size_t vl);
vint64m4_t __riscv_vwsub_wv_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                 vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwsub_wx_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                 int32_t rs1, size_t vl);
vint64m8_t __riscv_vwsub_vv_tumu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                 vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwsub_vx_tumu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                 int32_t rs1, size_t vl);
vint64m8_t __riscv_vwsub_wv_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                 vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwsub_wx_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                 int32_t rs1, size_t vl);
vuint16mf4_t __riscv_vwaddu_vv_tumu(vbool64_t vm, vuint16mf4_t vd,
                                    vuint8mf8_t vs2, vuint8mf8_t vs1,
                                    size_t vl);
vuint16mf4_t __riscv_vwaddu_vx_tumu(vbool64_t vm, vuint16mf4_t vd,
                                    vuint8mf8_t vs2, uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vwaddu_wv_tumu(vbool64_t vm, vuint16mf4_t vd,
                                    vuint16mf4_t vs2, vuint8mf8_t vs1,
                                    size_t vl);
vuint16mf4_t __riscv_vwaddu_wx_tumu(vbool64_t vm, vuint16mf4_t vd,
                                    vuint16mf4_t vs2, uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwaddu_vv_tumu(vbool32_t vm, vuint16mf2_t vd,
                                    vuint8mf4_t vs2, vuint8mf4_t vs1,
                                    size_t vl);
vuint16mf2_t __riscv_vwaddu_vx_tumu(vbool32_t vm, vuint16mf2_t vd,
                                    vuint8mf4_t vs2, uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwaddu_wv_tumu(vbool32_t vm, vuint16mf2_t vd,
                                    vuint16mf2_t vs2, vuint8mf4_t vs1,
                                    size_t vl);
vuint16mf2_t __riscv_vwaddu_wx_tumu(vbool32_t vm, vuint16mf2_t vd,
                                    vuint16mf2_t vs2, uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwaddu_vv_tumu(vbool16_t vm, vuint16m1_t vd,
                                   vuint8mf2_t vs2, vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwaddu_vx_tumu(vbool16_t vm, vuint16m1_t vd,
                                   vuint8mf2_t vs2, uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwaddu_wv_tumu(vbool16_t vm, vuint16m1_t vd,
                                   vuint16m1_t vs2, vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwaddu_wx_tumu(vbool16_t vm, vuint16m1_t vd,
                                   vuint16m1_t vs2, uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwaddu_vv_tumu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                   vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwaddu_vx_tumu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                   uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwaddu_wv_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                   vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwaddu_wx_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                   uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwaddu_vv_tumu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                   vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwaddu_vx_tumu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                   uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwaddu_wv_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                   vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwaddu_wx_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                   uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwaddu_vv_tumu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                   vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwaddu_vx_tumu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                   uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwaddu_wv_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                   vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwaddu_wx_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                   uint8_t rs1, size_t vl);
vuint32mf2_t __riscv_vwaddu_vv_tumu(vbool64_t vm, vuint32mf2_t vd,
                                    vuint16mf4_t vs2, vuint16mf4_t vs1,
                                    size_t vl);
vuint32mf2_t __riscv_vwaddu_vx_tumu(vbool64_t vm, vuint32mf2_t vd,
                                    vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vwaddu_wv_tumu(vbool64_t vm, vuint32mf2_t vd,
                                    vuint32mf2_t vs2, vuint16mf4_t vs1,
                                    size_t vl);
vuint32mf2_t __riscv_vwaddu_wx_tumu(vbool64_t vm, vuint32mf2_t vd,
                                    vuint32mf2_t vs2, uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwaddu_vv_tumu(vbool32_t vm, vuint32m1_t vd,
                                   vuint16mf2_t vs2, vuint16mf2_t vs1,
                                   size_t vl);
vuint32m1_t __riscv_vwaddu_vx_tumu(vbool32_t vm, vuint32m1_t vd,
                                   vuint16mf2_t vs2, uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwaddu_wv_tumu(vbool32_t vm, vuint32m1_t vd,
                                   vuint32m1_t vs2, vuint16mf2_t vs1,
                                   size_t vl);
vuint32m1_t __riscv_vwaddu_wx_tumu(vbool32_t vm, vuint32m1_t vd,
                                   vuint32m1_t vs2, uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwaddu_vv_tumu(vbool16_t vm, vuint32m2_t vd,
                                   vuint16m1_t vs2, vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwaddu_vx_tumu(vbool16_t vm, vuint32m2_t vd,
                                   vuint16m1_t vs2, uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwaddu_wv_tumu(vbool16_t vm, vuint32m2_t vd,
                                   vuint32m2_t vs2, vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwaddu_wx_tumu(vbool16_t vm, vuint32m2_t vd,
                                   vuint32m2_t vs2, uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwaddu_vv_tumu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                   vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwaddu_vx_tumu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                   uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwaddu_wv_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                   vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwaddu_wx_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                   uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwaddu_vv_tumu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                   vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwaddu_vx_tumu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                   uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwaddu_wv_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                   vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwaddu_wx_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                   uint16_t rs1, size_t vl);
vuint64m1_t __riscv_vwaddu_vv_tumu(vbool64_t vm, vuint64m1_t vd,
                                   vuint32mf2_t vs2, vuint32mf2_t vs1,
                                   size_t vl);
vuint64m1_t __riscv_vwaddu_vx_tumu(vbool64_t vm, vuint64m1_t vd,
                                   vuint32mf2_t vs2, uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vwaddu_wv_tumu(vbool64_t vm, vuint64m1_t vd,
                                   vuint64m1_t vs2, vuint32mf2_t vs1,
                                   size_t vl);
vuint64m1_t __riscv_vwaddu_wx_tumu(vbool64_t vm, vuint64m1_t vd,
                                   vuint64m1_t vs2, uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwaddu_vv_tumu(vbool32_t vm, vuint64m2_t vd,
                                   vuint32m1_t vs2, vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwaddu_vx_tumu(vbool32_t vm, vuint64m2_t vd,
                                   vuint32m1_t vs2, uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwaddu_wv_tumu(vbool32_t vm, vuint64m2_t vd,
                                   vuint64m2_t vs2, vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwaddu_wx_tumu(vbool32_t vm, vuint64m2_t vd,
                                   vuint64m2_t vs2, uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwaddu_vv_tumu(vbool16_t vm, vuint64m4_t vd,
                                   vuint32m2_t vs2, vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwaddu_vx_tumu(vbool16_t vm, vuint64m4_t vd,
                                   vuint32m2_t vs2, uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwaddu_wv_tumu(vbool16_t vm, vuint64m4_t vd,
                                   vuint64m4_t vs2, vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwaddu_wx_tumu(vbool16_t vm, vuint64m4_t vd,
                                   vuint64m4_t vs2, uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwaddu_vv_tumu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                   vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwaddu_vx_tumu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                   uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwaddu_wv_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                   vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwaddu_wx_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                   uint32_t rs1, size_t vl);
vuint16mf4_t __riscv_vwsubu_vv_tumu(vbool64_t vm, vuint16mf4_t vd,
                                    vuint8mf8_t vs2, vuint8mf8_t vs1,
                                    size_t vl);
vuint16mf4_t __riscv_vwsubu_vx_tumu(vbool64_t vm, vuint16mf4_t vd,
                                    vuint8mf8_t vs2, uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vwsubu_wv_tumu(vbool64_t vm, vuint16mf4_t vd,
                                    vuint16mf4_t vs2, vuint8mf8_t vs1,
                                    size_t vl);
vuint16mf4_t __riscv_vwsubu_wx_tumu(vbool64_t vm, vuint16mf4_t vd,
                                    vuint16mf4_t vs2, uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwsubu_vv_tumu(vbool32_t vm, vuint16mf2_t vd,
                                    vuint8mf4_t vs2, vuint8mf4_t vs1,
                                    size_t vl);
vuint16mf2_t __riscv_vwsubu_vx_tumu(vbool32_t vm, vuint16mf2_t vd,
                                    vuint8mf4_t vs2, uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwsubu_wv_tumu(vbool32_t vm, vuint16mf2_t vd,
                                    vuint16mf2_t vs2, vuint8mf4_t vs1,
                                    size_t vl);
vuint16mf2_t __riscv_vwsubu_wx_tumu(vbool32_t vm, vuint16mf2_t vd,
                                    vuint16mf2_t vs2, uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwsubu_vv_tumu(vbool16_t vm, vuint16m1_t vd,
                                   vuint8mf2_t vs2, vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwsubu_vx_tumu(vbool16_t vm, vuint16m1_t vd,
                                   vuint8mf2_t vs2, uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwsubu_wv_tumu(vbool16_t vm, vuint16m1_t vd,
                                   vuint16m1_t vs2, vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwsubu_wx_tumu(vbool16_t vm, vuint16m1_t vd,
                                   vuint16m1_t vs2, uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwsubu_vv_tumu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                   vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwsubu_vx_tumu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                   uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwsubu_wv_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                   vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwsubu_wx_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                   uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwsubu_vv_tumu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                   vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwsubu_vx_tumu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                   uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwsubu_wv_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                   vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwsubu_wx_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                   uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwsubu_vv_tumu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                   vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwsubu_vx_tumu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                   uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwsubu_wv_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                   vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwsubu_wx_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                   uint8_t rs1, size_t vl);
vuint32mf2_t __riscv_vwsubu_vv_tumu(vbool64_t vm, vuint32mf2_t vd,
                                    vuint16mf4_t vs2, vuint16mf4_t vs1,
                                    size_t vl);
vuint32mf2_t __riscv_vwsubu_vx_tumu(vbool64_t vm, vuint32mf2_t vd,
                                    vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vwsubu_wv_tumu(vbool64_t vm, vuint32mf2_t vd,
                                    vuint32mf2_t vs2, vuint16mf4_t vs1,
                                    size_t vl);
vuint32mf2_t __riscv_vwsubu_wx_tumu(vbool64_t vm, vuint32mf2_t vd,
                                    vuint32mf2_t vs2, uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwsubu_vv_tumu(vbool32_t vm, vuint32m1_t vd,
                                   vuint16mf2_t vs2, vuint16mf2_t vs1,
                                   size_t vl);
vuint32m1_t __riscv_vwsubu_vx_tumu(vbool32_t vm, vuint32m1_t vd,
                                   vuint16mf2_t vs2, uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwsubu_wv_tumu(vbool32_t vm, vuint32m1_t vd,
                                   vuint32m1_t vs2, vuint16mf2_t vs1,
                                   size_t vl);
vuint32m1_t __riscv_vwsubu_wx_tumu(vbool32_t vm, vuint32m1_t vd,
                                   vuint32m1_t vs2, uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwsubu_vv_tumu(vbool16_t vm, vuint32m2_t vd,
                                   vuint16m1_t vs2, vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwsubu_vx_tumu(vbool16_t vm, vuint32m2_t vd,
                                   vuint16m1_t vs2, uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwsubu_wv_tumu(vbool16_t vm, vuint32m2_t vd,
                                   vuint32m2_t vs2, vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwsubu_wx_tumu(vbool16_t vm, vuint32m2_t vd,
                                   vuint32m2_t vs2, uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwsubu_vv_tumu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                   vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwsubu_vx_tumu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                   uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwsubu_wv_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                   vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwsubu_wx_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                   uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwsubu_vv_tumu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                   vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwsubu_vx_tumu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                   uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwsubu_wv_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                   vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwsubu_wx_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                   uint16_t rs1, size_t vl);
vuint64m1_t __riscv_vwsubu_vv_tumu(vbool64_t vm, vuint64m1_t vd,
                                   vuint32mf2_t vs2, vuint32mf2_t vs1,
                                   size_t vl);
vuint64m1_t __riscv_vwsubu_vx_tumu(vbool64_t vm, vuint64m1_t vd,
                                   vuint32mf2_t vs2, uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vwsubu_wv_tumu(vbool64_t vm, vuint64m1_t vd,
                                   vuint64m1_t vs2, vuint32mf2_t vs1,
                                   size_t vl);
vuint64m1_t __riscv_vwsubu_wx_tumu(vbool64_t vm, vuint64m1_t vd,
                                   vuint64m1_t vs2, uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwsubu_vv_tumu(vbool32_t vm, vuint64m2_t vd,
                                   vuint32m1_t vs2, vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwsubu_vx_tumu(vbool32_t vm, vuint64m2_t vd,
                                   vuint32m1_t vs2, uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwsubu_wv_tumu(vbool32_t vm, vuint64m2_t vd,
                                   vuint64m2_t vs2, vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwsubu_wx_tumu(vbool32_t vm, vuint64m2_t vd,
                                   vuint64m2_t vs2, uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwsubu_vv_tumu(vbool16_t vm, vuint64m4_t vd,
                                   vuint32m2_t vs2, vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwsubu_vx_tumu(vbool16_t vm, vuint64m4_t vd,
                                   vuint32m2_t vs2, uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwsubu_wv_tumu(vbool16_t vm, vuint64m4_t vd,
                                   vuint64m4_t vs2, vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwsubu_wx_tumu(vbool16_t vm, vuint64m4_t vd,
                                   vuint64m4_t vs2, uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwsubu_vv_tumu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                   vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwsubu_vx_tumu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                   uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwsubu_wv_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                   vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwsubu_wx_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                   uint32_t rs1, size_t vl);
// masked functions (`_mu` suffix)
//
// Overloaded __riscv_vwadd_{vv,vx,wv,wx}_mu prototypes with vint16mf4_t
// through vint16m8_t results. Every variant takes a mask `vm`, an operand
// `vd` of the result type, two sources and the vector length `vl`.
//   _vv / _vx : vs2 is a vint8 vector; the second source is a vint8 vector
//               (vs1) or an int8_t scalar (rs1).
//   _wv / _wx : vs2 already has the result type; second source as above.
// The mask type runs from vbool64_t (vint16mf4_t) down to vbool2_t
// (vint16m8_t).
// NOTE(review): `_mu` presumably selects the mask-undisturbed policy --
// confirm against the RVV intrinsics spec.
vint16mf4_t __riscv_vwadd_vv_mu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwadd_vx_mu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                int8_t rs1, size_t vl);
vint16mf4_t __riscv_vwadd_wv_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwadd_wx_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwadd_vv_mu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwadd_vx_mu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwadd_wv_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwadd_wx_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                int8_t rs1, size_t vl);
vint16m1_t __riscv_vwadd_vv_mu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                               vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwadd_vx_mu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                               int8_t rs1, size_t vl);
vint16m1_t __riscv_vwadd_wv_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                               vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwadd_wx_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                               int8_t rs1, size_t vl);
vint16m2_t __riscv_vwadd_vv_mu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                               vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwadd_vx_mu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                               int8_t rs1, size_t vl);
vint16m2_t __riscv_vwadd_wv_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                               vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwadd_wx_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                               int8_t rs1, size_t vl);
vint16m4_t __riscv_vwadd_vv_mu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                               vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwadd_vx_mu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                               int8_t rs1, size_t vl);
vint16m4_t __riscv_vwadd_wv_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                               vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwadd_wx_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                               int8_t rs1, size_t vl);
vint16m8_t __riscv_vwadd_vv_mu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                               vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwadd_vx_mu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                               int8_t rs1, size_t vl);
vint16m8_t __riscv_vwadd_wv_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                               vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwadd_wx_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                               int8_t rs1, size_t vl);
// __riscv_vwadd_{vv,vx,wv,wx}_mu with vint32mf2_t through vint32m8_t results.
// Narrow operands are vint16 vectors (vs1, and vs2 for _vv/_vx) or an
// int16_t scalar (rs1); _wv/_wx take vs2 in the result type.
vint32mf2_t __riscv_vwadd_vv_mu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwadd_vx_mu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                int16_t rs1, size_t vl);
vint32mf2_t __riscv_vwadd_wv_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwadd_wx_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                int16_t rs1, size_t vl);
vint32m1_t __riscv_vwadd_vv_mu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                               vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwadd_vx_mu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                               int16_t rs1, size_t vl);
vint32m1_t __riscv_vwadd_wv_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                               vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwadd_wx_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                               int16_t rs1, size_t vl);
vint32m2_t __riscv_vwadd_vv_mu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                               vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwadd_vx_mu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                               int16_t rs1, size_t vl);
vint32m2_t __riscv_vwadd_wv_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                               vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwadd_wx_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                               int16_t rs1, size_t vl);
vint32m4_t __riscv_vwadd_vv_mu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                               vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwadd_vx_mu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                               int16_t rs1, size_t vl);
vint32m4_t __riscv_vwadd_wv_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                               vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwadd_wx_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                               int16_t rs1, size_t vl);
vint32m8_t __riscv_vwadd_vv_mu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                               vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwadd_vx_mu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                               int16_t rs1, size_t vl);
vint32m8_t __riscv_vwadd_wv_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                               vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwadd_wx_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                               int16_t rs1, size_t vl);
// __riscv_vwadd_{vv,vx,wv,wx}_mu with vint64m1_t through vint64m8_t results.
// Narrow operands are vint32 vectors (vs1, and vs2 for _vv/_vx) or an
// int32_t scalar (rs1); _wv/_wx take vs2 in the result type.
vint64m1_t __riscv_vwadd_vv_mu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                               vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwadd_vx_mu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                               int32_t rs1, size_t vl);
vint64m1_t __riscv_vwadd_wv_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                               vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwadd_wx_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                               int32_t rs1, size_t vl);
vint64m2_t __riscv_vwadd_vv_mu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                               vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwadd_vx_mu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                               int32_t rs1, size_t vl);
vint64m2_t __riscv_vwadd_wv_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                               vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwadd_wx_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                               int32_t rs1, size_t vl);
vint64m4_t __riscv_vwadd_vv_mu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                               vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwadd_vx_mu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                               int32_t rs1, size_t vl);
vint64m4_t __riscv_vwadd_wv_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                               vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwadd_wx_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                               int32_t rs1, size_t vl);
vint64m8_t __riscv_vwadd_vv_mu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                               vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwadd_vx_mu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                               int32_t rs1, size_t vl);
vint64m8_t __riscv_vwadd_wv_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                               vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwadd_wx_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                               int32_t rs1, size_t vl);
// Overloaded __riscv_vwsub_{vv,vx,wv,wx}_mu prototypes with vint16mf4_t
// through vint16m8_t results. Every variant takes a mask `vm`, an operand
// `vd` of the result type, two sources and the vector length `vl`.
//   _vv / _vx : vs2 is a vint8 vector; the second source is a vint8 vector
//               (vs1) or an int8_t scalar (rs1).
//   _wv / _wx : vs2 already has the result type; second source as above.
vint16mf4_t __riscv_vwsub_vv_mu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwsub_vx_mu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                int8_t rs1, size_t vl);
vint16mf4_t __riscv_vwsub_wv_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwsub_wx_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwsub_vv_mu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwsub_vx_mu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwsub_wv_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwsub_wx_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                int8_t rs1, size_t vl);
vint16m1_t __riscv_vwsub_vv_mu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                               vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwsub_vx_mu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                               int8_t rs1, size_t vl);
vint16m1_t __riscv_vwsub_wv_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                               vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwsub_wx_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                               int8_t rs1, size_t vl);
vint16m2_t __riscv_vwsub_vv_mu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                               vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwsub_vx_mu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                               int8_t rs1, size_t vl);
vint16m2_t __riscv_vwsub_wv_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                               vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwsub_wx_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                               int8_t rs1, size_t vl);
vint16m4_t __riscv_vwsub_vv_mu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                               vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwsub_vx_mu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                               int8_t rs1, size_t vl);
vint16m4_t __riscv_vwsub_wv_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                               vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwsub_wx_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                               int8_t rs1, size_t vl);
vint16m8_t __riscv_vwsub_vv_mu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                               vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwsub_vx_mu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                               int8_t rs1, size_t vl);
vint16m8_t __riscv_vwsub_wv_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                               vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwsub_wx_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                               int8_t rs1, size_t vl);
// __riscv_vwsub_{vv,vx,wv,wx}_mu with vint32mf2_t through vint32m8_t results.
// Narrow operands are vint16 vectors (vs1, and vs2 for _vv/_vx) or an
// int16_t scalar (rs1); _wv/_wx take vs2 in the result type.
vint32mf2_t __riscv_vwsub_vv_mu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwsub_vx_mu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                int16_t rs1, size_t vl);
vint32mf2_t __riscv_vwsub_wv_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwsub_wx_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                int16_t rs1, size_t vl);
vint32m1_t __riscv_vwsub_vv_mu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                               vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwsub_vx_mu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                               int16_t rs1, size_t vl);
vint32m1_t __riscv_vwsub_wv_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                               vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwsub_wx_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                               int16_t rs1, size_t vl);
vint32m2_t __riscv_vwsub_vv_mu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                               vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwsub_vx_mu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                               int16_t rs1, size_t vl);
vint32m2_t __riscv_vwsub_wv_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                               vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwsub_wx_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                               int16_t rs1, size_t vl);
vint32m4_t __riscv_vwsub_vv_mu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                               vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwsub_vx_mu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                               int16_t rs1, size_t vl);
vint32m4_t __riscv_vwsub_wv_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                               vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwsub_wx_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                               int16_t rs1, size_t vl);
vint32m8_t __riscv_vwsub_vv_mu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                               vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwsub_vx_mu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                               int16_t rs1, size_t vl);
vint32m8_t __riscv_vwsub_wv_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                               vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwsub_wx_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                               int16_t rs1, size_t vl);
// __riscv_vwsub_{vv,vx,wv,wx}_mu with vint64m1_t through vint64m8_t results.
// Narrow operands are vint32 vectors (vs1, and vs2 for _vv/_vx) or an
// int32_t scalar (rs1); _wv/_wx take vs2 in the result type.
vint64m1_t __riscv_vwsub_vv_mu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                               vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwsub_vx_mu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                               int32_t rs1, size_t vl);
vint64m1_t __riscv_vwsub_wv_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                               vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwsub_wx_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                               int32_t rs1, size_t vl);
vint64m2_t __riscv_vwsub_vv_mu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                               vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwsub_vx_mu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                               int32_t rs1, size_t vl);
vint64m2_t __riscv_vwsub_wv_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                               vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwsub_wx_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                               int32_t rs1, size_t vl);
vint64m4_t __riscv_vwsub_vv_mu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                               vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwsub_vx_mu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                               int32_t rs1, size_t vl);
vint64m4_t __riscv_vwsub_wv_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                               vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwsub_wx_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                               int32_t rs1, size_t vl);
vint64m8_t __riscv_vwsub_vv_mu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                               vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwsub_vx_mu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                               int32_t rs1, size_t vl);
vint64m8_t __riscv_vwsub_wv_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                               vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwsub_wx_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                               int32_t rs1, size_t vl);
// Overloaded __riscv_vwaddu_{vv,vx,wv,wx}_mu prototypes with vuint16mf4_t
// through vuint16m8_t results: the unsigned-typed counterparts of the
// __riscv_vwadd_*_mu prototypes. Every variant takes a mask `vm`, an operand
// `vd` of the result type, two sources and the vector length `vl`.
//   _vv / _vx : vs2 is a vuint8 vector; the second source is a vuint8 vector
//               (vs1) or a uint8_t scalar (rs1).
//   _wv / _wx : vs2 already has the result type; second source as above.
vuint16mf4_t __riscv_vwaddu_vv_mu(vbool64_t vm, vuint16mf4_t vd,
                                  vuint8mf8_t vs2, vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwaddu_vx_mu(vbool64_t vm, vuint16mf4_t vd,
                                  vuint8mf8_t vs2, uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vwaddu_wv_mu(vbool64_t vm, vuint16mf4_t vd,
                                  vuint16mf4_t vs2, vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwaddu_wx_mu(vbool64_t vm, vuint16mf4_t vd,
                                  vuint16mf4_t vs2, uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwaddu_vv_mu(vbool32_t vm, vuint16mf2_t vd,
                                  vuint8mf4_t vs2, vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwaddu_vx_mu(vbool32_t vm, vuint16mf2_t vd,
                                  vuint8mf4_t vs2, uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwaddu_wv_mu(vbool32_t vm, vuint16mf2_t vd,
                                  vuint16mf2_t vs2, vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwaddu_wx_mu(vbool32_t vm, vuint16mf2_t vd,
                                  vuint16mf2_t vs2, uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwaddu_vv_mu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                 vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwaddu_vx_mu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwaddu_wv_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                 vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwaddu_wx_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwaddu_vv_mu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                 vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwaddu_vx_mu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwaddu_wv_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                 vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwaddu_wx_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwaddu_vv_mu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                 vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwaddu_vx_mu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwaddu_wv_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                 vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwaddu_wx_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwaddu_vv_mu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                 vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwaddu_vx_mu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwaddu_wv_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                 vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwaddu_wx_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                 uint8_t rs1, size_t vl);
// __riscv_vwaddu_{vv,vx,wv,wx}_mu with vuint32mf2_t through vuint32m8_t
// results. Narrow operands are vuint16 vectors (vs1, and vs2 for _vv/_vx) or
// a uint16_t scalar (rs1); _wv/_wx take vs2 in the result type.
vuint32mf2_t __riscv_vwaddu_vv_mu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint16mf4_t vs2, vuint16mf4_t vs1,
                                  size_t vl);
vuint32mf2_t __riscv_vwaddu_vx_mu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vwaddu_wv_mu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint32mf2_t vs2, vuint16mf4_t vs1,
                                  size_t vl);
vuint32mf2_t __riscv_vwaddu_wx_mu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint32mf2_t vs2, uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwaddu_vv_mu(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                                 vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwaddu_vx_mu(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwaddu_wv_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                 vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwaddu_wx_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwaddu_vv_mu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                 vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwaddu_vx_mu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwaddu_wv_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                 vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwaddu_wx_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwaddu_vv_mu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                 vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwaddu_vx_mu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwaddu_wv_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                 vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwaddu_wx_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwaddu_vv_mu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                 vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwaddu_vx_mu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwaddu_wv_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                 vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwaddu_wx_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                 uint16_t rs1, size_t vl);
// __riscv_vwaddu_{vv,vx,wv,wx}_mu with vuint64m1_t through vuint64m8_t
// results. Narrow operands are vuint32 vectors (vs1, and vs2 for _vv/_vx) or
// a uint32_t scalar (rs1); _wv/_wx take vs2 in the result type.
vuint64m1_t __riscv_vwaddu_vv_mu(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                                 vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwaddu_vx_mu(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vwaddu_wv_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                 vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwaddu_wx_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwaddu_vv_mu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                 vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwaddu_vx_mu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwaddu_wv_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                 vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwaddu_wx_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwaddu_vv_mu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                 vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwaddu_vx_mu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwaddu_wv_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                 vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwaddu_wx_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwaddu_vv_mu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                 vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwaddu_vx_mu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwaddu_wv_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                 vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwaddu_wx_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                 uint32_t rs1, size_t vl);
// Overloaded __riscv_vwsubu_{vv,vx,wv,wx}_mu prototypes with vuint16mf4_t
// through vuint16m8_t results: the unsigned-typed counterparts of the
// __riscv_vwsub_*_mu prototypes. Every variant takes a mask `vm`, an operand
// `vd` of the result type, two sources and the vector length `vl`.
//   _vv / _vx : vs2 is a vuint8 vector; the second source is a vuint8 vector
//               (vs1) or a uint8_t scalar (rs1).
//   _wv / _wx : vs2 already has the result type; second source as above.
vuint16mf4_t __riscv_vwsubu_vv_mu(vbool64_t vm, vuint16mf4_t vd,
                                  vuint8mf8_t vs2, vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwsubu_vx_mu(vbool64_t vm, vuint16mf4_t vd,
                                  vuint8mf8_t vs2, uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vwsubu_wv_mu(vbool64_t vm, vuint16mf4_t vd,
                                  vuint16mf4_t vs2, vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwsubu_wx_mu(vbool64_t vm, vuint16mf4_t vd,
                                  vuint16mf4_t vs2, uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwsubu_vv_mu(vbool32_t vm, vuint16mf2_t vd,
                                  vuint8mf4_t vs2, vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwsubu_vx_mu(vbool32_t vm, vuint16mf2_t vd,
                                  vuint8mf4_t vs2, uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwsubu_wv_mu(vbool32_t vm, vuint16mf2_t vd,
                                  vuint16mf2_t vs2, vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwsubu_wx_mu(vbool32_t vm, vuint16mf2_t vd,
                                  vuint16mf2_t vs2, uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwsubu_vv_mu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                 vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwsubu_vx_mu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwsubu_wv_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                 vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwsubu_wx_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwsubu_vv_mu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                 vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwsubu_vx_mu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwsubu_wv_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                 vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwsubu_wx_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwsubu_vv_mu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                 vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwsubu_vx_mu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwsubu_wv_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                 vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwsubu_wx_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwsubu_vv_mu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                 vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwsubu_vx_mu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwsubu_wv_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                 vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwsubu_wx_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                 uint8_t rs1, size_t vl);
// __riscv_vwsubu_{vv,vx,wv,wx}_mu with vuint32mf2_t through vuint32m8_t
// results. Narrow operands are vuint16 vectors (vs1, and vs2 for _vv/_vx) or
// a uint16_t scalar (rs1); _wv/_wx take vs2 in the result type.
vuint32mf2_t __riscv_vwsubu_vv_mu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint16mf4_t vs2, vuint16mf4_t vs1,
                                  size_t vl);
vuint32mf2_t __riscv_vwsubu_vx_mu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vwsubu_wv_mu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint32mf2_t vs2, vuint16mf4_t vs1,
                                  size_t vl);
vuint32mf2_t __riscv_vwsubu_wx_mu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint32mf2_t vs2, uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwsubu_vv_mu(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                                 vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwsubu_vx_mu(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwsubu_wv_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                 vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwsubu_wx_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwsubu_vv_mu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                 vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwsubu_vx_mu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwsubu_wv_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                 vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwsubu_wx_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwsubu_vv_mu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                 vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwsubu_vx_mu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwsubu_wv_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                 vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwsubu_wx_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwsubu_vv_mu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                 vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwsubu_vx_mu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                 uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwsubu_wv_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                 vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwsubu_wx_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                 uint16_t rs1, size_t vl);
// __riscv_vwsubu_{vv,vx,wv,wx}_mu with vuint64m1_t through vuint64m8_t
// results. Narrow operands are vuint32 vectors (vs1, and vs2 for _vv/_vx) or
// a uint32_t scalar (rs1); _wv/_wx take vs2 in the result type.
vuint64m1_t __riscv_vwsubu_vv_mu(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                                 vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwsubu_vx_mu(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vwsubu_wv_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                 vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwsubu_wx_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwsubu_vv_mu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                 vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwsubu_vx_mu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwsubu_wv_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                 vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwsubu_wx_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwsubu_vv_mu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                 vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwsubu_vx_mu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwsubu_wv_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                 vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwsubu_wx_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwsubu_vv_mu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                 vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwsubu_vx_mu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                 uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwsubu_wv_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                 vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwsubu_wx_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                 uint32_t rs1, size_t vl);

Vector Integer Widening Intrinsics

// Overloaded __riscv_vwcvt_x_tu (signed types) and __riscv_vwcvtu_x_tu
// (unsigned types) prototypes. Each takes an operand `vd` of the result
// type, a single source vector `vs2` and the vector length `vl`; there is no
// mask operand. The result type is the next wider element type of the source
// (vint8* -> vint16*, vint16* -> vint32*, vint32* -> vint64*, and likewise
// for the vuint* types).
// NOTE(review): `_tu` presumably selects the tail-undisturbed policy --
// confirm against the RVV intrinsics spec.
vint16mf4_t __riscv_vwcvt_x_tu(vint16mf4_t vd, vint8mf8_t vs2, size_t vl);
vint16mf2_t __riscv_vwcvt_x_tu(vint16mf2_t vd, vint8mf4_t vs2, size_t vl);
vint16m1_t __riscv_vwcvt_x_tu(vint16m1_t vd, vint8mf2_t vs2, size_t vl);
vint16m2_t __riscv_vwcvt_x_tu(vint16m2_t vd, vint8m1_t vs2, size_t vl);
vint16m4_t __riscv_vwcvt_x_tu(vint16m4_t vd, vint8m2_t vs2, size_t vl);
vint16m8_t __riscv_vwcvt_x_tu(vint16m8_t vd, vint8m4_t vs2, size_t vl);
vuint16mf4_t __riscv_vwcvtu_x_tu(vuint16mf4_t vd, vuint8mf8_t vs2, size_t vl);
vuint16mf2_t __riscv_vwcvtu_x_tu(vuint16mf2_t vd, vuint8mf4_t vs2, size_t vl);
vuint16m1_t __riscv_vwcvtu_x_tu(vuint16m1_t vd, vuint8mf2_t vs2, size_t vl);
vuint16m2_t __riscv_vwcvtu_x_tu(vuint16m2_t vd, vuint8m1_t vs2, size_t vl);
vuint16m4_t __riscv_vwcvtu_x_tu(vuint16m4_t vd, vuint8m2_t vs2, size_t vl);
vuint16m8_t __riscv_vwcvtu_x_tu(vuint16m8_t vd, vuint8m4_t vs2, size_t vl);
vint32mf2_t __riscv_vwcvt_x_tu(vint32mf2_t vd, vint16mf4_t vs2, size_t vl);
vint32m1_t __riscv_vwcvt_x_tu(vint32m1_t vd, vint16mf2_t vs2, size_t vl);
vint32m2_t __riscv_vwcvt_x_tu(vint32m2_t vd, vint16m1_t vs2, size_t vl);
vint32m4_t __riscv_vwcvt_x_tu(vint32m4_t vd, vint16m2_t vs2, size_t vl);
vint32m8_t __riscv_vwcvt_x_tu(vint32m8_t vd, vint16m4_t vs2, size_t vl);
vuint32mf2_t __riscv_vwcvtu_x_tu(vuint32mf2_t vd, vuint16mf4_t vs2, size_t vl);
vuint32m1_t __riscv_vwcvtu_x_tu(vuint32m1_t vd, vuint16mf2_t vs2, size_t vl);
vuint32m2_t __riscv_vwcvtu_x_tu(vuint32m2_t vd, vuint16m1_t vs2, size_t vl);
vuint32m4_t __riscv_vwcvtu_x_tu(vuint32m4_t vd, vuint16m2_t vs2, size_t vl);
vuint32m8_t __riscv_vwcvtu_x_tu(vuint32m8_t vd, vuint16m4_t vs2, size_t vl);
vint64m1_t __riscv_vwcvt_x_tu(vint64m1_t vd, vint32mf2_t vs2, size_t vl);
vint64m2_t __riscv_vwcvt_x_tu(vint64m2_t vd, vint32m1_t vs2, size_t vl);
vint64m4_t __riscv_vwcvt_x_tu(vint64m4_t vd, vint32m2_t vs2, size_t vl);
vint64m8_t __riscv_vwcvt_x_tu(vint64m8_t vd, vint32m4_t vs2, size_t vl);
vuint64m1_t __riscv_vwcvtu_x_tu(vuint64m1_t vd, vuint32mf2_t vs2, size_t vl);
vuint64m2_t __riscv_vwcvtu_x_tu(vuint64m2_t vd, vuint32m1_t vs2, size_t vl);
vuint64m4_t __riscv_vwcvtu_x_tu(vuint64m4_t vd, vuint32m2_t vs2, size_t vl);
vuint64m8_t __riscv_vwcvtu_x_tu(vuint64m8_t vd, vuint32m4_t vs2, size_t vl);
// masked functions (_tum: tail undisturbed, mask agnostic)
vint16mf4_t __riscv_vwcvt_x_tum(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                size_t vl);
vint16mf2_t __riscv_vwcvt_x_tum(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                size_t vl);
vint16m1_t __riscv_vwcvt_x_tum(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                               size_t vl);
vint16m2_t __riscv_vwcvt_x_tum(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                               size_t vl);
vint16m4_t __riscv_vwcvt_x_tum(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                               size_t vl);
vint16m8_t __riscv_vwcvt_x_tum(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                               size_t vl);
vuint16mf4_t __riscv_vwcvtu_x_tum(vbool64_t vm, vuint16mf4_t vd,
                                  vuint8mf8_t vs2, size_t vl);
vuint16mf2_t __riscv_vwcvtu_x_tum(vbool32_t vm, vuint16mf2_t vd,
                                  vuint8mf4_t vs2, size_t vl);
vuint16m1_t __riscv_vwcvtu_x_tum(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                 size_t vl);
vuint16m2_t __riscv_vwcvtu_x_tum(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                 size_t vl);
vuint16m4_t __riscv_vwcvtu_x_tum(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                 size_t vl);
vuint16m8_t __riscv_vwcvtu_x_tum(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                 size_t vl);
vint32mf2_t __riscv_vwcvt_x_tum(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                size_t vl);
vint32m1_t __riscv_vwcvt_x_tum(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                               size_t vl);
vint32m2_t __riscv_vwcvt_x_tum(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                               size_t vl);
vint32m4_t __riscv_vwcvt_x_tum(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                               size_t vl);
vint32m8_t __riscv_vwcvt_x_tum(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                               size_t vl);
vuint32mf2_t __riscv_vwcvtu_x_tum(vbool64_t vm, vuint32mf2_t vd,
                                  vuint16mf4_t vs2, size_t vl);
vuint32m1_t __riscv_vwcvtu_x_tum(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                                 size_t vl);
vuint32m2_t __riscv_vwcvtu_x_tum(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                 size_t vl);
vuint32m4_t __riscv_vwcvtu_x_tum(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                 size_t vl);
vuint32m8_t __riscv_vwcvtu_x_tum(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                 size_t vl);
vint64m1_t __riscv_vwcvt_x_tum(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                               size_t vl);
vint64m2_t __riscv_vwcvt_x_tum(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                               size_t vl);
vint64m4_t __riscv_vwcvt_x_tum(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                               size_t vl);
vint64m8_t __riscv_vwcvt_x_tum(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                               size_t vl);
vuint64m1_t __riscv_vwcvtu_x_tum(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                                 size_t vl);
vuint64m2_t __riscv_vwcvtu_x_tum(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                 size_t vl);
vuint64m4_t __riscv_vwcvtu_x_tum(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                 size_t vl);
vuint64m8_t __riscv_vwcvtu_x_tum(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                 size_t vl);
// masked functions (_tumu: tail undisturbed, mask undisturbed)
vint16mf4_t __riscv_vwcvt_x_tumu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                 size_t vl);
vint16mf2_t __riscv_vwcvt_x_tumu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                 size_t vl);
vint16m1_t __riscv_vwcvt_x_tumu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                size_t vl);
vint16m2_t __riscv_vwcvt_x_tumu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                size_t vl);
vint16m4_t __riscv_vwcvt_x_tumu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                size_t vl);
vint16m8_t __riscv_vwcvt_x_tumu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                size_t vl);
vuint16mf4_t __riscv_vwcvtu_x_tumu(vbool64_t vm, vuint16mf4_t vd,
                                   vuint8mf8_t vs2, size_t vl);
vuint16mf2_t __riscv_vwcvtu_x_tumu(vbool32_t vm, vuint16mf2_t vd,
                                   vuint8mf4_t vs2, size_t vl);
vuint16m1_t __riscv_vwcvtu_x_tumu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                  size_t vl);
vuint16m2_t __riscv_vwcvtu_x_tumu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                  size_t vl);
vuint16m4_t __riscv_vwcvtu_x_tumu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                  size_t vl);
vuint16m8_t __riscv_vwcvtu_x_tumu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                  size_t vl);
vint32mf2_t __riscv_vwcvt_x_tumu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                 size_t vl);
vint32m1_t __riscv_vwcvt_x_tumu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                size_t vl);
vint32m2_t __riscv_vwcvt_x_tumu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                size_t vl);
vint32m4_t __riscv_vwcvt_x_tumu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                size_t vl);
vint32m8_t __riscv_vwcvt_x_tumu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                size_t vl);
vuint32mf2_t __riscv_vwcvtu_x_tumu(vbool64_t vm, vuint32mf2_t vd,
                                   vuint16mf4_t vs2, size_t vl);
vuint32m1_t __riscv_vwcvtu_x_tumu(vbool32_t vm, vuint32m1_t vd,
                                  vuint16mf2_t vs2, size_t vl);
vuint32m2_t __riscv_vwcvtu_x_tumu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                  size_t vl);
vuint32m4_t __riscv_vwcvtu_x_tumu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                  size_t vl);
vuint32m8_t __riscv_vwcvtu_x_tumu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                  size_t vl);
vint64m1_t __riscv_vwcvt_x_tumu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                size_t vl);
vint64m2_t __riscv_vwcvt_x_tumu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                size_t vl);
vint64m4_t __riscv_vwcvt_x_tumu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                size_t vl);
vint64m8_t __riscv_vwcvt_x_tumu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                size_t vl);
vuint64m1_t __riscv_vwcvtu_x_tumu(vbool64_t vm, vuint64m1_t vd,
                                  vuint32mf2_t vs2, size_t vl);
vuint64m2_t __riscv_vwcvtu_x_tumu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                  size_t vl);
vuint64m4_t __riscv_vwcvtu_x_tumu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                  size_t vl);
vuint64m8_t __riscv_vwcvtu_x_tumu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                  size_t vl);
// masked functions (_mu: tail agnostic, mask undisturbed)
vint16mf4_t __riscv_vwcvt_x_mu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                               size_t vl);
vint16mf2_t __riscv_vwcvt_x_mu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                               size_t vl);
vint16m1_t __riscv_vwcvt_x_mu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                              size_t vl);
vint16m2_t __riscv_vwcvt_x_mu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                              size_t vl);
vint16m4_t __riscv_vwcvt_x_mu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                              size_t vl);
vint16m8_t __riscv_vwcvt_x_mu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                              size_t vl);
vuint16mf4_t __riscv_vwcvtu_x_mu(vbool64_t vm, vuint16mf4_t vd, vuint8mf8_t vs2,
                                 size_t vl);
vuint16mf2_t __riscv_vwcvtu_x_mu(vbool32_t vm, vuint16mf2_t vd, vuint8mf4_t vs2,
                                 size_t vl);
vuint16m1_t __riscv_vwcvtu_x_mu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                size_t vl);
vuint16m2_t __riscv_vwcvtu_x_mu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                size_t vl);
vuint16m4_t __riscv_vwcvtu_x_mu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                size_t vl);
vuint16m8_t __riscv_vwcvtu_x_mu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                size_t vl);
vint32mf2_t __riscv_vwcvt_x_mu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                               size_t vl);
vint32m1_t __riscv_vwcvt_x_mu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                              size_t vl);
vint32m2_t __riscv_vwcvt_x_mu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                              size_t vl);
vint32m4_t __riscv_vwcvt_x_mu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                              size_t vl);
vint32m8_t __riscv_vwcvt_x_mu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                              size_t vl);
vuint32mf2_t __riscv_vwcvtu_x_mu(vbool64_t vm, vuint32mf2_t vd,
                                 vuint16mf4_t vs2, size_t vl);
vuint32m1_t __riscv_vwcvtu_x_mu(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                                size_t vl);
vuint32m2_t __riscv_vwcvtu_x_mu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                size_t vl);
vuint32m4_t __riscv_vwcvtu_x_mu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                size_t vl);
vuint32m8_t __riscv_vwcvtu_x_mu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                size_t vl);
vint64m1_t __riscv_vwcvt_x_mu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                              size_t vl);
vint64m2_t __riscv_vwcvt_x_mu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                              size_t vl);
vint64m4_t __riscv_vwcvt_x_mu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                              size_t vl);
vint64m8_t __riscv_vwcvt_x_mu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                              size_t vl);
vuint64m1_t __riscv_vwcvtu_x_mu(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                                size_t vl);
vuint64m2_t __riscv_vwcvtu_x_mu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                size_t vl);
vuint64m4_t __riscv_vwcvtu_x_mu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                size_t vl);
vuint64m8_t __riscv_vwcvtu_x_mu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                size_t vl);

Vector Integer Extension Intrinsics

vint16mf4_t __riscv_vsext_vf2_tu(vint16mf4_t vd, vint8mf8_t vs2, size_t vl);
vint16mf2_t __riscv_vsext_vf2_tu(vint16mf2_t vd, vint8mf4_t vs2, size_t vl);
vint16m1_t __riscv_vsext_vf2_tu(vint16m1_t vd, vint8mf2_t vs2, size_t vl);
vint16m2_t __riscv_vsext_vf2_tu(vint16m2_t vd, vint8m1_t vs2, size_t vl);
vint16m4_t __riscv_vsext_vf2_tu(vint16m4_t vd, vint8m2_t vs2, size_t vl);
vint16m8_t __riscv_vsext_vf2_tu(vint16m8_t vd, vint8m4_t vs2, size_t vl);
vint32mf2_t __riscv_vsext_vf4_tu(vint32mf2_t vd, vint8mf8_t vs2, size_t vl);
vint32m1_t __riscv_vsext_vf4_tu(vint32m1_t vd, vint8mf4_t vs2, size_t vl);
vint32m2_t __riscv_vsext_vf4_tu(vint32m2_t vd, vint8mf2_t vs2, size_t vl);
vint32m4_t __riscv_vsext_vf4_tu(vint32m4_t vd, vint8m1_t vs2, size_t vl);
vint32m8_t __riscv_vsext_vf4_tu(vint32m8_t vd, vint8m2_t vs2, size_t vl);
vint64m1_t __riscv_vsext_vf8_tu(vint64m1_t vd, vint8mf8_t vs2, size_t vl);
vint64m2_t __riscv_vsext_vf8_tu(vint64m2_t vd, vint8mf4_t vs2, size_t vl);
vint64m4_t __riscv_vsext_vf8_tu(vint64m4_t vd, vint8mf2_t vs2, size_t vl);
vint64m8_t __riscv_vsext_vf8_tu(vint64m8_t vd, vint8m1_t vs2, size_t vl);
vint32mf2_t __riscv_vsext_vf2_tu(vint32mf2_t vd, vint16mf4_t vs2, size_t vl);
vint32m1_t __riscv_vsext_vf2_tu(vint32m1_t vd, vint16mf2_t vs2, size_t vl);
vint32m2_t __riscv_vsext_vf2_tu(vint32m2_t vd, vint16m1_t vs2, size_t vl);
vint32m4_t __riscv_vsext_vf2_tu(vint32m4_t vd, vint16m2_t vs2, size_t vl);
vint32m8_t __riscv_vsext_vf2_tu(vint32m8_t vd, vint16m4_t vs2, size_t vl);
vint64m1_t __riscv_vsext_vf4_tu(vint64m1_t vd, vint16mf4_t vs2, size_t vl);
vint64m2_t __riscv_vsext_vf4_tu(vint64m2_t vd, vint16mf2_t vs2, size_t vl);
vint64m4_t __riscv_vsext_vf4_tu(vint64m4_t vd, vint16m1_t vs2, size_t vl);
vint64m8_t __riscv_vsext_vf4_tu(vint64m8_t vd, vint16m2_t vs2, size_t vl);
vint64m1_t __riscv_vsext_vf2_tu(vint64m1_t vd, vint32mf2_t vs2, size_t vl);
vint64m2_t __riscv_vsext_vf2_tu(vint64m2_t vd, vint32m1_t vs2, size_t vl);
vint64m4_t __riscv_vsext_vf2_tu(vint64m4_t vd, vint32m2_t vs2, size_t vl);
vint64m8_t __riscv_vsext_vf2_tu(vint64m8_t vd, vint32m4_t vs2, size_t vl);
vuint16mf4_t __riscv_vzext_vf2_tu(vuint16mf4_t vd, vuint8mf8_t vs2, size_t vl);
vuint16mf2_t __riscv_vzext_vf2_tu(vuint16mf2_t vd, vuint8mf4_t vs2, size_t vl);
vuint16m1_t __riscv_vzext_vf2_tu(vuint16m1_t vd, vuint8mf2_t vs2, size_t vl);
vuint16m2_t __riscv_vzext_vf2_tu(vuint16m2_t vd, vuint8m1_t vs2, size_t vl);
vuint16m4_t __riscv_vzext_vf2_tu(vuint16m4_t vd, vuint8m2_t vs2, size_t vl);
vuint16m8_t __riscv_vzext_vf2_tu(vuint16m8_t vd, vuint8m4_t vs2, size_t vl);
vuint32mf2_t __riscv_vzext_vf4_tu(vuint32mf2_t vd, vuint8mf8_t vs2, size_t vl);
vuint32m1_t __riscv_vzext_vf4_tu(vuint32m1_t vd, vuint8mf4_t vs2, size_t vl);
vuint32m2_t __riscv_vzext_vf4_tu(vuint32m2_t vd, vuint8mf2_t vs2, size_t vl);
vuint32m4_t __riscv_vzext_vf4_tu(vuint32m4_t vd, vuint8m1_t vs2, size_t vl);
vuint32m8_t __riscv_vzext_vf4_tu(vuint32m8_t vd, vuint8m2_t vs2, size_t vl);
vuint64m1_t __riscv_vzext_vf8_tu(vuint64m1_t vd, vuint8mf8_t vs2, size_t vl);
vuint64m2_t __riscv_vzext_vf8_tu(vuint64m2_t vd, vuint8mf4_t vs2, size_t vl);
vuint64m4_t __riscv_vzext_vf8_tu(vuint64m4_t vd, vuint8mf2_t vs2, size_t vl);
vuint64m8_t __riscv_vzext_vf8_tu(vuint64m8_t vd, vuint8m1_t vs2, size_t vl);
vuint32mf2_t __riscv_vzext_vf2_tu(vuint32mf2_t vd, vuint16mf4_t vs2, size_t vl);
vuint32m1_t __riscv_vzext_vf2_tu(vuint32m1_t vd, vuint16mf2_t vs2, size_t vl);
vuint32m2_t __riscv_vzext_vf2_tu(vuint32m2_t vd, vuint16m1_t vs2, size_t vl);
vuint32m4_t __riscv_vzext_vf2_tu(vuint32m4_t vd, vuint16m2_t vs2, size_t vl);
vuint32m8_t __riscv_vzext_vf2_tu(vuint32m8_t vd, vuint16m4_t vs2, size_t vl);
vuint64m1_t __riscv_vzext_vf4_tu(vuint64m1_t vd, vuint16mf4_t vs2, size_t vl);
vuint64m2_t __riscv_vzext_vf4_tu(vuint64m2_t vd, vuint16mf2_t vs2, size_t vl);
vuint64m4_t __riscv_vzext_vf4_tu(vuint64m4_t vd, vuint16m1_t vs2, size_t vl);
vuint64m8_t __riscv_vzext_vf4_tu(vuint64m8_t vd, vuint16m2_t vs2, size_t vl);
vuint64m1_t __riscv_vzext_vf2_tu(vuint64m1_t vd, vuint32mf2_t vs2, size_t vl);
vuint64m2_t __riscv_vzext_vf2_tu(vuint64m2_t vd, vuint32m1_t vs2, size_t vl);
vuint64m4_t __riscv_vzext_vf2_tu(vuint64m4_t vd, vuint32m2_t vs2, size_t vl);
vuint64m8_t __riscv_vzext_vf2_tu(vuint64m8_t vd, vuint32m4_t vs2, size_t vl);
// masked functions (_tum: tail undisturbed, mask agnostic)
vint16mf4_t __riscv_vsext_vf2_tum(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                  size_t vl);
vint16mf2_t __riscv_vsext_vf2_tum(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                  size_t vl);
vint16m1_t __riscv_vsext_vf2_tum(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                 size_t vl);
vint16m2_t __riscv_vsext_vf2_tum(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                 size_t vl);
vint16m4_t __riscv_vsext_vf2_tum(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                 size_t vl);
vint16m8_t __riscv_vsext_vf2_tum(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                 size_t vl);
vint32mf2_t __riscv_vsext_vf4_tum(vbool64_t vm, vint32mf2_t vd, vint8mf8_t vs2,
                                  size_t vl);
vint32m1_t __riscv_vsext_vf4_tum(vbool32_t vm, vint32m1_t vd, vint8mf4_t vs2,
                                 size_t vl);
vint32m2_t __riscv_vsext_vf4_tum(vbool16_t vm, vint32m2_t vd, vint8mf2_t vs2,
                                 size_t vl);
vint32m4_t __riscv_vsext_vf4_tum(vbool8_t vm, vint32m4_t vd, vint8m1_t vs2,
                                 size_t vl);
vint32m8_t __riscv_vsext_vf4_tum(vbool4_t vm, vint32m8_t vd, vint8m2_t vs2,
                                 size_t vl);
vint64m1_t __riscv_vsext_vf8_tum(vbool64_t vm, vint64m1_t vd, vint8mf8_t vs2,
                                 size_t vl);
vint64m2_t __riscv_vsext_vf8_tum(vbool32_t vm, vint64m2_t vd, vint8mf4_t vs2,
                                 size_t vl);
vint64m4_t __riscv_vsext_vf8_tum(vbool16_t vm, vint64m4_t vd, vint8mf2_t vs2,
                                 size_t vl);
vint64m8_t __riscv_vsext_vf8_tum(vbool8_t vm, vint64m8_t vd, vint8m1_t vs2,
                                 size_t vl);
vint32mf2_t __riscv_vsext_vf2_tum(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                  size_t vl);
vint32m1_t __riscv_vsext_vf2_tum(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                 size_t vl);
vint32m2_t __riscv_vsext_vf2_tum(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                 size_t vl);
vint32m4_t __riscv_vsext_vf2_tum(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                 size_t vl);
vint32m8_t __riscv_vsext_vf2_tum(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                 size_t vl);
vint64m1_t __riscv_vsext_vf4_tum(vbool64_t vm, vint64m1_t vd, vint16mf4_t vs2,
                                 size_t vl);
vint64m2_t __riscv_vsext_vf4_tum(vbool32_t vm, vint64m2_t vd, vint16mf2_t vs2,
                                 size_t vl);
vint64m4_t __riscv_vsext_vf4_tum(vbool16_t vm, vint64m4_t vd, vint16m1_t vs2,
                                 size_t vl);
vint64m8_t __riscv_vsext_vf4_tum(vbool8_t vm, vint64m8_t vd, vint16m2_t vs2,
                                 size_t vl);
vint64m1_t __riscv_vsext_vf2_tum(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                 size_t vl);
vint64m2_t __riscv_vsext_vf2_tum(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                 size_t vl);
vint64m4_t __riscv_vsext_vf2_tum(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                 size_t vl);
vint64m8_t __riscv_vsext_vf2_tum(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                 size_t vl);
vuint16mf4_t __riscv_vzext_vf2_tum(vbool64_t vm, vuint16mf4_t vd,
                                   vuint8mf8_t vs2, size_t vl);
vuint16mf2_t __riscv_vzext_vf2_tum(vbool32_t vm, vuint16mf2_t vd,
                                   vuint8mf4_t vs2, size_t vl);
vuint16m1_t __riscv_vzext_vf2_tum(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                  size_t vl);
vuint16m2_t __riscv_vzext_vf2_tum(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                  size_t vl);
vuint16m4_t __riscv_vzext_vf2_tum(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                  size_t vl);
vuint16m8_t __riscv_vzext_vf2_tum(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                  size_t vl);
vuint32mf2_t __riscv_vzext_vf4_tum(vbool64_t vm, vuint32mf2_t vd,
                                   vuint8mf8_t vs2, size_t vl);
vuint32m1_t __riscv_vzext_vf4_tum(vbool32_t vm, vuint32m1_t vd, vuint8mf4_t vs2,
                                  size_t vl);
vuint32m2_t __riscv_vzext_vf4_tum(vbool16_t vm, vuint32m2_t vd, vuint8mf2_t vs2,
                                  size_t vl);
vuint32m4_t __riscv_vzext_vf4_tum(vbool8_t vm, vuint32m4_t vd, vuint8m1_t vs2,
                                  size_t vl);
vuint32m8_t __riscv_vzext_vf4_tum(vbool4_t vm, vuint32m8_t vd, vuint8m2_t vs2,
                                  size_t vl);
vuint64m1_t __riscv_vzext_vf8_tum(vbool64_t vm, vuint64m1_t vd, vuint8mf8_t vs2,
                                  size_t vl);
vuint64m2_t __riscv_vzext_vf8_tum(vbool32_t vm, vuint64m2_t vd, vuint8mf4_t vs2,
                                  size_t vl);
vuint64m4_t __riscv_vzext_vf8_tum(vbool16_t vm, vuint64m4_t vd, vuint8mf2_t vs2,
                                  size_t vl);
vuint64m8_t __riscv_vzext_vf8_tum(vbool8_t vm, vuint64m8_t vd, vuint8m1_t vs2,
                                  size_t vl);
vuint32mf2_t __riscv_vzext_vf2_tum(vbool64_t vm, vuint32mf2_t vd,
                                   vuint16mf4_t vs2, size_t vl);
vuint32m1_t __riscv_vzext_vf2_tum(vbool32_t vm, vuint32m1_t vd,
                                  vuint16mf2_t vs2, size_t vl);
vuint32m2_t __riscv_vzext_vf2_tum(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                  size_t vl);
vuint32m4_t __riscv_vzext_vf2_tum(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                  size_t vl);
vuint32m8_t __riscv_vzext_vf2_tum(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                  size_t vl);
vuint64m1_t __riscv_vzext_vf4_tum(vbool64_t vm, vuint64m1_t vd,
                                  vuint16mf4_t vs2, size_t vl);
vuint64m2_t __riscv_vzext_vf4_tum(vbool32_t vm, vuint64m2_t vd,
                                  vuint16mf2_t vs2, size_t vl);
vuint64m4_t __riscv_vzext_vf4_tum(vbool16_t vm, vuint64m4_t vd, vuint16m1_t vs2,
                                  size_t vl);
vuint64m8_t __riscv_vzext_vf4_tum(vbool8_t vm, vuint64m8_t vd, vuint16m2_t vs2,
                                  size_t vl);
vuint64m1_t __riscv_vzext_vf2_tum(vbool64_t vm, vuint64m1_t vd,
                                  vuint32mf2_t vs2, size_t vl);
vuint64m2_t __riscv_vzext_vf2_tum(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                  size_t vl);
vuint64m4_t __riscv_vzext_vf2_tum(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                  size_t vl);
vuint64m8_t __riscv_vzext_vf2_tum(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                  size_t vl);
// masked functions (_tumu: tail undisturbed, mask undisturbed)
vint16mf4_t __riscv_vsext_vf2_tumu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                   size_t vl);
vint16mf2_t __riscv_vsext_vf2_tumu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                   size_t vl);
vint16m1_t __riscv_vsext_vf2_tumu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                  size_t vl);
vint16m2_t __riscv_vsext_vf2_tumu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                  size_t vl);
vint16m4_t __riscv_vsext_vf2_tumu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                  size_t vl);
vint16m8_t __riscv_vsext_vf2_tumu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                  size_t vl);
vint32mf2_t __riscv_vsext_vf4_tumu(vbool64_t vm, vint32mf2_t vd, vint8mf8_t vs2,
                                   size_t vl);
vint32m1_t __riscv_vsext_vf4_tumu(vbool32_t vm, vint32m1_t vd, vint8mf4_t vs2,
                                  size_t vl);
vint32m2_t __riscv_vsext_vf4_tumu(vbool16_t vm, vint32m2_t vd, vint8mf2_t vs2,
                                  size_t vl);
vint32m4_t __riscv_vsext_vf4_tumu(vbool8_t vm, vint32m4_t vd, vint8m1_t vs2,
                                  size_t vl);
vint32m8_t __riscv_vsext_vf4_tumu(vbool4_t vm, vint32m8_t vd, vint8m2_t vs2,
                                  size_t vl);
vint64m1_t __riscv_vsext_vf8_tumu(vbool64_t vm, vint64m1_t vd, vint8mf8_t vs2,
                                  size_t vl);
vint64m2_t __riscv_vsext_vf8_tumu(vbool32_t vm, vint64m2_t vd, vint8mf4_t vs2,
                                  size_t vl);
vint64m4_t __riscv_vsext_vf8_tumu(vbool16_t vm, vint64m4_t vd, vint8mf2_t vs2,
                                  size_t vl);
vint64m8_t __riscv_vsext_vf8_tumu(vbool8_t vm, vint64m8_t vd, vint8m1_t vs2,
                                  size_t vl);
vint32mf2_t __riscv_vsext_vf2_tumu(vbool64_t vm, vint32mf2_t vd,
                                   vint16mf4_t vs2, size_t vl);
vint32m1_t __riscv_vsext_vf2_tumu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                  size_t vl);
vint32m2_t __riscv_vsext_vf2_tumu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                  size_t vl);
vint32m4_t __riscv_vsext_vf2_tumu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                  size_t vl);
vint32m8_t __riscv_vsext_vf2_tumu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                  size_t vl);
vint64m1_t __riscv_vsext_vf4_tumu(vbool64_t vm, vint64m1_t vd, vint16mf4_t vs2,
                                  size_t vl);
vint64m2_t __riscv_vsext_vf4_tumu(vbool32_t vm, vint64m2_t vd, vint16mf2_t vs2,
                                  size_t vl);
vint64m4_t __riscv_vsext_vf4_tumu(vbool16_t vm, vint64m4_t vd, vint16m1_t vs2,
                                  size_t vl);
vint64m8_t __riscv_vsext_vf4_tumu(vbool8_t vm, vint64m8_t vd, vint16m2_t vs2,
                                  size_t vl);
vint64m1_t __riscv_vsext_vf2_tumu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                  size_t vl);
vint64m2_t __riscv_vsext_vf2_tumu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                  size_t vl);
vint64m4_t __riscv_vsext_vf2_tumu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                  size_t vl);
vint64m8_t __riscv_vsext_vf2_tumu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                  size_t vl);
vuint16mf4_t __riscv_vzext_vf2_tumu(vbool64_t vm, vuint16mf4_t vd,
                                    vuint8mf8_t vs2, size_t vl);
vuint16mf2_t __riscv_vzext_vf2_tumu(vbool32_t vm, vuint16mf2_t vd,
                                    vuint8mf4_t vs2, size_t vl);
vuint16m1_t __riscv_vzext_vf2_tumu(vbool16_t vm, vuint16m1_t vd,
                                   vuint8mf2_t vs2, size_t vl);
vuint16m2_t __riscv_vzext_vf2_tumu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                   size_t vl);
vuint16m4_t __riscv_vzext_vf2_tumu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                   size_t vl);
vuint16m8_t __riscv_vzext_vf2_tumu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                   size_t vl);
vuint32mf2_t __riscv_vzext_vf4_tumu(vbool64_t vm, vuint32mf2_t vd,
                                    vuint8mf8_t vs2, size_t vl);
vuint32m1_t __riscv_vzext_vf4_tumu(vbool32_t vm, vuint32m1_t vd,
                                   vuint8mf4_t vs2, size_t vl);
vuint32m2_t __riscv_vzext_vf4_tumu(vbool16_t vm, vuint32m2_t vd,
                                   vuint8mf2_t vs2, size_t vl);
vuint32m4_t __riscv_vzext_vf4_tumu(vbool8_t vm, vuint32m4_t vd, vuint8m1_t vs2,
                                   size_t vl);
vuint32m8_t __riscv_vzext_vf4_tumu(vbool4_t vm, vuint32m8_t vd, vuint8m2_t vs2,
                                   size_t vl);
vuint64m1_t __riscv_vzext_vf8_tumu(vbool64_t vm, vuint64m1_t vd,
                                   vuint8mf8_t vs2, size_t vl);
vuint64m2_t __riscv_vzext_vf8_tumu(vbool32_t vm, vuint64m2_t vd,
                                   vuint8mf4_t vs2, size_t vl);
vuint64m4_t __riscv_vzext_vf8_tumu(vbool16_t vm, vuint64m4_t vd,
                                   vuint8mf2_t vs2, size_t vl);
vuint64m8_t __riscv_vzext_vf8_tumu(vbool8_t vm, vuint64m8_t vd, vuint8m1_t vs2,
                                   size_t vl);
vuint32mf2_t __riscv_vzext_vf2_tumu(vbool64_t vm, vuint32mf2_t vd,
                                    vuint16mf4_t vs2, size_t vl);
vuint32m1_t __riscv_vzext_vf2_tumu(vbool32_t vm, vuint32m1_t vd,
                                   vuint16mf2_t vs2, size_t vl);
vuint32m2_t __riscv_vzext_vf2_tumu(vbool16_t vm, vuint32m2_t vd,
                                   vuint16m1_t vs2, size_t vl);
vuint32m4_t __riscv_vzext_vf2_tumu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                   size_t vl);
vuint32m8_t __riscv_vzext_vf2_tumu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                   size_t vl);
vuint64m1_t __riscv_vzext_vf4_tumu(vbool64_t vm, vuint64m1_t vd,
                                   vuint16mf4_t vs2, size_t vl);
vuint64m2_t __riscv_vzext_vf4_tumu(vbool32_t vm, vuint64m2_t vd,
                                   vuint16mf2_t vs2, size_t vl);
vuint64m4_t __riscv_vzext_vf4_tumu(vbool16_t vm, vuint64m4_t vd,
                                   vuint16m1_t vs2, size_t vl);
vuint64m8_t __riscv_vzext_vf4_tumu(vbool8_t vm, vuint64m8_t vd, vuint16m2_t vs2,
                                   size_t vl);
vuint64m1_t __riscv_vzext_vf2_tumu(vbool64_t vm, vuint64m1_t vd,
                                   vuint32mf2_t vs2, size_t vl);
vuint64m2_t __riscv_vzext_vf2_tumu(vbool32_t vm, vuint64m2_t vd,
                                   vuint32m1_t vs2, size_t vl);
vuint64m4_t __riscv_vzext_vf2_tumu(vbool16_t vm, vuint64m4_t vd,
                                   vuint32m2_t vs2, size_t vl);
vuint64m8_t __riscv_vzext_vf2_tumu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                   size_t vl);
// masked functions
// __riscv_vsext_vf2_mu, int8 -> int16: vs2 elements are half the width of the
// vd/result elements. vm is a vbool<N> mask with N = result element width
// divided by result LMUL; vd has the same type as the result.
// NOTE(review): _mu presumably selects the mask-undisturbed policy -- confirm
// against the RVV intrinsic naming rules.
vint16mf4_t __riscv_vsext_vf2_mu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                 size_t vl);
vint16mf2_t __riscv_vsext_vf2_mu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                 size_t vl);
vint16m1_t __riscv_vsext_vf2_mu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                size_t vl);
vint16m2_t __riscv_vsext_vf2_mu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                size_t vl);
vint16m4_t __riscv_vsext_vf2_mu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                size_t vl);
vint16m8_t __riscv_vsext_vf2_mu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                size_t vl);
// __riscv_vsext_vf4_mu, int8 -> int32: vs2 elements are one quarter the width
// of the vd/result elements. One overload per result LMUL (mf2..m8).
vint32mf2_t __riscv_vsext_vf4_mu(vbool64_t vm, vint32mf2_t vd, vint8mf8_t vs2,
                                 size_t vl);
vint32m1_t __riscv_vsext_vf4_mu(vbool32_t vm, vint32m1_t vd, vint8mf4_t vs2,
                                size_t vl);
vint32m2_t __riscv_vsext_vf4_mu(vbool16_t vm, vint32m2_t vd, vint8mf2_t vs2,
                                size_t vl);
vint32m4_t __riscv_vsext_vf4_mu(vbool8_t vm, vint32m4_t vd, vint8m1_t vs2,
                                size_t vl);
vint32m8_t __riscv_vsext_vf4_mu(vbool4_t vm, vint32m8_t vd, vint8m2_t vs2,
                                size_t vl);
// __riscv_vsext_vf8_mu, int8 -> int64: vs2 elements are one eighth the width of
// the vd/result elements. One overload per result LMUL (m1..m8).
vint64m1_t __riscv_vsext_vf8_mu(vbool64_t vm, vint64m1_t vd, vint8mf8_t vs2,
                                size_t vl);
vint64m2_t __riscv_vsext_vf8_mu(vbool32_t vm, vint64m2_t vd, vint8mf4_t vs2,
                                size_t vl);
vint64m4_t __riscv_vsext_vf8_mu(vbool16_t vm, vint64m4_t vd, vint8mf2_t vs2,
                                size_t vl);
vint64m8_t __riscv_vsext_vf8_mu(vbool8_t vm, vint64m8_t vd, vint8m1_t vs2,
                                size_t vl);
// __riscv_vsext_vf2_mu, int16 -> int32: vs2 elements are half the width of the
// vd/result elements. One overload per result LMUL (mf2..m8).
vint32mf2_t __riscv_vsext_vf2_mu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                 size_t vl);
vint32m1_t __riscv_vsext_vf2_mu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                size_t vl);
vint32m2_t __riscv_vsext_vf2_mu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                size_t vl);
vint32m4_t __riscv_vsext_vf2_mu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                size_t vl);
vint32m8_t __riscv_vsext_vf2_mu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                size_t vl);
// __riscv_vsext_vf4_mu, int16 -> int64: vs2 elements are one quarter the width
// of the vd/result elements. One overload per result LMUL (m1..m8).
vint64m1_t __riscv_vsext_vf4_mu(vbool64_t vm, vint64m1_t vd, vint16mf4_t vs2,
                                size_t vl);
vint64m2_t __riscv_vsext_vf4_mu(vbool32_t vm, vint64m2_t vd, vint16mf2_t vs2,
                                size_t vl);
vint64m4_t __riscv_vsext_vf4_mu(vbool16_t vm, vint64m4_t vd, vint16m1_t vs2,
                                size_t vl);
vint64m8_t __riscv_vsext_vf4_mu(vbool8_t vm, vint64m8_t vd, vint16m2_t vs2,
                                size_t vl);
// __riscv_vsext_vf2_mu, int32 -> int64: vs2 elements are half the width of the
// vd/result elements. One overload per result LMUL (m1..m8).
vint64m1_t __riscv_vsext_vf2_mu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                size_t vl);
vint64m2_t __riscv_vsext_vf2_mu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                size_t vl);
vint64m4_t __riscv_vsext_vf2_mu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                size_t vl);
vint64m8_t __riscv_vsext_vf2_mu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                size_t vl);
// __riscv_vzext_vf2_mu, uint8 -> uint16: vs2 elements are half the width of the
// vd/result elements. vm is a vbool<N> mask with N = result element width
// divided by result LMUL; vd has the same type as the result.
vuint16mf4_t __riscv_vzext_vf2_mu(vbool64_t vm, vuint16mf4_t vd,
                                  vuint8mf8_t vs2, size_t vl);
vuint16mf2_t __riscv_vzext_vf2_mu(vbool32_t vm, vuint16mf2_t vd,
                                  vuint8mf4_t vs2, size_t vl);
vuint16m1_t __riscv_vzext_vf2_mu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                 size_t vl);
vuint16m2_t __riscv_vzext_vf2_mu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                 size_t vl);
vuint16m4_t __riscv_vzext_vf2_mu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                 size_t vl);
vuint16m8_t __riscv_vzext_vf2_mu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                 size_t vl);
// __riscv_vzext_vf4_mu, uint8 -> uint32: vs2 elements are one quarter the width
// of the vd/result elements. One overload per result LMUL (mf2..m8).
vuint32mf2_t __riscv_vzext_vf4_mu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint8mf8_t vs2, size_t vl);
vuint32m1_t __riscv_vzext_vf4_mu(vbool32_t vm, vuint32m1_t vd, vuint8mf4_t vs2,
                                 size_t vl);
vuint32m2_t __riscv_vzext_vf4_mu(vbool16_t vm, vuint32m2_t vd, vuint8mf2_t vs2,
                                 size_t vl);
vuint32m4_t __riscv_vzext_vf4_mu(vbool8_t vm, vuint32m4_t vd, vuint8m1_t vs2,
                                 size_t vl);
vuint32m8_t __riscv_vzext_vf4_mu(vbool4_t vm, vuint32m8_t vd, vuint8m2_t vs2,
                                 size_t vl);
// __riscv_vzext_vf8_mu, uint8 -> uint64: vs2 elements are one eighth the width
// of the vd/result elements. One overload per result LMUL (m1..m8).
vuint64m1_t __riscv_vzext_vf8_mu(vbool64_t vm, vuint64m1_t vd, vuint8mf8_t vs2,
                                 size_t vl);
vuint64m2_t __riscv_vzext_vf8_mu(vbool32_t vm, vuint64m2_t vd, vuint8mf4_t vs2,
                                 size_t vl);
vuint64m4_t __riscv_vzext_vf8_mu(vbool16_t vm, vuint64m4_t vd, vuint8mf2_t vs2,
                                 size_t vl);
vuint64m8_t __riscv_vzext_vf8_mu(vbool8_t vm, vuint64m8_t vd, vuint8m1_t vs2,
                                 size_t vl);
// __riscv_vzext_vf2_mu, uint16 -> uint32: vs2 elements are half the width of
// the vd/result elements. One overload per result LMUL (mf2..m8).
vuint32mf2_t __riscv_vzext_vf2_mu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint16mf4_t vs2, size_t vl);
vuint32m1_t __riscv_vzext_vf2_mu(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                                 size_t vl);
vuint32m2_t __riscv_vzext_vf2_mu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                 size_t vl);
vuint32m4_t __riscv_vzext_vf2_mu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                 size_t vl);
vuint32m8_t __riscv_vzext_vf2_mu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                 size_t vl);
// __riscv_vzext_vf4_mu, uint16 -> uint64: vs2 elements are one quarter the
// width of the vd/result elements. One overload per result LMUL (m1..m8).
vuint64m1_t __riscv_vzext_vf4_mu(vbool64_t vm, vuint64m1_t vd, vuint16mf4_t vs2,
                                 size_t vl);
vuint64m2_t __riscv_vzext_vf4_mu(vbool32_t vm, vuint64m2_t vd, vuint16mf2_t vs2,
                                 size_t vl);
vuint64m4_t __riscv_vzext_vf4_mu(vbool16_t vm, vuint64m4_t vd, vuint16m1_t vs2,
                                 size_t vl);
vuint64m8_t __riscv_vzext_vf4_mu(vbool8_t vm, vuint64m8_t vd, vuint16m2_t vs2,
                                 size_t vl);
// __riscv_vzext_vf2_mu, uint32 -> uint64: vs2 elements are half the width of
// the vd/result elements. One overload per result LMUL (m1..m8).
vuint64m1_t __riscv_vzext_vf2_mu(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                                 size_t vl);
vuint64m2_t __riscv_vzext_vf2_mu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                 size_t vl);
vuint64m4_t __riscv_vzext_vf2_mu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                 size_t vl);
vuint64m8_t __riscv_vzext_vf2_mu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                 size_t vl);

Vector Integer Add-with-Carry / Subtract-with-Borrow Intrinsics

// __riscv_vadc_tu, int8 elements: for each LMUL (mf8..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (int8_t rs1). vd, vs2 and
// the result share one vector type; v0 is a vbool<N> operand with
// N = element width divided by LMUL.
// NOTE(review): v0 is presumably the carry-in mask and _tu the tail-undisturbed
// policy -- confirm against the RVV specification.
vint8mf8_t __riscv_vadc_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                           vbool64_t v0, size_t vl);
vint8mf8_t __riscv_vadc_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                           vbool64_t v0, size_t vl);
vint8mf4_t __riscv_vadc_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                           vbool32_t v0, size_t vl);
vint8mf4_t __riscv_vadc_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                           vbool32_t v0, size_t vl);
vint8mf2_t __riscv_vadc_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                           vbool16_t v0, size_t vl);
vint8mf2_t __riscv_vadc_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                           vbool16_t v0, size_t vl);
vint8m1_t __riscv_vadc_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                          vbool8_t v0, size_t vl);
vint8m1_t __riscv_vadc_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, vbool8_t v0,
                          size_t vl);
vint8m2_t __riscv_vadc_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                          vbool4_t v0, size_t vl);
vint8m2_t __riscv_vadc_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, vbool4_t v0,
                          size_t vl);
vint8m4_t __riscv_vadc_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                          vbool2_t v0, size_t vl);
vint8m4_t __riscv_vadc_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, vbool2_t v0,
                          size_t vl);
vint8m8_t __riscv_vadc_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                          vbool1_t v0, size_t vl);
vint8m8_t __riscv_vadc_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, vbool1_t v0,
                          size_t vl);
// __riscv_vadc_tu, int16 elements: for each LMUL (mf4..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (int16_t rs1). v0 is a
// vbool<N> operand with N = element width divided by LMUL.
vint16mf4_t __riscv_vadc_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                            vbool64_t v0, size_t vl);
vint16mf4_t __riscv_vadc_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                            vbool64_t v0, size_t vl);
vint16mf2_t __riscv_vadc_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                            vbool32_t v0, size_t vl);
vint16mf2_t __riscv_vadc_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                            vbool32_t v0, size_t vl);
vint16m1_t __riscv_vadc_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                           vbool16_t v0, size_t vl);
vint16m1_t __riscv_vadc_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                           vbool16_t v0, size_t vl);
vint16m2_t __riscv_vadc_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                           vbool8_t v0, size_t vl);
vint16m2_t __riscv_vadc_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                           vbool8_t v0, size_t vl);
vint16m4_t __riscv_vadc_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                           vbool4_t v0, size_t vl);
vint16m4_t __riscv_vadc_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                           vbool4_t v0, size_t vl);
vint16m8_t __riscv_vadc_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                           vbool2_t v0, size_t vl);
vint16m8_t __riscv_vadc_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                           vbool2_t v0, size_t vl);
// __riscv_vadc_tu, int32 elements: for each LMUL (mf2..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (int32_t rs1). v0 is a
// vbool<N> operand with N = element width divided by LMUL.
vint32mf2_t __riscv_vadc_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                            vbool64_t v0, size_t vl);
vint32mf2_t __riscv_vadc_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                            vbool64_t v0, size_t vl);
vint32m1_t __riscv_vadc_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                           vbool32_t v0, size_t vl);
vint32m1_t __riscv_vadc_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                           vbool32_t v0, size_t vl);
vint32m2_t __riscv_vadc_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                           vbool16_t v0, size_t vl);
vint32m2_t __riscv_vadc_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                           vbool16_t v0, size_t vl);
vint32m4_t __riscv_vadc_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                           vbool8_t v0, size_t vl);
vint32m4_t __riscv_vadc_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                           vbool8_t v0, size_t vl);
vint32m8_t __riscv_vadc_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                           vbool4_t v0, size_t vl);
vint32m8_t __riscv_vadc_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                           vbool4_t v0, size_t vl);
// __riscv_vadc_tu, int64 elements: for each LMUL (m1..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (int64_t rs1). v0 is a
// vbool<N> operand with N = element width divided by LMUL.
vint64m1_t __riscv_vadc_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                           vbool64_t v0, size_t vl);
vint64m1_t __riscv_vadc_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                           vbool64_t v0, size_t vl);
vint64m2_t __riscv_vadc_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                           vbool32_t v0, size_t vl);
vint64m2_t __riscv_vadc_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                           vbool32_t v0, size_t vl);
vint64m4_t __riscv_vadc_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                           vbool16_t v0, size_t vl);
vint64m4_t __riscv_vadc_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                           vbool16_t v0, size_t vl);
vint64m8_t __riscv_vadc_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                           vbool8_t v0, size_t vl);
vint64m8_t __riscv_vadc_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                           vbool8_t v0, size_t vl);
// __riscv_vsbc_tu, int8 elements: for each LMUL (mf8..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (int8_t rs1). vd, vs2 and
// the result share one vector type; v0 is a vbool<N> operand with
// N = element width divided by LMUL.
// NOTE(review): v0 is presumably the borrow-in mask and _tu the
// tail-undisturbed policy -- confirm against the RVV specification.
vint8mf8_t __riscv_vsbc_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                           vbool64_t v0, size_t vl);
vint8mf8_t __riscv_vsbc_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                           vbool64_t v0, size_t vl);
vint8mf4_t __riscv_vsbc_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                           vbool32_t v0, size_t vl);
vint8mf4_t __riscv_vsbc_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                           vbool32_t v0, size_t vl);
vint8mf2_t __riscv_vsbc_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                           vbool16_t v0, size_t vl);
vint8mf2_t __riscv_vsbc_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                           vbool16_t v0, size_t vl);
vint8m1_t __riscv_vsbc_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                          vbool8_t v0, size_t vl);
vint8m1_t __riscv_vsbc_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, vbool8_t v0,
                          size_t vl);
vint8m2_t __riscv_vsbc_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                          vbool4_t v0, size_t vl);
vint8m2_t __riscv_vsbc_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, vbool4_t v0,
                          size_t vl);
vint8m4_t __riscv_vsbc_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                          vbool2_t v0, size_t vl);
vint8m4_t __riscv_vsbc_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, vbool2_t v0,
                          size_t vl);
vint8m8_t __riscv_vsbc_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                          vbool1_t v0, size_t vl);
vint8m8_t __riscv_vsbc_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, vbool1_t v0,
                          size_t vl);
// __riscv_vsbc_tu, int16 elements: for each LMUL (mf4..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (int16_t rs1). v0 is a
// vbool<N> operand with N = element width divided by LMUL.
vint16mf4_t __riscv_vsbc_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                            vbool64_t v0, size_t vl);
vint16mf4_t __riscv_vsbc_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                            vbool64_t v0, size_t vl);
vint16mf2_t __riscv_vsbc_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                            vbool32_t v0, size_t vl);
vint16mf2_t __riscv_vsbc_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                            vbool32_t v0, size_t vl);
vint16m1_t __riscv_vsbc_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                           vbool16_t v0, size_t vl);
vint16m1_t __riscv_vsbc_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                           vbool16_t v0, size_t vl);
vint16m2_t __riscv_vsbc_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                           vbool8_t v0, size_t vl);
vint16m2_t __riscv_vsbc_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                           vbool8_t v0, size_t vl);
vint16m4_t __riscv_vsbc_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                           vbool4_t v0, size_t vl);
vint16m4_t __riscv_vsbc_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                           vbool4_t v0, size_t vl);
vint16m8_t __riscv_vsbc_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                           vbool2_t v0, size_t vl);
vint16m8_t __riscv_vsbc_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                           vbool2_t v0, size_t vl);
// __riscv_vsbc_tu, int32 elements: for each LMUL (mf2..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (int32_t rs1). v0 is a
// vbool<N> operand with N = element width divided by LMUL.
vint32mf2_t __riscv_vsbc_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                            vbool64_t v0, size_t vl);
vint32mf2_t __riscv_vsbc_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                            vbool64_t v0, size_t vl);
vint32m1_t __riscv_vsbc_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                           vbool32_t v0, size_t vl);
vint32m1_t __riscv_vsbc_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                           vbool32_t v0, size_t vl);
vint32m2_t __riscv_vsbc_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                           vbool16_t v0, size_t vl);
vint32m2_t __riscv_vsbc_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                           vbool16_t v0, size_t vl);
vint32m4_t __riscv_vsbc_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                           vbool8_t v0, size_t vl);
vint32m4_t __riscv_vsbc_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                           vbool8_t v0, size_t vl);
vint32m8_t __riscv_vsbc_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                           vbool4_t v0, size_t vl);
vint32m8_t __riscv_vsbc_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                           vbool4_t v0, size_t vl);
// __riscv_vsbc_tu, int64 elements: for each LMUL (m1..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (int64_t rs1). v0 is a
// vbool<N> operand with N = element width divided by LMUL.
vint64m1_t __riscv_vsbc_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                           vbool64_t v0, size_t vl);
vint64m1_t __riscv_vsbc_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                           vbool64_t v0, size_t vl);
vint64m2_t __riscv_vsbc_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                           vbool32_t v0, size_t vl);
vint64m2_t __riscv_vsbc_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                           vbool32_t v0, size_t vl);
vint64m4_t __riscv_vsbc_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                           vbool16_t v0, size_t vl);
vint64m4_t __riscv_vsbc_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                           vbool16_t v0, size_t vl);
vint64m8_t __riscv_vsbc_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                           vbool8_t v0, size_t vl);
vint64m8_t __riscv_vsbc_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                           vbool8_t v0, size_t vl);
// __riscv_vadc_tu, uint8 elements: unsigned counterparts of the signed
// overloads. For each LMUL (mf8..m8) a vector-vector overload (vs1) followed by
// a vector-scalar overload (uint8_t rs1). v0 is a vbool<N> operand with
// N = element width divided by LMUL.
vuint8mf8_t __riscv_vadc_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                            vbool64_t v0, size_t vl);
vuint8mf8_t __riscv_vadc_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                            vbool64_t v0, size_t vl);
vuint8mf4_t __riscv_vadc_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                            vbool32_t v0, size_t vl);
vuint8mf4_t __riscv_vadc_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                            vbool32_t v0, size_t vl);
vuint8mf2_t __riscv_vadc_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                            vbool16_t v0, size_t vl);
vuint8mf2_t __riscv_vadc_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                            vbool16_t v0, size_t vl);
vuint8m1_t __riscv_vadc_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                           vbool8_t v0, size_t vl);
vuint8m1_t __riscv_vadc_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                           vbool8_t v0, size_t vl);
vuint8m2_t __riscv_vadc_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                           vbool4_t v0, size_t vl);
vuint8m2_t __riscv_vadc_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                           vbool4_t v0, size_t vl);
vuint8m4_t __riscv_vadc_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                           vbool2_t v0, size_t vl);
vuint8m4_t __riscv_vadc_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                           vbool2_t v0, size_t vl);
vuint8m8_t __riscv_vadc_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                           vbool1_t v0, size_t vl);
vuint8m8_t __riscv_vadc_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                           vbool1_t v0, size_t vl);
// __riscv_vadc_tu, uint16 elements: for each LMUL (mf4..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (uint16_t rs1). v0 is a
// vbool<N> operand with N = element width divided by LMUL.
vuint16mf4_t __riscv_vadc_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, vbool64_t v0, size_t vl);
vuint16mf4_t __riscv_vadc_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                             vbool64_t v0, size_t vl);
vuint16mf2_t __riscv_vadc_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, vbool32_t v0, size_t vl);
vuint16mf2_t __riscv_vadc_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                             vbool32_t v0, size_t vl);
vuint16m1_t __riscv_vadc_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                            vbool16_t v0, size_t vl);
vuint16m1_t __riscv_vadc_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                            vbool16_t v0, size_t vl);
vuint16m2_t __riscv_vadc_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                            vbool8_t v0, size_t vl);
vuint16m2_t __riscv_vadc_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                            vbool8_t v0, size_t vl);
vuint16m4_t __riscv_vadc_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                            vbool4_t v0, size_t vl);
vuint16m4_t __riscv_vadc_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                            vbool4_t v0, size_t vl);
vuint16m8_t __riscv_vadc_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                            vbool2_t v0, size_t vl);
vuint16m8_t __riscv_vadc_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                            vbool2_t v0, size_t vl);
// __riscv_vadc_tu, uint32 elements: for each LMUL (mf2..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (uint32_t rs1). v0 is a
// vbool<N> operand with N = element width divided by LMUL.
vuint32mf2_t __riscv_vadc_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, vbool64_t v0, size_t vl);
vuint32mf2_t __riscv_vadc_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                             vbool64_t v0, size_t vl);
vuint32m1_t __riscv_vadc_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                            vbool32_t v0, size_t vl);
vuint32m1_t __riscv_vadc_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                            vbool32_t v0, size_t vl);
vuint32m2_t __riscv_vadc_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                            vbool16_t v0, size_t vl);
vuint32m2_t __riscv_vadc_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                            vbool16_t v0, size_t vl);
vuint32m4_t __riscv_vadc_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                            vbool8_t v0, size_t vl);
vuint32m4_t __riscv_vadc_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                            vbool8_t v0, size_t vl);
vuint32m8_t __riscv_vadc_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                            vbool4_t v0, size_t vl);
vuint32m8_t __riscv_vadc_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                            vbool4_t v0, size_t vl);
// __riscv_vadc_tu, uint64 elements: for each LMUL (m1..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (uint64_t rs1). v0 is a
// vbool<N> operand with N = element width divided by LMUL.
vuint64m1_t __riscv_vadc_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                            vbool64_t v0, size_t vl);
vuint64m1_t __riscv_vadc_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                            vbool64_t v0, size_t vl);
vuint64m2_t __riscv_vadc_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                            vbool32_t v0, size_t vl);
vuint64m2_t __riscv_vadc_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                            vbool32_t v0, size_t vl);
vuint64m4_t __riscv_vadc_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                            vbool16_t v0, size_t vl);
vuint64m4_t __riscv_vadc_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                            vbool16_t v0, size_t vl);
vuint64m8_t __riscv_vadc_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                            vbool8_t v0, size_t vl);
vuint64m8_t __riscv_vadc_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                            vbool8_t v0, size_t vl);
// __riscv_vsbc_tu, uint8 elements: unsigned counterparts of the signed
// overloads. For each LMUL (mf8..m8) a vector-vector overload (vs1) followed by
// a vector-scalar overload (uint8_t rs1). v0 is a vbool<N> operand with
// N = element width divided by LMUL.
vuint8mf8_t __riscv_vsbc_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                            vbool64_t v0, size_t vl);
vuint8mf8_t __riscv_vsbc_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                            vbool64_t v0, size_t vl);
vuint8mf4_t __riscv_vsbc_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                            vbool32_t v0, size_t vl);
vuint8mf4_t __riscv_vsbc_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                            vbool32_t v0, size_t vl);
vuint8mf2_t __riscv_vsbc_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                            vbool16_t v0, size_t vl);
vuint8mf2_t __riscv_vsbc_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                            vbool16_t v0, size_t vl);
vuint8m1_t __riscv_vsbc_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                           vbool8_t v0, size_t vl);
vuint8m1_t __riscv_vsbc_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                           vbool8_t v0, size_t vl);
vuint8m2_t __riscv_vsbc_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                           vbool4_t v0, size_t vl);
vuint8m2_t __riscv_vsbc_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                           vbool4_t v0, size_t vl);
vuint8m4_t __riscv_vsbc_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                           vbool2_t v0, size_t vl);
vuint8m4_t __riscv_vsbc_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                           vbool2_t v0, size_t vl);
vuint8m8_t __riscv_vsbc_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                           vbool1_t v0, size_t vl);
vuint8m8_t __riscv_vsbc_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                           vbool1_t v0, size_t vl);
// __riscv_vsbc_tu, uint16 elements: for each LMUL (mf4..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (uint16_t rs1). v0 is a
// vbool<N> operand with N = element width divided by LMUL.
vuint16mf4_t __riscv_vsbc_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, vbool64_t v0, size_t vl);
vuint16mf4_t __riscv_vsbc_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                             vbool64_t v0, size_t vl);
vuint16mf2_t __riscv_vsbc_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, vbool32_t v0, size_t vl);
vuint16mf2_t __riscv_vsbc_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                             vbool32_t v0, size_t vl);
vuint16m1_t __riscv_vsbc_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                            vbool16_t v0, size_t vl);
vuint16m1_t __riscv_vsbc_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                            vbool16_t v0, size_t vl);
vuint16m2_t __riscv_vsbc_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                            vbool8_t v0, size_t vl);
vuint16m2_t __riscv_vsbc_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                            vbool8_t v0, size_t vl);
vuint16m4_t __riscv_vsbc_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                            vbool4_t v0, size_t vl);
vuint16m4_t __riscv_vsbc_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                            vbool4_t v0, size_t vl);
vuint16m8_t __riscv_vsbc_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                            vbool2_t v0, size_t vl);
vuint16m8_t __riscv_vsbc_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                            vbool2_t v0, size_t vl);
// __riscv_vsbc_tu, uint32 elements: for each LMUL (mf2..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (uint32_t rs1). v0 is a
// vbool<N> operand with N = element width divided by LMUL.
vuint32mf2_t __riscv_vsbc_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, vbool64_t v0, size_t vl);
vuint32mf2_t __riscv_vsbc_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                             vbool64_t v0, size_t vl);
vuint32m1_t __riscv_vsbc_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                            vbool32_t v0, size_t vl);
vuint32m1_t __riscv_vsbc_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                            vbool32_t v0, size_t vl);
vuint32m2_t __riscv_vsbc_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                            vbool16_t v0, size_t vl);
vuint32m2_t __riscv_vsbc_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                            vbool16_t v0, size_t vl);
vuint32m4_t __riscv_vsbc_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                            vbool8_t v0, size_t vl);
vuint32m4_t __riscv_vsbc_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                            vbool8_t v0, size_t vl);
vuint32m8_t __riscv_vsbc_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                            vbool4_t v0, size_t vl);
vuint32m8_t __riscv_vsbc_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                            vbool4_t v0, size_t vl);
// __riscv_vsbc_tu, uint64 elements: for each LMUL (m1..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (uint64_t rs1). v0 is a
// vbool<N> operand with N = element width divided by LMUL.
vuint64m1_t __riscv_vsbc_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                            vbool64_t v0, size_t vl);
vuint64m1_t __riscv_vsbc_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                            vbool64_t v0, size_t vl);
vuint64m2_t __riscv_vsbc_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                            vbool32_t v0, size_t vl);
vuint64m2_t __riscv_vsbc_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                            vbool32_t v0, size_t vl);
vuint64m4_t __riscv_vsbc_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                            vbool16_t v0, size_t vl);
vuint64m4_t __riscv_vsbc_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                            vbool16_t v0, size_t vl);
vuint64m8_t __riscv_vsbc_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                            vbool8_t v0, size_t vl);
vuint64m8_t __riscv_vsbc_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                            vbool8_t v0, size_t vl);

Vector Integer Carry-out / Borrow-out Intrinsics

Intrinsics here don’t have a policy variant.

Vector Bitwise Binary Logical Intrinsics

// __riscv_vand_tu, int8 elements: for each LMUL (mf8..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (int8_t rs1). vd, vs2 and
// the result share one vector type; no mask operand is taken.
// NOTE(review): _tu presumably selects the tail-undisturbed policy -- confirm
// against the RVV intrinsic naming rules.
vint8mf8_t __riscv_vand_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                           size_t vl);
vint8mf8_t __riscv_vand_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                           size_t vl);
vint8mf4_t __riscv_vand_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                           size_t vl);
vint8mf4_t __riscv_vand_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                           size_t vl);
vint8mf2_t __riscv_vand_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                           size_t vl);
vint8mf2_t __riscv_vand_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                           size_t vl);
vint8m1_t __riscv_vand_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                          size_t vl);
vint8m1_t __riscv_vand_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, size_t vl);
vint8m2_t __riscv_vand_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                          size_t vl);
vint8m2_t __riscv_vand_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, size_t vl);
vint8m4_t __riscv_vand_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                          size_t vl);
vint8m4_t __riscv_vand_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, size_t vl);
vint8m8_t __riscv_vand_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                          size_t vl);
vint8m8_t __riscv_vand_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, size_t vl);
// __riscv_vand_tu, int16 elements: for each LMUL (mf4..m8) a vector-vector
// overload (vs1) followed by a vector-scalar overload (int16_t rs1).
vint16mf4_t __riscv_vand_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                            size_t vl);
vint16mf4_t __riscv_vand_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                            size_t vl);
vint16mf2_t __riscv_vand_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                            size_t vl);
vint16mf2_t __riscv_vand_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                            size_t vl);
vint16m1_t __riscv_vand_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                           size_t vl);
vint16m1_t __riscv_vand_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                           size_t vl);
vint16m2_t __riscv_vand_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                           size_t vl);
vint16m2_t __riscv_vand_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                           size_t vl);
vint16m4_t __riscv_vand_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                           size_t vl);
vint16m4_t __riscv_vand_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                           size_t vl);
vint16m8_t __riscv_vand_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                           size_t vl);
vint16m8_t __riscv_vand_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                           size_t vl);
vint32mf2_t __riscv_vand_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                            size_t vl);
vint32mf2_t __riscv_vand_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                            size_t vl);
vint32m1_t __riscv_vand_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                           size_t vl);
vint32m1_t __riscv_vand_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                           size_t vl);
vint32m2_t __riscv_vand_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                           size_t vl);
vint32m2_t __riscv_vand_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                           size_t vl);
vint32m4_t __riscv_vand_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                           size_t vl);
vint32m4_t __riscv_vand_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                           size_t vl);
vint32m8_t __riscv_vand_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                           size_t vl);
vint32m8_t __riscv_vand_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                           size_t vl);
vint64m1_t __riscv_vand_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                           size_t vl);
vint64m1_t __riscv_vand_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                           size_t vl);
vint64m2_t __riscv_vand_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                           size_t vl);
vint64m2_t __riscv_vand_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                           size_t vl);
vint64m4_t __riscv_vand_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                           size_t vl);
vint64m4_t __riscv_vand_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                           size_t vl);
vint64m8_t __riscv_vand_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                           size_t vl);
vint64m8_t __riscv_vand_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                           size_t vl);
// __riscv_vor_tu, signed element types (vint8mf8_t through vint64m8_t).
// Overloaded bitwise-OR intrinsics carrying the `_tu` (tail-undisturbed)
// policy suffix of the RVV C intrinsics naming scheme.
// Every vector type has two overloads: vector-vector (vs2, vs1) and
// vector-scalar (vs2, rs1), where rs1 has the element's scalar type.
// vd has the same type as the return value; vl is the size_t vector length.
// 8-bit signed elements (scalar form takes int8_t)
vint8mf8_t __riscv_vor_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                          size_t vl);
vint8mf8_t __riscv_vor_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1, size_t vl);
vint8mf4_t __riscv_vor_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                          size_t vl);
vint8mf4_t __riscv_vor_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1, size_t vl);
vint8mf2_t __riscv_vor_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                          size_t vl);
vint8mf2_t __riscv_vor_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1, size_t vl);
vint8m1_t __riscv_vor_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vor_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, size_t vl);
vint8m2_t __riscv_vor_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vor_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, size_t vl);
vint8m4_t __riscv_vor_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vor_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, size_t vl);
vint8m8_t __riscv_vor_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vor_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, size_t vl);
// 16-bit signed elements (scalar form takes int16_t)
vint16mf4_t __riscv_vor_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                           size_t vl);
vint16mf4_t __riscv_vor_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                           size_t vl);
vint16mf2_t __riscv_vor_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                           size_t vl);
vint16mf2_t __riscv_vor_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                           size_t vl);
vint16m1_t __riscv_vor_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                          size_t vl);
vint16m1_t __riscv_vor_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                          size_t vl);
vint16m2_t __riscv_vor_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                          size_t vl);
vint16m2_t __riscv_vor_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                          size_t vl);
vint16m4_t __riscv_vor_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                          size_t vl);
vint16m4_t __riscv_vor_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                          size_t vl);
vint16m8_t __riscv_vor_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                          size_t vl);
vint16m8_t __riscv_vor_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                          size_t vl);
// 32-bit signed elements (scalar form takes int32_t)
vint32mf2_t __riscv_vor_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                           size_t vl);
vint32mf2_t __riscv_vor_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                           size_t vl);
vint32m1_t __riscv_vor_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                          size_t vl);
vint32m1_t __riscv_vor_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                          size_t vl);
vint32m2_t __riscv_vor_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                          size_t vl);
vint32m2_t __riscv_vor_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                          size_t vl);
vint32m4_t __riscv_vor_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                          size_t vl);
vint32m4_t __riscv_vor_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                          size_t vl);
vint32m8_t __riscv_vor_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                          size_t vl);
vint32m8_t __riscv_vor_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                          size_t vl);
// 64-bit signed elements (scalar form takes int64_t)
vint64m1_t __riscv_vor_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                          size_t vl);
vint64m1_t __riscv_vor_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                          size_t vl);
vint64m2_t __riscv_vor_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                          size_t vl);
vint64m2_t __riscv_vor_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                          size_t vl);
vint64m4_t __riscv_vor_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                          size_t vl);
vint64m4_t __riscv_vor_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                          size_t vl);
vint64m8_t __riscv_vor_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                          size_t vl);
vint64m8_t __riscv_vor_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                          size_t vl);
// __riscv_vxor_tu, signed element types (vint8mf8_t through vint64m8_t).
// Overloaded bitwise-XOR intrinsics carrying the `_tu` (tail-undisturbed)
// policy suffix of the RVV C intrinsics naming scheme.
// Every vector type has two overloads: vector-vector (vs2, vs1) and
// vector-scalar (vs2, rs1), where rs1 has the element's scalar type.
// vd has the same type as the return value; vl is the size_t vector length.
// 8-bit signed elements (scalar form takes int8_t)
vint8mf8_t __riscv_vxor_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                           size_t vl);
vint8mf8_t __riscv_vxor_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                           size_t vl);
vint8mf4_t __riscv_vxor_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                           size_t vl);
vint8mf4_t __riscv_vxor_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                           size_t vl);
vint8mf2_t __riscv_vxor_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                           size_t vl);
vint8mf2_t __riscv_vxor_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                           size_t vl);
vint8m1_t __riscv_vxor_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                          size_t vl);
vint8m1_t __riscv_vxor_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, size_t vl);
vint8m2_t __riscv_vxor_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                          size_t vl);
vint8m2_t __riscv_vxor_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, size_t vl);
vint8m4_t __riscv_vxor_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                          size_t vl);
vint8m4_t __riscv_vxor_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, size_t vl);
vint8m8_t __riscv_vxor_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                          size_t vl);
vint8m8_t __riscv_vxor_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, size_t vl);
// 16-bit signed elements (scalar form takes int16_t)
vint16mf4_t __riscv_vxor_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                            size_t vl);
vint16mf4_t __riscv_vxor_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                            size_t vl);
vint16mf2_t __riscv_vxor_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                            size_t vl);
vint16mf2_t __riscv_vxor_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                            size_t vl);
vint16m1_t __riscv_vxor_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                           size_t vl);
vint16m1_t __riscv_vxor_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                           size_t vl);
vint16m2_t __riscv_vxor_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                           size_t vl);
vint16m2_t __riscv_vxor_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                           size_t vl);
vint16m4_t __riscv_vxor_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                           size_t vl);
vint16m4_t __riscv_vxor_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                           size_t vl);
vint16m8_t __riscv_vxor_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                           size_t vl);
vint16m8_t __riscv_vxor_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                           size_t vl);
// 32-bit signed elements (scalar form takes int32_t)
vint32mf2_t __riscv_vxor_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                            size_t vl);
vint32mf2_t __riscv_vxor_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                            size_t vl);
vint32m1_t __riscv_vxor_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                           size_t vl);
vint32m1_t __riscv_vxor_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                           size_t vl);
vint32m2_t __riscv_vxor_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                           size_t vl);
vint32m2_t __riscv_vxor_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                           size_t vl);
vint32m4_t __riscv_vxor_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                           size_t vl);
vint32m4_t __riscv_vxor_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                           size_t vl);
vint32m8_t __riscv_vxor_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                           size_t vl);
vint32m8_t __riscv_vxor_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                           size_t vl);
// 64-bit signed elements (scalar form takes int64_t)
vint64m1_t __riscv_vxor_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                           size_t vl);
vint64m1_t __riscv_vxor_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                           size_t vl);
vint64m2_t __riscv_vxor_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                           size_t vl);
vint64m2_t __riscv_vxor_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                           size_t vl);
vint64m4_t __riscv_vxor_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                           size_t vl);
vint64m4_t __riscv_vxor_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                           size_t vl);
vint64m8_t __riscv_vxor_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                           size_t vl);
vint64m8_t __riscv_vxor_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                           size_t vl);
// __riscv_vand_tu, unsigned element types (vuint8mf8_t through vuint64m8_t).
// Overloaded bitwise-AND intrinsics carrying the `_tu` (tail-undisturbed)
// policy suffix of the RVV C intrinsics naming scheme.
// Every vector type has two overloads: vector-vector (vs2, vs1) and
// vector-scalar (vs2, rs1), where rs1 has the element's scalar type.
// vd has the same type as the return value; vl is the size_t vector length.
// 8-bit unsigned elements (scalar form takes uint8_t)
vuint8mf8_t __riscv_vand_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                            size_t vl);
vuint8mf8_t __riscv_vand_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                            size_t vl);
vuint8mf4_t __riscv_vand_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                            size_t vl);
vuint8mf4_t __riscv_vand_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                            size_t vl);
vuint8mf2_t __riscv_vand_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                            size_t vl);
vuint8mf2_t __riscv_vand_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m1_t __riscv_vand_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                           size_t vl);
vuint8m1_t __riscv_vand_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m2_t __riscv_vand_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                           size_t vl);
vuint8m2_t __riscv_vand_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m4_t __riscv_vand_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                           size_t vl);
vuint8m4_t __riscv_vand_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m8_t __riscv_vand_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                           size_t vl);
vuint8m8_t __riscv_vand_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                           size_t vl);
// 16-bit unsigned elements (scalar form takes uint16_t)
vuint16mf4_t __riscv_vand_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vand_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                             size_t vl);
vuint16mf2_t __riscv_vand_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vand_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m1_t __riscv_vand_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                            size_t vl);
vuint16m1_t __riscv_vand_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m2_t __riscv_vand_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                            size_t vl);
vuint16m2_t __riscv_vand_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m4_t __riscv_vand_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                            size_t vl);
vuint16m4_t __riscv_vand_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m8_t __riscv_vand_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                            size_t vl);
vuint16m8_t __riscv_vand_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                            size_t vl);
// 32-bit unsigned elements (scalar form takes uint32_t)
vuint32mf2_t __riscv_vand_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vand_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m1_t __riscv_vand_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                            size_t vl);
vuint32m1_t __riscv_vand_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m2_t __riscv_vand_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                            size_t vl);
vuint32m2_t __riscv_vand_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m4_t __riscv_vand_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                            size_t vl);
vuint32m4_t __riscv_vand_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m8_t __riscv_vand_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                            size_t vl);
vuint32m8_t __riscv_vand_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                            size_t vl);
// 64-bit unsigned elements (scalar form takes uint64_t)
vuint64m1_t __riscv_vand_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                            size_t vl);
vuint64m1_t __riscv_vand_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m2_t __riscv_vand_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                            size_t vl);
vuint64m2_t __riscv_vand_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m4_t __riscv_vand_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                            size_t vl);
vuint64m4_t __riscv_vand_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m8_t __riscv_vand_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                            size_t vl);
vuint64m8_t __riscv_vand_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                            size_t vl);
// __riscv_vor_tu, unsigned element types (vuint8mf8_t through vuint64m8_t).
// Overloaded bitwise-OR intrinsics carrying the `_tu` (tail-undisturbed)
// policy suffix of the RVV C intrinsics naming scheme.
// Every vector type has two overloads: vector-vector (vs2, vs1) and
// vector-scalar (vs2, rs1), where rs1 has the element's scalar type.
// vd has the same type as the return value; vl is the size_t vector length.
// 8-bit unsigned elements (scalar form takes uint8_t)
vuint8mf8_t __riscv_vor_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                           size_t vl);
vuint8mf8_t __riscv_vor_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                           size_t vl);
vuint8mf4_t __riscv_vor_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                           size_t vl);
vuint8mf4_t __riscv_vor_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                           size_t vl);
vuint8mf2_t __riscv_vor_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                           size_t vl);
vuint8mf2_t __riscv_vor_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m1_t __riscv_vor_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                          size_t vl);
vuint8m1_t __riscv_vor_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                          size_t vl);
vuint8m2_t __riscv_vor_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                          size_t vl);
vuint8m2_t __riscv_vor_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                          size_t vl);
vuint8m4_t __riscv_vor_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                          size_t vl);
vuint8m4_t __riscv_vor_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                          size_t vl);
vuint8m8_t __riscv_vor_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                          size_t vl);
vuint8m8_t __riscv_vor_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                          size_t vl);
// 16-bit unsigned elements (scalar form takes uint16_t)
vuint16mf4_t __riscv_vor_tu(vuint16mf4_t vd, vuint16mf4_t vs2, vuint16mf4_t vs1,
                            size_t vl);
vuint16mf4_t __riscv_vor_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                            size_t vl);
vuint16mf2_t __riscv_vor_tu(vuint16mf2_t vd, vuint16mf2_t vs2, vuint16mf2_t vs1,
                            size_t vl);
vuint16mf2_t __riscv_vor_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m1_t __riscv_vor_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                           size_t vl);
vuint16m1_t __riscv_vor_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                           size_t vl);
vuint16m2_t __riscv_vor_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                           size_t vl);
vuint16m2_t __riscv_vor_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                           size_t vl);
vuint16m4_t __riscv_vor_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                           size_t vl);
vuint16m4_t __riscv_vor_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                           size_t vl);
vuint16m8_t __riscv_vor_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                           size_t vl);
vuint16m8_t __riscv_vor_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                           size_t vl);
// 32-bit unsigned elements (scalar form takes uint32_t)
vuint32mf2_t __riscv_vor_tu(vuint32mf2_t vd, vuint32mf2_t vs2, vuint32mf2_t vs1,
                            size_t vl);
vuint32mf2_t __riscv_vor_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m1_t __riscv_vor_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                           size_t vl);
vuint32m1_t __riscv_vor_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                           size_t vl);
vuint32m2_t __riscv_vor_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                           size_t vl);
vuint32m2_t __riscv_vor_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                           size_t vl);
vuint32m4_t __riscv_vor_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                           size_t vl);
vuint32m4_t __riscv_vor_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                           size_t vl);
vuint32m8_t __riscv_vor_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                           size_t vl);
vuint32m8_t __riscv_vor_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                           size_t vl);
// 64-bit unsigned elements (scalar form takes uint64_t)
vuint64m1_t __riscv_vor_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                           size_t vl);
vuint64m1_t __riscv_vor_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                           size_t vl);
vuint64m2_t __riscv_vor_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                           size_t vl);
vuint64m2_t __riscv_vor_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                           size_t vl);
vuint64m4_t __riscv_vor_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                           size_t vl);
vuint64m4_t __riscv_vor_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                           size_t vl);
vuint64m8_t __riscv_vor_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                           size_t vl);
vuint64m8_t __riscv_vor_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                           size_t vl);
// __riscv_vxor_tu, unsigned element types (vuint8mf8_t through vuint64m8_t).
// Overloaded bitwise-XOR intrinsics carrying the `_tu` (tail-undisturbed)
// policy suffix of the RVV C intrinsics naming scheme.
// Every vector type has two overloads: vector-vector (vs2, vs1) and
// vector-scalar (vs2, rs1), where rs1 has the element's scalar type.
// vd has the same type as the return value; vl is the size_t vector length.
// 8-bit unsigned elements (scalar form takes uint8_t)
vuint8mf8_t __riscv_vxor_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                            size_t vl);
vuint8mf8_t __riscv_vxor_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                            size_t vl);
vuint8mf4_t __riscv_vxor_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                            size_t vl);
vuint8mf4_t __riscv_vxor_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                            size_t vl);
vuint8mf2_t __riscv_vxor_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                            size_t vl);
vuint8mf2_t __riscv_vxor_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m1_t __riscv_vxor_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                           size_t vl);
vuint8m1_t __riscv_vxor_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m2_t __riscv_vxor_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                           size_t vl);
vuint8m2_t __riscv_vxor_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m4_t __riscv_vxor_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                           size_t vl);
vuint8m4_t __riscv_vxor_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m8_t __riscv_vxor_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                           size_t vl);
vuint8m8_t __riscv_vxor_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                           size_t vl);
// 16-bit unsigned elements (scalar form takes uint16_t)
vuint16mf4_t __riscv_vxor_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vxor_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                             size_t vl);
vuint16mf2_t __riscv_vxor_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vxor_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m1_t __riscv_vxor_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                            size_t vl);
vuint16m1_t __riscv_vxor_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m2_t __riscv_vxor_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                            size_t vl);
vuint16m2_t __riscv_vxor_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m4_t __riscv_vxor_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                            size_t vl);
vuint16m4_t __riscv_vxor_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m8_t __riscv_vxor_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                            size_t vl);
vuint16m8_t __riscv_vxor_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                            size_t vl);
// 32-bit unsigned elements (scalar form takes uint32_t)
vuint32mf2_t __riscv_vxor_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vxor_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m1_t __riscv_vxor_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                            size_t vl);
vuint32m1_t __riscv_vxor_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m2_t __riscv_vxor_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                            size_t vl);
vuint32m2_t __riscv_vxor_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m4_t __riscv_vxor_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                            size_t vl);
vuint32m4_t __riscv_vxor_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m8_t __riscv_vxor_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                            size_t vl);
vuint32m8_t __riscv_vxor_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                            size_t vl);
// 64-bit unsigned elements (scalar form takes uint64_t)
vuint64m1_t __riscv_vxor_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                            size_t vl);
vuint64m1_t __riscv_vxor_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m2_t __riscv_vxor_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                            size_t vl);
vuint64m2_t __riscv_vxor_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m4_t __riscv_vxor_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                            size_t vl);
vuint64m4_t __riscv_vxor_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m8_t __riscv_vxor_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                            size_t vl);
vuint64m8_t __riscv_vxor_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                            size_t vl);
// masked functions, _tum policy (tail undisturbed, mask agnostic)
vint8mf8_t __riscv_vand_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vand_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            int8_t rs1, size_t vl);
vint8mf4_t __riscv_vand_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vand_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            int8_t rs1, size_t vl);
vint8mf2_t __riscv_vand_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vand_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint8m1_t __riscv_vand_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vand_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                           size_t vl);
vint8m2_t __riscv_vand_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vand_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                           size_t vl);
vint8m4_t __riscv_vand_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vand_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                           size_t vl);
vint8m8_t __riscv_vand_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vand_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vand_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vand_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint16mf2_t __riscv_vand_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vand_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint16m1_t __riscv_vand_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vand_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint16m2_t __riscv_vand_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vand_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint16m4_t __riscv_vand_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vand_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint16m8_t __riscv_vand_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vand_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            int16_t rs1, size_t vl);
vint32mf2_t __riscv_vand_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vand_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint32m1_t __riscv_vand_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vand_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint32m2_t __riscv_vand_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vand_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint32m4_t __riscv_vand_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vand_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint32m8_t __riscv_vand_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vand_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            int32_t rs1, size_t vl);
vint64m1_t __riscv_vand_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vand_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            int64_t rs1, size_t vl);
vint64m2_t __riscv_vand_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vand_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            int64_t rs1, size_t vl);
vint64m4_t __riscv_vand_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vand_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            int64_t rs1, size_t vl);
vint64m8_t __riscv_vand_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vand_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            int64_t rs1, size_t vl);
vint8mf8_t __riscv_vor_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vor_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vint8mf4_t __riscv_vor_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vor_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vint8mf2_t __riscv_vor_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vor_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vint8m1_t __riscv_vor_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vor_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vint8m2_t __riscv_vor_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vor_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vint8m4_t __riscv_vor_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vor_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vint8m8_t __riscv_vor_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vor_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
vint16mf4_t __riscv_vor_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vor_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            int16_t rs1, size_t vl);
vint16mf2_t __riscv_vor_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vor_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            int16_t rs1, size_t vl);
vint16m1_t __riscv_vor_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vor_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vint16m2_t __riscv_vor_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vor_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           int16_t rs1, size_t vl);
vint16m4_t __riscv_vor_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vor_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           int16_t rs1, size_t vl);
vint16m8_t __riscv_vor_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vor_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           int16_t rs1, size_t vl);
vint32mf2_t __riscv_vor_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vor_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            int32_t rs1, size_t vl);
vint32m1_t __riscv_vor_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vor_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vint32m2_t __riscv_vor_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vor_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vint32m4_t __riscv_vor_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vor_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           int32_t rs1, size_t vl);
vint32m8_t __riscv_vor_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vor_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           int32_t rs1, size_t vl);
vint64m1_t __riscv_vor_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vor_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vint64m2_t __riscv_vor_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vor_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vint64m4_t __riscv_vor_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vor_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vint64m8_t __riscv_vor_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vor_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           int64_t rs1, size_t vl);
vint8mf8_t __riscv_vxor_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vxor_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            int8_t rs1, size_t vl);
vint8mf4_t __riscv_vxor_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vxor_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            int8_t rs1, size_t vl);
vint8mf2_t __riscv_vxor_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vxor_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint8m1_t __riscv_vxor_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vxor_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                           size_t vl);
vint8m2_t __riscv_vxor_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vxor_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                           size_t vl);
vint8m4_t __riscv_vxor_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vxor_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                           size_t vl);
vint8m8_t __riscv_vxor_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vxor_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vxor_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vxor_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint16mf2_t __riscv_vxor_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vxor_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint16m1_t __riscv_vxor_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vxor_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint16m2_t __riscv_vxor_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vxor_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint16m4_t __riscv_vxor_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vxor_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint16m8_t __riscv_vxor_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vxor_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            int16_t rs1, size_t vl);
vint32mf2_t __riscv_vxor_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vxor_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint32m1_t __riscv_vxor_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vxor_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint32m2_t __riscv_vxor_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vxor_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint32m4_t __riscv_vxor_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vxor_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint32m8_t __riscv_vxor_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vxor_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            int32_t rs1, size_t vl);
vint64m1_t __riscv_vxor_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vxor_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            int64_t rs1, size_t vl);
vint64m2_t __riscv_vxor_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vxor_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            int64_t rs1, size_t vl);
vint64m4_t __riscv_vxor_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vxor_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            int64_t rs1, size_t vl);
vint64m8_t __riscv_vxor_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vxor_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            int64_t rs1, size_t vl);
vuint8mf8_t __riscv_vand_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vand_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vand_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vand_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vand_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vand_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vand_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vand_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vand_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vand_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vand_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vand_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vand_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vand_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vand_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vand_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vand_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vand_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vand_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vand_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vand_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vand_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vand_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vand_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vand_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vand_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vand_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vand_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vand_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vand_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vand_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vand_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vand_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vand_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vand_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vand_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vand_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vand_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vand_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vand_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vand_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vand_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vand_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vand_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             uint64_t rs1, size_t vl);
vuint8mf8_t __riscv_vor_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vor_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vor_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vor_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vor_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vor_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vor_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vor_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vor_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vor_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vor_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vor_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vor_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vor_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vor_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vor_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vor_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vor_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vor_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vor_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vor_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vor_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vor_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vor_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vor_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vor_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vor_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vor_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vor_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vor_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vor_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vor_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vor_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vor_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vor_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vor_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vor_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vor_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vor_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vor_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vor_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vor_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vor_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vor_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            uint64_t rs1, size_t vl);
vuint8mf8_t __riscv_vxor_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vxor_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vxor_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vxor_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vxor_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vxor_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vxor_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vxor_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vxor_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vxor_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vxor_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vxor_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vxor_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vxor_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vxor_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vxor_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vxor_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vxor_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vxor_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vxor_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vxor_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vxor_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vxor_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vxor_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vxor_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vxor_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vxor_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vxor_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vxor_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vxor_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vxor_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vxor_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vxor_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vxor_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vxor_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vxor_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vxor_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vxor_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vxor_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vxor_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vxor_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vxor_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vxor_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vxor_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             uint64_t rs1, size_t vl);
// masked functions (_tumu: tail-undisturbed, mask-undisturbed)
vint8mf8_t __riscv_vand_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vand_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             int8_t rs1, size_t vl);
vint8mf4_t __riscv_vand_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vand_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             int8_t rs1, size_t vl);
vint8mf2_t __riscv_vand_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vand_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             int8_t rs1, size_t vl);
vint8m1_t __riscv_vand_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vand_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            int8_t rs1, size_t vl);
vint8m2_t __riscv_vand_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vand_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            int8_t rs1, size_t vl);
vint8m4_t __riscv_vand_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vand_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            int8_t rs1, size_t vl);
vint8m8_t __riscv_vand_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vand_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            int8_t rs1, size_t vl);
vint16mf4_t __riscv_vand_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vand_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              int16_t rs1, size_t vl);
vint16mf2_t __riscv_vand_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vand_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              int16_t rs1, size_t vl);
vint16m1_t __riscv_vand_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vand_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             int16_t rs1, size_t vl);
vint16m2_t __riscv_vand_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vand_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             int16_t rs1, size_t vl);
vint16m4_t __riscv_vand_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vand_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             int16_t rs1, size_t vl);
vint16m8_t __riscv_vand_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vand_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             int16_t rs1, size_t vl);
vint32mf2_t __riscv_vand_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vand_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              int32_t rs1, size_t vl);
vint32m1_t __riscv_vand_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vand_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             int32_t rs1, size_t vl);
vint32m2_t __riscv_vand_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vand_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             int32_t rs1, size_t vl);
vint32m4_t __riscv_vand_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vand_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             int32_t rs1, size_t vl);
vint32m8_t __riscv_vand_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vand_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             int32_t rs1, size_t vl);
vint64m1_t __riscv_vand_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vand_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             int64_t rs1, size_t vl);
vint64m2_t __riscv_vand_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vand_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             int64_t rs1, size_t vl);
vint64m4_t __riscv_vand_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vand_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             int64_t rs1, size_t vl);
vint64m8_t __riscv_vand_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vand_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             int64_t rs1, size_t vl);
vint8mf8_t __riscv_vor_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vor_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            int8_t rs1, size_t vl);
vint8mf4_t __riscv_vor_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vor_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            int8_t rs1, size_t vl);
vint8mf2_t __riscv_vor_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vor_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint8m1_t __riscv_vor_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vor_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                           size_t vl);
vint8m2_t __riscv_vor_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vor_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                           size_t vl);
vint8m4_t __riscv_vor_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vor_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                           size_t vl);
vint8m8_t __riscv_vor_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vor_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vor_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vor_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint16mf2_t __riscv_vor_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vor_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint16m1_t __riscv_vor_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vor_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint16m2_t __riscv_vor_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vor_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint16m4_t __riscv_vor_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vor_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint16m8_t __riscv_vor_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vor_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            int16_t rs1, size_t vl);
vint32mf2_t __riscv_vor_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vor_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint32m1_t __riscv_vor_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vor_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint32m2_t __riscv_vor_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vor_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint32m4_t __riscv_vor_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vor_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint32m8_t __riscv_vor_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vor_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            int32_t rs1, size_t vl);
vint64m1_t __riscv_vor_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vor_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            int64_t rs1, size_t vl);
vint64m2_t __riscv_vor_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vor_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            int64_t rs1, size_t vl);
vint64m4_t __riscv_vor_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vor_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            int64_t rs1, size_t vl);
vint64m8_t __riscv_vor_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vor_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            int64_t rs1, size_t vl);
vint8mf8_t __riscv_vxor_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vxor_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             int8_t rs1, size_t vl);
vint8mf4_t __riscv_vxor_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vxor_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             int8_t rs1, size_t vl);
vint8mf2_t __riscv_vxor_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vxor_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             int8_t rs1, size_t vl);
vint8m1_t __riscv_vxor_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vxor_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            int8_t rs1, size_t vl);
vint8m2_t __riscv_vxor_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vxor_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            int8_t rs1, size_t vl);
vint8m4_t __riscv_vxor_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vxor_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            int8_t rs1, size_t vl);
vint8m8_t __riscv_vxor_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vxor_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            int8_t rs1, size_t vl);
vint16mf4_t __riscv_vxor_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vxor_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              int16_t rs1, size_t vl);
vint16mf2_t __riscv_vxor_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vxor_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              int16_t rs1, size_t vl);
vint16m1_t __riscv_vxor_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vxor_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             int16_t rs1, size_t vl);
vint16m2_t __riscv_vxor_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vxor_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             int16_t rs1, size_t vl);
vint16m4_t __riscv_vxor_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vxor_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             int16_t rs1, size_t vl);
vint16m8_t __riscv_vxor_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vxor_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             int16_t rs1, size_t vl);
vint32mf2_t __riscv_vxor_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vxor_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              int32_t rs1, size_t vl);
vint32m1_t __riscv_vxor_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vxor_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             int32_t rs1, size_t vl);
vint32m2_t __riscv_vxor_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vxor_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             int32_t rs1, size_t vl);
vint32m4_t __riscv_vxor_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vxor_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             int32_t rs1, size_t vl);
vint32m8_t __riscv_vxor_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vxor_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             int32_t rs1, size_t vl);
vint64m1_t __riscv_vxor_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vxor_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             int64_t rs1, size_t vl);
vint64m2_t __riscv_vxor_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vxor_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             int64_t rs1, size_t vl);
vint64m4_t __riscv_vxor_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vxor_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             int64_t rs1, size_t vl);
vint64m8_t __riscv_vxor_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vxor_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             int64_t rs1, size_t vl);
vuint8mf8_t __riscv_vand_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vand_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vand_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vand_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vand_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vand_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vand_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vand_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vand_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vand_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vand_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vand_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vand_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vand_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vand_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vand_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vand_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vand_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vand_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vand_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vand_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vand_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vand_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vand_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vand_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vand_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vand_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vand_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vand_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vand_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vand_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vand_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vand_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vand_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vand_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vand_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vand_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vand_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vand_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vand_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vand_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vand_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vand_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vand_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              uint64_t rs1, size_t vl);
vuint8mf8_t __riscv_vor_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vor_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vor_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vor_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vor_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vor_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vor_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vor_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vor_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vor_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vor_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vor_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vor_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vor_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vor_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vor_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vor_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vor_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vor_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vor_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vor_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vor_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vor_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vor_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vor_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vor_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vor_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vor_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vor_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vor_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vor_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vor_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vor_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vor_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vor_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vor_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vor_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vor_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vor_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vor_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vor_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vor_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vor_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vor_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             uint64_t rs1, size_t vl);
vuint8mf8_t __riscv_vxor_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vxor_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vxor_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vxor_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vxor_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vxor_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vxor_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vxor_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vxor_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vxor_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vxor_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vxor_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vxor_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vxor_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vxor_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vxor_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vxor_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vxor_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vxor_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vxor_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vxor_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vxor_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vxor_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vxor_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vxor_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vxor_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vxor_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vxor_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vxor_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vxor_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vxor_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vxor_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vxor_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vxor_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vxor_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vxor_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vxor_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vxor_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vxor_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vxor_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vxor_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vxor_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vxor_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vxor_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              uint64_t rs1, size_t vl);
// masked functions
// __riscv_vand_mu - overloads for signed integer vector types
// (vint8mf8_t .. vint64m8_t).
//
// Each vector type has two overloads that differ only in the second operand:
//   - vector form: `vs1` has the same vector type as `vs2`;
//   - scalar form: `rs1` is an intN_t matching the element width in the
//     vector type name.
//
// Parameters shared by every overload below:
//   vm  - mask operand of type vboolN_t. In the overloads listed here N
//         equals the element bit width divided by the m-suffix multiplier
//         (reading mfK as 1/K), e.g. vint8mf8_t -> vbool64_t and
//         vint8m8_t -> vbool1_t.
//   vd  - operand of the result vector type, listed right after the mask.
//   vs2 - first operand, same vector type as the result.
//   vl  - size_t length argument.
// Returns a vector of the same type as `vd` and `vs2`.
//
// NOTE(review): per the RVV intrinsics naming scheme `_mu` presumably selects
// the mask-undisturbed policy, with `vd` supplying the masked-off elements -
// confirm against the intrinsics specification.
vint8mf8_t __riscv_vand_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vand_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vint8mf4_t __riscv_vand_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vand_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vint8mf2_t __riscv_vand_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vand_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vint8m1_t __riscv_vand_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vand_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vint8m2_t __riscv_vand_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vand_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vint8m4_t __riscv_vand_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vand_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vint8m8_t __riscv_vand_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vand_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
vint16mf4_t __riscv_vand_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vand_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            int16_t rs1, size_t vl);
vint16mf2_t __riscv_vand_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vand_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            int16_t rs1, size_t vl);
vint16m1_t __riscv_vand_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vand_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vint16m2_t __riscv_vand_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vand_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           int16_t rs1, size_t vl);
vint16m4_t __riscv_vand_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vand_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           int16_t rs1, size_t vl);
vint16m8_t __riscv_vand_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vand_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           int16_t rs1, size_t vl);
vint32mf2_t __riscv_vand_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vand_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            int32_t rs1, size_t vl);
vint32m1_t __riscv_vand_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vand_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vint32m2_t __riscv_vand_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vand_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vint32m4_t __riscv_vand_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vand_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           int32_t rs1, size_t vl);
vint32m8_t __riscv_vand_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vand_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           int32_t rs1, size_t vl);
vint64m1_t __riscv_vand_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vand_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vint64m2_t __riscv_vand_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vand_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vint64m4_t __riscv_vand_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vand_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vint64m8_t __riscv_vand_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vand_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           int64_t rs1, size_t vl);
// __riscv_vor_mu - overloads for signed integer vector types
// (vint8mf8_t .. vint64m8_t).
//
// Each vector type has a vector form (`vs1`, same vector type as `vs2`) and
// a scalar form (`rs1`, an intN_t matching the element width in the type
// name). All overloads take, in order: a vboolN_t mask `vm`, `vd` and `vs2`
// of the result vector type, the second operand, and a size_t `vl`; the
// return type equals the type of `vd`.
//
// NOTE(review): per the RVV intrinsics naming scheme `_mu` presumably selects
// the mask-undisturbed policy, with `vd` supplying the masked-off elements -
// confirm against the intrinsics specification.
vint8mf8_t __riscv_vor_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                          vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vor_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                          int8_t rs1, size_t vl);
vint8mf4_t __riscv_vor_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                          vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vor_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                          int8_t rs1, size_t vl);
vint8mf2_t __riscv_vor_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                          vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vor_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                          int8_t rs1, size_t vl);
vint8m1_t __riscv_vor_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                         vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vor_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                         size_t vl);
vint8m2_t __riscv_vor_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                         vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vor_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                         size_t vl);
vint8m4_t __riscv_vor_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                         vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vor_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                         size_t vl);
vint8m8_t __riscv_vor_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                         vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vor_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                         size_t vl);
vint16mf4_t __riscv_vor_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                           vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vor_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                           int16_t rs1, size_t vl);
vint16mf2_t __riscv_vor_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                           vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vor_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                           int16_t rs1, size_t vl);
vint16m1_t __riscv_vor_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                          vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vor_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                          int16_t rs1, size_t vl);
vint16m2_t __riscv_vor_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                          vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vor_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                          int16_t rs1, size_t vl);
vint16m4_t __riscv_vor_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                          vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vor_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                          int16_t rs1, size_t vl);
vint16m8_t __riscv_vor_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                          vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vor_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                          int16_t rs1, size_t vl);
vint32mf2_t __riscv_vor_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                           vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vor_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                           int32_t rs1, size_t vl);
vint32m1_t __riscv_vor_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                          vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vor_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                          int32_t rs1, size_t vl);
vint32m2_t __riscv_vor_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                          vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vor_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                          int32_t rs1, size_t vl);
vint32m4_t __riscv_vor_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                          vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vor_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                          int32_t rs1, size_t vl);
vint32m8_t __riscv_vor_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                          vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vor_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                          int32_t rs1, size_t vl);
vint64m1_t __riscv_vor_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                          vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vor_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                          int64_t rs1, size_t vl);
vint64m2_t __riscv_vor_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                          vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vor_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                          int64_t rs1, size_t vl);
vint64m4_t __riscv_vor_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                          vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vor_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                          int64_t rs1, size_t vl);
vint64m8_t __riscv_vor_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                          vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vor_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                          int64_t rs1, size_t vl);
// __riscv_vxor_mu - overloads for signed integer vector types
// (vint8mf8_t .. vint64m8_t).
//
// Each vector type has a vector form (`vs1`, same vector type as `vs2`) and
// a scalar form (`rs1`, an intN_t matching the element width in the type
// name). All overloads take, in order: a vboolN_t mask `vm`, `vd` and `vs2`
// of the result vector type, the second operand, and a size_t `vl`; the
// return type equals the type of `vd`.
//
// NOTE(review): per the RVV intrinsics naming scheme `_mu` presumably selects
// the mask-undisturbed policy, with `vd` supplying the masked-off elements -
// confirm against the intrinsics specification.
vint8mf8_t __riscv_vxor_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vxor_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vint8mf4_t __riscv_vxor_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vxor_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vint8mf2_t __riscv_vxor_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vxor_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vint8m1_t __riscv_vxor_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vxor_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vint8m2_t __riscv_vxor_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vxor_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vint8m4_t __riscv_vxor_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vxor_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vint8m8_t __riscv_vxor_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vxor_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
vint16mf4_t __riscv_vxor_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vxor_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            int16_t rs1, size_t vl);
vint16mf2_t __riscv_vxor_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vxor_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            int16_t rs1, size_t vl);
vint16m1_t __riscv_vxor_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vxor_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vint16m2_t __riscv_vxor_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vxor_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           int16_t rs1, size_t vl);
vint16m4_t __riscv_vxor_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vxor_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           int16_t rs1, size_t vl);
vint16m8_t __riscv_vxor_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vxor_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           int16_t rs1, size_t vl);
vint32mf2_t __riscv_vxor_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vxor_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            int32_t rs1, size_t vl);
vint32m1_t __riscv_vxor_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vxor_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vint32m2_t __riscv_vxor_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vxor_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vint32m4_t __riscv_vxor_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vxor_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           int32_t rs1, size_t vl);
vint32m8_t __riscv_vxor_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vxor_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           int32_t rs1, size_t vl);
vint64m1_t __riscv_vxor_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vxor_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vint64m2_t __riscv_vxor_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vxor_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vint64m4_t __riscv_vxor_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vxor_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vint64m8_t __riscv_vxor_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vxor_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           int64_t rs1, size_t vl);
// __riscv_vand_mu - overloads for unsigned integer vector types
// (vuint8mf8_t .. vuint64m8_t).
//
// Each vector type has a vector form (`vs1`, same vector type as `vs2`) and
// a scalar form (`rs1`, a uintN_t matching the element width in the type
// name). All overloads take, in order: a vboolN_t mask `vm`, `vd` and `vs2`
// of the result vector type, the second operand, and a size_t `vl`; the
// return type equals the type of `vd`.
//
// NOTE(review): per the RVV intrinsics naming scheme `_mu` presumably selects
// the mask-undisturbed policy, with `vd` supplying the masked-off elements -
// confirm against the intrinsics specification.
vuint8mf8_t __riscv_vand_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vand_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vand_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vand_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vand_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vand_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vand_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vand_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vand_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vand_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vand_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vand_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vand_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vand_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vand_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vand_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vand_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vand_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vand_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vand_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vand_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vand_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vand_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vand_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vand_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vand_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vand_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vand_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vand_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vand_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vand_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vand_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vand_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vand_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vand_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vand_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vand_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vand_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vand_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vand_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vand_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vand_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vand_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vand_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            uint64_t rs1, size_t vl);
// __riscv_vor_mu - overloads for unsigned integer vector types
// (vuint8mf8_t .. vuint64m8_t).
//
// Each vector type has a vector form (`vs1`, same vector type as `vs2`) and
// a scalar form (`rs1`, a uintN_t matching the element width in the type
// name). All overloads take, in order: a vboolN_t mask `vm`, `vd` and `vs2`
// of the result vector type, the second operand, and a size_t `vl`; the
// return type equals the type of `vd`.
//
// NOTE(review): per the RVV intrinsics naming scheme `_mu` presumably selects
// the mask-undisturbed policy, with `vd` supplying the masked-off elements -
// confirm against the intrinsics specification.
vuint8mf8_t __riscv_vor_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                           vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vor_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                           uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vor_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                           vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vor_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                           uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vor_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                           vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vor_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vor_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                          vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vor_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                          uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vor_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                          vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vor_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                          uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vor_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                          vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vor_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                          uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vor_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                          vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vor_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                          uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vor_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                            vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vor_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                            uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vor_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                            vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vor_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vor_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                           vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vor_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                           uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vor_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                           vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vor_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                           uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vor_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                           vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vor_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                           uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vor_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                           vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vor_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                           uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vor_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                            vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vor_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vor_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                           vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vor_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                           uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vor_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                           vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vor_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                           uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vor_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                           vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vor_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                           uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vor_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                           vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vor_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                           uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vor_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                           vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vor_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                           uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vor_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                           vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vor_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                           uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vor_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                           vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vor_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                           uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vor_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                           vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vor_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                           uint64_t rs1, size_t vl);
// __riscv_vxor_mu - overloads for unsigned integer vector types
// (vuint8mf8_t .. vuint64m8_t).
//
// Each vector type has a vector form (`vs1`, same vector type as `vs2`) and
// a scalar form (`rs1`, a uintN_t matching the element width in the type
// name). All overloads take, in order: a vboolN_t mask `vm`, `vd` and `vs2`
// of the result vector type, the second operand, and a size_t `vl`; the
// return type equals the type of `vd`.
//
// NOTE(review): per the RVV intrinsics naming scheme `_mu` presumably selects
// the mask-undisturbed policy, with `vd` supplying the masked-off elements -
// confirm against the intrinsics specification.
vuint8mf8_t __riscv_vxor_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vxor_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vxor_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vxor_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vxor_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vxor_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vxor_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vxor_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vxor_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vxor_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vxor_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vxor_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vxor_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vxor_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vxor_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vxor_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vxor_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vxor_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vxor_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vxor_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vxor_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vxor_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vxor_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vxor_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vxor_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vxor_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vxor_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vxor_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vxor_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vxor_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vxor_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vxor_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vxor_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vxor_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vxor_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vxor_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vxor_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vxor_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vxor_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vxor_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vxor_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vxor_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vxor_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vxor_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            uint64_t rs1, size_t vl);

Vector Bitwise Unary Logical Intrinsics

vint8mf8_t __riscv_vnot_tu(vint8mf8_t vd, vint8mf8_t vs, size_t vl);
vint8mf4_t __riscv_vnot_tu(vint8mf4_t vd, vint8mf4_t vs, size_t vl);
vint8mf2_t __riscv_vnot_tu(vint8mf2_t vd, vint8mf2_t vs, size_t vl);
vint8m1_t __riscv_vnot_tu(vint8m1_t vd, vint8m1_t vs, size_t vl);
vint8m2_t __riscv_vnot_tu(vint8m2_t vd, vint8m2_t vs, size_t vl);
vint8m4_t __riscv_vnot_tu(vint8m4_t vd, vint8m4_t vs, size_t vl);
vint8m8_t __riscv_vnot_tu(vint8m8_t vd, vint8m8_t vs, size_t vl);
vint16mf4_t __riscv_vnot_tu(vint16mf4_t vd, vint16mf4_t vs, size_t vl);
vint16mf2_t __riscv_vnot_tu(vint16mf2_t vd, vint16mf2_t vs, size_t vl);
vint16m1_t __riscv_vnot_tu(vint16m1_t vd, vint16m1_t vs, size_t vl);
vint16m2_t __riscv_vnot_tu(vint16m2_t vd, vint16m2_t vs, size_t vl);
vint16m4_t __riscv_vnot_tu(vint16m4_t vd, vint16m4_t vs, size_t vl);
vint16m8_t __riscv_vnot_tu(vint16m8_t vd, vint16m8_t vs, size_t vl);
vint32mf2_t __riscv_vnot_tu(vint32mf2_t vd, vint32mf2_t vs, size_t vl);
vint32m1_t __riscv_vnot_tu(vint32m1_t vd, vint32m1_t vs, size_t vl);
vint32m2_t __riscv_vnot_tu(vint32m2_t vd, vint32m2_t vs, size_t vl);
vint32m4_t __riscv_vnot_tu(vint32m4_t vd, vint32m4_t vs, size_t vl);
vint32m8_t __riscv_vnot_tu(vint32m8_t vd, vint32m8_t vs, size_t vl);
vint64m1_t __riscv_vnot_tu(vint64m1_t vd, vint64m1_t vs, size_t vl);
vint64m2_t __riscv_vnot_tu(vint64m2_t vd, vint64m2_t vs, size_t vl);
vint64m4_t __riscv_vnot_tu(vint64m4_t vd, vint64m4_t vs, size_t vl);
vint64m8_t __riscv_vnot_tu(vint64m8_t vd, vint64m8_t vs, size_t vl);
vuint8mf8_t __riscv_vnot_tu(vuint8mf8_t vd, vuint8mf8_t vs, size_t vl);
vuint8mf4_t __riscv_vnot_tu(vuint8mf4_t vd, vuint8mf4_t vs, size_t vl);
vuint8mf2_t __riscv_vnot_tu(vuint8mf2_t vd, vuint8mf2_t vs, size_t vl);
vuint8m1_t __riscv_vnot_tu(vuint8m1_t vd, vuint8m1_t vs, size_t vl);
vuint8m2_t __riscv_vnot_tu(vuint8m2_t vd, vuint8m2_t vs, size_t vl);
vuint8m4_t __riscv_vnot_tu(vuint8m4_t vd, vuint8m4_t vs, size_t vl);
vuint8m8_t __riscv_vnot_tu(vuint8m8_t vd, vuint8m8_t vs, size_t vl);
vuint16mf4_t __riscv_vnot_tu(vuint16mf4_t vd, vuint16mf4_t vs, size_t vl);
vuint16mf2_t __riscv_vnot_tu(vuint16mf2_t vd, vuint16mf2_t vs, size_t vl);
vuint16m1_t __riscv_vnot_tu(vuint16m1_t vd, vuint16m1_t vs, size_t vl);
vuint16m2_t __riscv_vnot_tu(vuint16m2_t vd, vuint16m2_t vs, size_t vl);
vuint16m4_t __riscv_vnot_tu(vuint16m4_t vd, vuint16m4_t vs, size_t vl);
vuint16m8_t __riscv_vnot_tu(vuint16m8_t vd, vuint16m8_t vs, size_t vl);
vuint32mf2_t __riscv_vnot_tu(vuint32mf2_t vd, vuint32mf2_t vs, size_t vl);
vuint32m1_t __riscv_vnot_tu(vuint32m1_t vd, vuint32m1_t vs, size_t vl);
vuint32m2_t __riscv_vnot_tu(vuint32m2_t vd, vuint32m2_t vs, size_t vl);
vuint32m4_t __riscv_vnot_tu(vuint32m4_t vd, vuint32m4_t vs, size_t vl);
vuint32m8_t __riscv_vnot_tu(vuint32m8_t vd, vuint32m8_t vs, size_t vl);
vuint64m1_t __riscv_vnot_tu(vuint64m1_t vd, vuint64m1_t vs, size_t vl);
vuint64m2_t __riscv_vnot_tu(vuint64m2_t vd, vuint64m2_t vs, size_t vl);
vuint64m4_t __riscv_vnot_tu(vuint64m4_t vd, vuint64m4_t vs, size_t vl);
vuint64m8_t __riscv_vnot_tu(vuint64m8_t vd, vuint64m8_t vs, size_t vl);
// masked functions (_tum: tail undisturbed, mask agnostic)
vint8mf8_t __riscv_vnot_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs,
                            size_t vl);
vint8mf4_t __riscv_vnot_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs,
                            size_t vl);
vint8mf2_t __riscv_vnot_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs,
                            size_t vl);
vint8m1_t __riscv_vnot_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs, size_t vl);
vint8m2_t __riscv_vnot_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs, size_t vl);
vint8m4_t __riscv_vnot_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs, size_t vl);
vint8m8_t __riscv_vnot_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs, size_t vl);
vint16mf4_t __riscv_vnot_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs,
                             size_t vl);
vint16mf2_t __riscv_vnot_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs,
                             size_t vl);
vint16m1_t __riscv_vnot_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs,
                            size_t vl);
vint16m2_t __riscv_vnot_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs,
                            size_t vl);
vint16m4_t __riscv_vnot_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs,
                            size_t vl);
vint16m8_t __riscv_vnot_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs,
                            size_t vl);
vint32mf2_t __riscv_vnot_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs,
                             size_t vl);
vint32m1_t __riscv_vnot_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs,
                            size_t vl);
vint32m2_t __riscv_vnot_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs,
                            size_t vl);
vint32m4_t __riscv_vnot_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs,
                            size_t vl);
vint32m8_t __riscv_vnot_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs,
                            size_t vl);
vint64m1_t __riscv_vnot_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs,
                            size_t vl);
vint64m2_t __riscv_vnot_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs,
                            size_t vl);
vint64m4_t __riscv_vnot_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs,
                            size_t vl);
vint64m8_t __riscv_vnot_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs,
                            size_t vl);
vuint8mf8_t __riscv_vnot_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs,
                             size_t vl);
vuint8mf4_t __riscv_vnot_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs,
                             size_t vl);
vuint8mf2_t __riscv_vnot_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs,
                             size_t vl);
vuint8m1_t __riscv_vnot_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs,
                            size_t vl);
vuint8m2_t __riscv_vnot_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs,
                            size_t vl);
vuint8m4_t __riscv_vnot_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs,
                            size_t vl);
vuint8m8_t __riscv_vnot_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs,
                            size_t vl);
vuint16mf4_t __riscv_vnot_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs,
                              size_t vl);
vuint16mf2_t __riscv_vnot_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs,
                              size_t vl);
vuint16m1_t __riscv_vnot_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs,
                             size_t vl);
vuint16m2_t __riscv_vnot_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs,
                             size_t vl);
vuint16m4_t __riscv_vnot_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs,
                             size_t vl);
vuint16m8_t __riscv_vnot_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs,
                             size_t vl);
vuint32mf2_t __riscv_vnot_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs,
                              size_t vl);
vuint32m1_t __riscv_vnot_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs,
                             size_t vl);
vuint32m2_t __riscv_vnot_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs,
                             size_t vl);
vuint32m4_t __riscv_vnot_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs,
                             size_t vl);
vuint32m8_t __riscv_vnot_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs,
                             size_t vl);
vuint64m1_t __riscv_vnot_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs,
                             size_t vl);
vuint64m2_t __riscv_vnot_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs,
                             size_t vl);
vuint64m4_t __riscv_vnot_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs,
                             size_t vl);
vuint64m8_t __riscv_vnot_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs,
                             size_t vl);
// masked functions (_tumu: tail undisturbed, mask undisturbed)
vint8mf8_t __riscv_vnot_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs,
                             size_t vl);
vint8mf4_t __riscv_vnot_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs,
                             size_t vl);
vint8mf2_t __riscv_vnot_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs,
                             size_t vl);
vint8m1_t __riscv_vnot_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs, size_t vl);
vint8m2_t __riscv_vnot_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs, size_t vl);
vint8m4_t __riscv_vnot_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs, size_t vl);
vint8m8_t __riscv_vnot_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs, size_t vl);
vint16mf4_t __riscv_vnot_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs,
                              size_t vl);
vint16mf2_t __riscv_vnot_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs,
                              size_t vl);
vint16m1_t __riscv_vnot_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs,
                             size_t vl);
vint16m2_t __riscv_vnot_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs,
                             size_t vl);
vint16m4_t __riscv_vnot_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs,
                             size_t vl);
vint16m8_t __riscv_vnot_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs,
                             size_t vl);
vint32mf2_t __riscv_vnot_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs,
                              size_t vl);
vint32m1_t __riscv_vnot_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs,
                             size_t vl);
vint32m2_t __riscv_vnot_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs,
                             size_t vl);
vint32m4_t __riscv_vnot_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs,
                             size_t vl);
vint32m8_t __riscv_vnot_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs,
                             size_t vl);
vint64m1_t __riscv_vnot_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs,
                             size_t vl);
vint64m2_t __riscv_vnot_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs,
                             size_t vl);
vint64m4_t __riscv_vnot_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs,
                             size_t vl);
vint64m8_t __riscv_vnot_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs,
                             size_t vl);
vuint8mf8_t __riscv_vnot_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs,
                              size_t vl);
vuint8mf4_t __riscv_vnot_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs,
                              size_t vl);
vuint8mf2_t __riscv_vnot_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs,
                              size_t vl);
vuint8m1_t __riscv_vnot_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs,
                             size_t vl);
vuint8m2_t __riscv_vnot_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs,
                             size_t vl);
vuint8m4_t __riscv_vnot_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs,
                             size_t vl);
vuint8m8_t __riscv_vnot_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs,
                             size_t vl);
vuint16mf4_t __riscv_vnot_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs,
                               size_t vl);
vuint16mf2_t __riscv_vnot_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs,
                               size_t vl);
vuint16m1_t __riscv_vnot_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs,
                              size_t vl);
vuint16m2_t __riscv_vnot_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs,
                              size_t vl);
vuint16m4_t __riscv_vnot_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs,
                              size_t vl);
vuint16m8_t __riscv_vnot_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs,
                              size_t vl);
vuint32mf2_t __riscv_vnot_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs,
                               size_t vl);
vuint32m1_t __riscv_vnot_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs,
                              size_t vl);
vuint32m2_t __riscv_vnot_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs,
                              size_t vl);
vuint32m4_t __riscv_vnot_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs,
                              size_t vl);
vuint32m8_t __riscv_vnot_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs,
                              size_t vl);
vuint64m1_t __riscv_vnot_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs,
                              size_t vl);
vuint64m2_t __riscv_vnot_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs,
                              size_t vl);
vuint64m4_t __riscv_vnot_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs,
                              size_t vl);
vuint64m8_t __riscv_vnot_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs,
                              size_t vl);
// masked functions (_mu: tail agnostic, mask undisturbed)
vint8mf8_t __riscv_vnot_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs,
                           size_t vl);
vint8mf4_t __riscv_vnot_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs,
                           size_t vl);
vint8mf2_t __riscv_vnot_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs,
                           size_t vl);
vint8m1_t __riscv_vnot_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs, size_t vl);
vint8m2_t __riscv_vnot_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs, size_t vl);
vint8m4_t __riscv_vnot_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs, size_t vl);
vint8m8_t __riscv_vnot_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs, size_t vl);
vint16mf4_t __riscv_vnot_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs,
                            size_t vl);
vint16mf2_t __riscv_vnot_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs,
                            size_t vl);
vint16m1_t __riscv_vnot_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs,
                           size_t vl);
vint16m2_t __riscv_vnot_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs,
                           size_t vl);
vint16m4_t __riscv_vnot_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs,
                           size_t vl);
vint16m8_t __riscv_vnot_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs,
                           size_t vl);
vint32mf2_t __riscv_vnot_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs,
                            size_t vl);
vint32m1_t __riscv_vnot_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs,
                           size_t vl);
vint32m2_t __riscv_vnot_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs,
                           size_t vl);
vint32m4_t __riscv_vnot_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs,
                           size_t vl);
vint32m8_t __riscv_vnot_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs,
                           size_t vl);
vint64m1_t __riscv_vnot_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs,
                           size_t vl);
vint64m2_t __riscv_vnot_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs,
                           size_t vl);
vint64m4_t __riscv_vnot_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs,
                           size_t vl);
vint64m8_t __riscv_vnot_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs,
                           size_t vl);
vuint8mf8_t __riscv_vnot_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs,
                            size_t vl);
vuint8mf4_t __riscv_vnot_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs,
                            size_t vl);
vuint8mf2_t __riscv_vnot_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs,
                            size_t vl);
vuint8m1_t __riscv_vnot_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs,
                           size_t vl);
vuint8m2_t __riscv_vnot_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs,
                           size_t vl);
vuint8m4_t __riscv_vnot_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs,
                           size_t vl);
vuint8m8_t __riscv_vnot_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs,
                           size_t vl);
vuint16mf4_t __riscv_vnot_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs,
                             size_t vl);
vuint16mf2_t __riscv_vnot_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs,
                             size_t vl);
vuint16m1_t __riscv_vnot_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs,
                            size_t vl);
vuint16m2_t __riscv_vnot_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs,
                            size_t vl);
vuint16m4_t __riscv_vnot_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs,
                            size_t vl);
vuint16m8_t __riscv_vnot_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs,
                            size_t vl);
vuint32mf2_t __riscv_vnot_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs,
                             size_t vl);
vuint32m1_t __riscv_vnot_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs,
                            size_t vl);
vuint32m2_t __riscv_vnot_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs,
                            size_t vl);
vuint32m4_t __riscv_vnot_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs,
                            size_t vl);
vuint32m8_t __riscv_vnot_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs,
                            size_t vl);
vuint64m1_t __riscv_vnot_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs,
                            size_t vl);
vuint64m2_t __riscv_vnot_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs,
                            size_t vl);
vuint64m4_t __riscv_vnot_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs,
                            size_t vl);
vuint64m8_t __riscv_vnot_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs,
                            size_t vl);

Vector Single-Width Bit Shift Intrinsics

vint8mf8_t __riscv_vsll_tu(vint8mf8_t vd, vint8mf8_t vs2, vuint8mf8_t vs1,
                           size_t vl);
vint8mf8_t __riscv_vsll_tu(vint8mf8_t vd, vint8mf8_t vs2, size_t rs1,
                           size_t vl);
vint8mf4_t __riscv_vsll_tu(vint8mf4_t vd, vint8mf4_t vs2, vuint8mf4_t vs1,
                           size_t vl);
vint8mf4_t __riscv_vsll_tu(vint8mf4_t vd, vint8mf4_t vs2, size_t rs1,
                           size_t vl);
vint8mf2_t __riscv_vsll_tu(vint8mf2_t vd, vint8mf2_t vs2, vuint8mf2_t vs1,
                           size_t vl);
vint8mf2_t __riscv_vsll_tu(vint8mf2_t vd, vint8mf2_t vs2, size_t rs1,
                           size_t vl);
vint8m1_t __riscv_vsll_tu(vint8m1_t vd, vint8m1_t vs2, vuint8m1_t vs1,
                          size_t vl);
vint8m1_t __riscv_vsll_tu(vint8m1_t vd, vint8m1_t vs2, size_t rs1, size_t vl);
vint8m2_t __riscv_vsll_tu(vint8m2_t vd, vint8m2_t vs2, vuint8m2_t vs1,
                          size_t vl);
vint8m2_t __riscv_vsll_tu(vint8m2_t vd, vint8m2_t vs2, size_t rs1, size_t vl);
vint8m4_t __riscv_vsll_tu(vint8m4_t vd, vint8m4_t vs2, vuint8m4_t vs1,
                          size_t vl);
vint8m4_t __riscv_vsll_tu(vint8m4_t vd, vint8m4_t vs2, size_t rs1, size_t vl);
vint8m8_t __riscv_vsll_tu(vint8m8_t vd, vint8m8_t vs2, vuint8m8_t vs1,
                          size_t vl);
vint8m8_t __riscv_vsll_tu(vint8m8_t vd, vint8m8_t vs2, size_t rs1, size_t vl);
vint16mf4_t __riscv_vsll_tu(vint16mf4_t vd, vint16mf4_t vs2, vuint16mf4_t vs1,
                            size_t vl);
vint16mf4_t __riscv_vsll_tu(vint16mf4_t vd, vint16mf4_t vs2, size_t rs1,
                            size_t vl);
vint16mf2_t __riscv_vsll_tu(vint16mf2_t vd, vint16mf2_t vs2, vuint16mf2_t vs1,
                            size_t vl);
vint16mf2_t __riscv_vsll_tu(vint16mf2_t vd, vint16mf2_t vs2, size_t rs1,
                            size_t vl);
vint16m1_t __riscv_vsll_tu(vint16m1_t vd, vint16m1_t vs2, vuint16m1_t vs1,
                           size_t vl);
vint16m1_t __riscv_vsll_tu(vint16m1_t vd, vint16m1_t vs2, size_t rs1,
                           size_t vl);
vint16m2_t __riscv_vsll_tu(vint16m2_t vd, vint16m2_t vs2, vuint16m2_t vs1,
                           size_t vl);
vint16m2_t __riscv_vsll_tu(vint16m2_t vd, vint16m2_t vs2, size_t rs1,
                           size_t vl);
vint16m4_t __riscv_vsll_tu(vint16m4_t vd, vint16m4_t vs2, vuint16m4_t vs1,
                           size_t vl);
vint16m4_t __riscv_vsll_tu(vint16m4_t vd, vint16m4_t vs2, size_t rs1,
                           size_t vl);
vint16m8_t __riscv_vsll_tu(vint16m8_t vd, vint16m8_t vs2, vuint16m8_t vs1,
                           size_t vl);
vint16m8_t __riscv_vsll_tu(vint16m8_t vd, vint16m8_t vs2, size_t rs1,
                           size_t vl);
vint32mf2_t __riscv_vsll_tu(vint32mf2_t vd, vint32mf2_t vs2, vuint32mf2_t vs1,
                            size_t vl);
vint32mf2_t __riscv_vsll_tu(vint32mf2_t vd, vint32mf2_t vs2, size_t rs1,
                            size_t vl);
vint32m1_t __riscv_vsll_tu(vint32m1_t vd, vint32m1_t vs2, vuint32m1_t vs1,
                           size_t vl);
vint32m1_t __riscv_vsll_tu(vint32m1_t vd, vint32m1_t vs2, size_t rs1,
                           size_t vl);
vint32m2_t __riscv_vsll_tu(vint32m2_t vd, vint32m2_t vs2, vuint32m2_t vs1,
                           size_t vl);
vint32m2_t __riscv_vsll_tu(vint32m2_t vd, vint32m2_t vs2, size_t rs1,
                           size_t vl);
vint32m4_t __riscv_vsll_tu(vint32m4_t vd, vint32m4_t vs2, vuint32m4_t vs1,
                           size_t vl);
vint32m4_t __riscv_vsll_tu(vint32m4_t vd, vint32m4_t vs2, size_t rs1,
                           size_t vl);
vint32m8_t __riscv_vsll_tu(vint32m8_t vd, vint32m8_t vs2, vuint32m8_t vs1,
                           size_t vl);
vint32m8_t __riscv_vsll_tu(vint32m8_t vd, vint32m8_t vs2, size_t rs1,
                           size_t vl);
vint64m1_t __riscv_vsll_tu(vint64m1_t vd, vint64m1_t vs2, vuint64m1_t vs1,
                           size_t vl);
vint64m1_t __riscv_vsll_tu(vint64m1_t vd, vint64m1_t vs2, size_t rs1,
                           size_t vl);
vint64m2_t __riscv_vsll_tu(vint64m2_t vd, vint64m2_t vs2, vuint64m2_t vs1,
                           size_t vl);
vint64m2_t __riscv_vsll_tu(vint64m2_t vd, vint64m2_t vs2, size_t rs1,
                           size_t vl);
vint64m4_t __riscv_vsll_tu(vint64m4_t vd, vint64m4_t vs2, vuint64m4_t vs1,
                           size_t vl);
vint64m4_t __riscv_vsll_tu(vint64m4_t vd, vint64m4_t vs2, size_t rs1,
                           size_t vl);
vint64m8_t __riscv_vsll_tu(vint64m8_t vd, vint64m8_t vs2, vuint64m8_t vs1,
                           size_t vl);
vint64m8_t __riscv_vsll_tu(vint64m8_t vd, vint64m8_t vs2, size_t rs1,
                           size_t vl);
vint8mf8_t __riscv_vsra_tu(vint8mf8_t vd, vint8mf8_t vs2, vuint8mf8_t vs1,
                           size_t vl);
vint8mf8_t __riscv_vsra_tu(vint8mf8_t vd, vint8mf8_t vs2, size_t rs1,
                           size_t vl);
vint8mf4_t __riscv_vsra_tu(vint8mf4_t vd, vint8mf4_t vs2, vuint8mf4_t vs1,
                           size_t vl);
vint8mf4_t __riscv_vsra_tu(vint8mf4_t vd, vint8mf4_t vs2, size_t rs1,
                           size_t vl);
vint8mf2_t __riscv_vsra_tu(vint8mf2_t vd, vint8mf2_t vs2, vuint8mf2_t vs1,
                           size_t vl);
vint8mf2_t __riscv_vsra_tu(vint8mf2_t vd, vint8mf2_t vs2, size_t rs1,
                           size_t vl);
vint8m1_t __riscv_vsra_tu(vint8m1_t vd, vint8m1_t vs2, vuint8m1_t vs1,
                          size_t vl);
vint8m1_t __riscv_vsra_tu(vint8m1_t vd, vint8m1_t vs2, size_t rs1, size_t vl);
vint8m2_t __riscv_vsra_tu(vint8m2_t vd, vint8m2_t vs2, vuint8m2_t vs1,
                          size_t vl);
vint8m2_t __riscv_vsra_tu(vint8m2_t vd, vint8m2_t vs2, size_t rs1, size_t vl);
vint8m4_t __riscv_vsra_tu(vint8m4_t vd, vint8m4_t vs2, vuint8m4_t vs1,
                          size_t vl);
vint8m4_t __riscv_vsra_tu(vint8m4_t vd, vint8m4_t vs2, size_t rs1, size_t vl);
vint8m8_t __riscv_vsra_tu(vint8m8_t vd, vint8m8_t vs2, vuint8m8_t vs1,
                          size_t vl);
vint8m8_t __riscv_vsra_tu(vint8m8_t vd, vint8m8_t vs2, size_t rs1, size_t vl);
vint16mf4_t __riscv_vsra_tu(vint16mf4_t vd, vint16mf4_t vs2, vuint16mf4_t vs1,
                            size_t vl);
vint16mf4_t __riscv_vsra_tu(vint16mf4_t vd, vint16mf4_t vs2, size_t rs1,
                            size_t vl);
vint16mf2_t __riscv_vsra_tu(vint16mf2_t vd, vint16mf2_t vs2, vuint16mf2_t vs1,
                            size_t vl);
vint16mf2_t __riscv_vsra_tu(vint16mf2_t vd, vint16mf2_t vs2, size_t rs1,
                            size_t vl);
vint16m1_t __riscv_vsra_tu(vint16m1_t vd, vint16m1_t vs2, vuint16m1_t vs1,
                           size_t vl);
vint16m1_t __riscv_vsra_tu(vint16m1_t vd, vint16m1_t vs2, size_t rs1,
                           size_t vl);
vint16m2_t __riscv_vsra_tu(vint16m2_t vd, vint16m2_t vs2, vuint16m2_t vs1,
                           size_t vl);
vint16m2_t __riscv_vsra_tu(vint16m2_t vd, vint16m2_t vs2, size_t rs1,
                           size_t vl);
vint16m4_t __riscv_vsra_tu(vint16m4_t vd, vint16m4_t vs2, vuint16m4_t vs1,
                           size_t vl);
vint16m4_t __riscv_vsra_tu(vint16m4_t vd, vint16m4_t vs2, size_t rs1,
                           size_t vl);
vint16m8_t __riscv_vsra_tu(vint16m8_t vd, vint16m8_t vs2, vuint16m8_t vs1,
                           size_t vl);
vint16m8_t __riscv_vsra_tu(vint16m8_t vd, vint16m8_t vs2, size_t rs1,
                           size_t vl);
vint32mf2_t __riscv_vsra_tu(vint32mf2_t vd, vint32mf2_t vs2, vuint32mf2_t vs1,
                            size_t vl);
vint32mf2_t __riscv_vsra_tu(vint32mf2_t vd, vint32mf2_t vs2, size_t rs1,
                            size_t vl);
vint32m1_t __riscv_vsra_tu(vint32m1_t vd, vint32m1_t vs2, vuint32m1_t vs1,
                           size_t vl);
vint32m1_t __riscv_vsra_tu(vint32m1_t vd, vint32m1_t vs2, size_t rs1,
                           size_t vl);
vint32m2_t __riscv_vsra_tu(vint32m2_t vd, vint32m2_t vs2, vuint32m2_t vs1,
                           size_t vl);
vint32m2_t __riscv_vsra_tu(vint32m2_t vd, vint32m2_t vs2, size_t rs1,
                           size_t vl);
vint32m4_t __riscv_vsra_tu(vint32m4_t vd, vint32m4_t vs2, vuint32m4_t vs1,
                           size_t vl);
vint32m4_t __riscv_vsra_tu(vint32m4_t vd, vint32m4_t vs2, size_t rs1,
                           size_t vl);
vint32m8_t __riscv_vsra_tu(vint32m8_t vd, vint32m8_t vs2, vuint32m8_t vs1,
                           size_t vl);
vint32m8_t __riscv_vsra_tu(vint32m8_t vd, vint32m8_t vs2, size_t rs1,
                           size_t vl);
vint64m1_t __riscv_vsra_tu(vint64m1_t vd, vint64m1_t vs2, vuint64m1_t vs1,
                           size_t vl);
vint64m1_t __riscv_vsra_tu(vint64m1_t vd, vint64m1_t vs2, size_t rs1,
                           size_t vl);
vint64m2_t __riscv_vsra_tu(vint64m2_t vd, vint64m2_t vs2, vuint64m2_t vs1,
                           size_t vl);
vint64m2_t __riscv_vsra_tu(vint64m2_t vd, vint64m2_t vs2, size_t rs1,
                           size_t vl);
vint64m4_t __riscv_vsra_tu(vint64m4_t vd, vint64m4_t vs2, vuint64m4_t vs1,
                           size_t vl);
vint64m4_t __riscv_vsra_tu(vint64m4_t vd, vint64m4_t vs2, size_t rs1,
                           size_t vl);
vint64m8_t __riscv_vsra_tu(vint64m8_t vd, vint64m8_t vs2, vuint64m8_t vs1,
                           size_t vl);
vint64m8_t __riscv_vsra_tu(vint64m8_t vd, vint64m8_t vs2, size_t rs1,
                           size_t vl);
vuint8mf8_t __riscv_vsll_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                            size_t vl);
vuint8mf8_t __riscv_vsll_tu(vuint8mf8_t vd, vuint8mf8_t vs2, size_t rs1,
                            size_t vl);
vuint8mf4_t __riscv_vsll_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                            size_t vl);
vuint8mf4_t __riscv_vsll_tu(vuint8mf4_t vd, vuint8mf4_t vs2, size_t rs1,
                            size_t vl);
vuint8mf2_t __riscv_vsll_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                            size_t vl);
vuint8mf2_t __riscv_vsll_tu(vuint8mf2_t vd, vuint8mf2_t vs2, size_t rs1,
                            size_t vl);
vuint8m1_t __riscv_vsll_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                           size_t vl);
vuint8m1_t __riscv_vsll_tu(vuint8m1_t vd, vuint8m1_t vs2, size_t rs1,
                           size_t vl);
vuint8m2_t __riscv_vsll_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                           size_t vl);
vuint8m2_t __riscv_vsll_tu(vuint8m2_t vd, vuint8m2_t vs2, size_t rs1,
                           size_t vl);
vuint8m4_t __riscv_vsll_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                           size_t vl);
vuint8m4_t __riscv_vsll_tu(vuint8m4_t vd, vuint8m4_t vs2, size_t rs1,
                           size_t vl);
vuint8m8_t __riscv_vsll_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                           size_t vl);
vuint8m8_t __riscv_vsll_tu(vuint8m8_t vd, vuint8m8_t vs2, size_t rs1,
                           size_t vl);
vuint16mf4_t __riscv_vsll_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vsll_tu(vuint16mf4_t vd, vuint16mf4_t vs2, size_t rs1,
                             size_t vl);
vuint16mf2_t __riscv_vsll_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vsll_tu(vuint16mf2_t vd, vuint16mf2_t vs2, size_t rs1,
                             size_t vl);
vuint16m1_t __riscv_vsll_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                            size_t vl);
vuint16m1_t __riscv_vsll_tu(vuint16m1_t vd, vuint16m1_t vs2, size_t rs1,
                            size_t vl);
vuint16m2_t __riscv_vsll_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                            size_t vl);
vuint16m2_t __riscv_vsll_tu(vuint16m2_t vd, vuint16m2_t vs2, size_t rs1,
                            size_t vl);
vuint16m4_t __riscv_vsll_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                            size_t vl);
vuint16m4_t __riscv_vsll_tu(vuint16m4_t vd, vuint16m4_t vs2, size_t rs1,
                            size_t vl);
vuint16m8_t __riscv_vsll_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                            size_t vl);
vuint16m8_t __riscv_vsll_tu(vuint16m8_t vd, vuint16m8_t vs2, size_t rs1,
                            size_t vl);
vuint32mf2_t __riscv_vsll_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vsll_tu(vuint32mf2_t vd, vuint32mf2_t vs2, size_t rs1,
                             size_t vl);
vuint32m1_t __riscv_vsll_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                            size_t vl);
vuint32m1_t __riscv_vsll_tu(vuint32m1_t vd, vuint32m1_t vs2, size_t rs1,
                            size_t vl);
vuint32m2_t __riscv_vsll_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                            size_t vl);
vuint32m2_t __riscv_vsll_tu(vuint32m2_t vd, vuint32m2_t vs2, size_t rs1,
                            size_t vl);
vuint32m4_t __riscv_vsll_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                            size_t vl);
vuint32m4_t __riscv_vsll_tu(vuint32m4_t vd, vuint32m4_t vs2, size_t rs1,
                            size_t vl);
vuint32m8_t __riscv_vsll_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                            size_t vl);
vuint32m8_t __riscv_vsll_tu(vuint32m8_t vd, vuint32m8_t vs2, size_t rs1,
                            size_t vl);
vuint64m1_t __riscv_vsll_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                            size_t vl);
vuint64m1_t __riscv_vsll_tu(vuint64m1_t vd, vuint64m1_t vs2, size_t rs1,
                            size_t vl);
vuint64m2_t __riscv_vsll_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                            size_t vl);
vuint64m2_t __riscv_vsll_tu(vuint64m2_t vd, vuint64m2_t vs2, size_t rs1,
                            size_t vl);
vuint64m4_t __riscv_vsll_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                            size_t vl);
vuint64m4_t __riscv_vsll_tu(vuint64m4_t vd, vuint64m4_t vs2, size_t rs1,
                            size_t vl);
vuint64m8_t __riscv_vsll_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                            size_t vl);
vuint64m8_t __riscv_vsll_tu(vuint64m8_t vd, vuint64m8_t vs2, size_t rs1,
                            size_t vl);
// __riscv_vsrl_tu overloads for the unsigned vector types vuint8mf8_t through
// vuint64m8_t. Each type gets two forms: one whose last data operand is a
// vector vs1 of the same type as vs2, and one whose last data operand is a
// scalar size_t rs1. Every form takes vd first and returns the type of vd.
// NOTE(review): vsrl is presumably the logical right shift and _tu the
// tail-undisturbed policy (vd supplying the preserved elements) -- confirm
// against the RVV specification.
vuint8mf8_t __riscv_vsrl_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                            size_t vl);
vuint8mf8_t __riscv_vsrl_tu(vuint8mf8_t vd, vuint8mf8_t vs2, size_t rs1,
                            size_t vl);
vuint8mf4_t __riscv_vsrl_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                            size_t vl);
vuint8mf4_t __riscv_vsrl_tu(vuint8mf4_t vd, vuint8mf4_t vs2, size_t rs1,
                            size_t vl);
vuint8mf2_t __riscv_vsrl_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                            size_t vl);
vuint8mf2_t __riscv_vsrl_tu(vuint8mf2_t vd, vuint8mf2_t vs2, size_t rs1,
                            size_t vl);
vuint8m1_t __riscv_vsrl_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                           size_t vl);
vuint8m1_t __riscv_vsrl_tu(vuint8m1_t vd, vuint8m1_t vs2, size_t rs1,
                           size_t vl);
vuint8m2_t __riscv_vsrl_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                           size_t vl);
vuint8m2_t __riscv_vsrl_tu(vuint8m2_t vd, vuint8m2_t vs2, size_t rs1,
                           size_t vl);
vuint8m4_t __riscv_vsrl_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                           size_t vl);
vuint8m4_t __riscv_vsrl_tu(vuint8m4_t vd, vuint8m4_t vs2, size_t rs1,
                           size_t vl);
vuint8m8_t __riscv_vsrl_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                           size_t vl);
vuint8m8_t __riscv_vsrl_tu(vuint8m8_t vd, vuint8m8_t vs2, size_t rs1,
                           size_t vl);
vuint16mf4_t __riscv_vsrl_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vsrl_tu(vuint16mf4_t vd, vuint16mf4_t vs2, size_t rs1,
                             size_t vl);
vuint16mf2_t __riscv_vsrl_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vsrl_tu(vuint16mf2_t vd, vuint16mf2_t vs2, size_t rs1,
                             size_t vl);
vuint16m1_t __riscv_vsrl_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                            size_t vl);
vuint16m1_t __riscv_vsrl_tu(vuint16m1_t vd, vuint16m1_t vs2, size_t rs1,
                            size_t vl);
vuint16m2_t __riscv_vsrl_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                            size_t vl);
vuint16m2_t __riscv_vsrl_tu(vuint16m2_t vd, vuint16m2_t vs2, size_t rs1,
                            size_t vl);
vuint16m4_t __riscv_vsrl_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                            size_t vl);
vuint16m4_t __riscv_vsrl_tu(vuint16m4_t vd, vuint16m4_t vs2, size_t rs1,
                            size_t vl);
vuint16m8_t __riscv_vsrl_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                            size_t vl);
vuint16m8_t __riscv_vsrl_tu(vuint16m8_t vd, vuint16m8_t vs2, size_t rs1,
                            size_t vl);
vuint32mf2_t __riscv_vsrl_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vsrl_tu(vuint32mf2_t vd, vuint32mf2_t vs2, size_t rs1,
                             size_t vl);
vuint32m1_t __riscv_vsrl_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                            size_t vl);
vuint32m1_t __riscv_vsrl_tu(vuint32m1_t vd, vuint32m1_t vs2, size_t rs1,
                            size_t vl);
vuint32m2_t __riscv_vsrl_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                            size_t vl);
vuint32m2_t __riscv_vsrl_tu(vuint32m2_t vd, vuint32m2_t vs2, size_t rs1,
                            size_t vl);
vuint32m4_t __riscv_vsrl_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                            size_t vl);
vuint32m4_t __riscv_vsrl_tu(vuint32m4_t vd, vuint32m4_t vs2, size_t rs1,
                            size_t vl);
vuint32m8_t __riscv_vsrl_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                            size_t vl);
vuint32m8_t __riscv_vsrl_tu(vuint32m8_t vd, vuint32m8_t vs2, size_t rs1,
                            size_t vl);
vuint64m1_t __riscv_vsrl_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                            size_t vl);
vuint64m1_t __riscv_vsrl_tu(vuint64m1_t vd, vuint64m1_t vs2, size_t rs1,
                            size_t vl);
vuint64m2_t __riscv_vsrl_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                            size_t vl);
vuint64m2_t __riscv_vsrl_tu(vuint64m2_t vd, vuint64m2_t vs2, size_t rs1,
                            size_t vl);
vuint64m4_t __riscv_vsrl_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                            size_t vl);
vuint64m4_t __riscv_vsrl_tu(vuint64m4_t vd, vuint64m4_t vs2, size_t rs1,
                            size_t vl);
vuint64m8_t __riscv_vsrl_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                            size_t vl);
vuint64m8_t __riscv_vsrl_tu(vuint64m8_t vd, vuint64m8_t vs2, size_t rs1,
                            size_t vl);
// masked functions: _tum variants, which take the mask vm as the first argument
// __riscv_vsll_tum overloads for the signed vector types vint8mf8_t through
// vint64m8_t. The mask vm comes first and its vboolN_t type varies with the
// data type (vbool64_t for vint8mf8_t down to vbool1_t for vint8m8_t).
// vd, vs2 and the result are signed, while the vector operand vs1 is the
// unsigned vector type with the same width/multiplier suffix (for example
// vint8mf8_t pairs with vuint8mf8_t). The scalar form takes size_t rs1.
// NOTE(review): vsll is presumably the logical left shift, with vs1/rs1 as the
// shift amount -- confirm against the RVV specification.
vint8mf8_t __riscv_vsll_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vsll_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            size_t rs1, size_t vl);
vint8mf4_t __riscv_vsll_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vsll_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            size_t rs1, size_t vl);
vint8mf2_t __riscv_vsll_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vsll_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            size_t rs1, size_t vl);
vint8m1_t __riscv_vsll_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vsll_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, size_t rs1,
                           size_t vl);
vint8m2_t __riscv_vsll_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vsll_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, size_t rs1,
                           size_t vl);
vint8m4_t __riscv_vsll_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vsll_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, size_t rs1,
                           size_t vl);
vint8m8_t __riscv_vsll_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vsll_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, size_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vsll_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vsll_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             size_t rs1, size_t vl);
vint16mf2_t __riscv_vsll_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vsll_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             size_t rs1, size_t vl);
vint16m1_t __riscv_vsll_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vsll_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            size_t rs1, size_t vl);
vint16m2_t __riscv_vsll_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vsll_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            size_t rs1, size_t vl);
vint16m4_t __riscv_vsll_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vsll_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            size_t rs1, size_t vl);
vint16m8_t __riscv_vsll_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vuint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vsll_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            size_t rs1, size_t vl);
vint32mf2_t __riscv_vsll_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vsll_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             size_t rs1, size_t vl);
vint32m1_t __riscv_vsll_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vsll_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            size_t rs1, size_t vl);
vint32m2_t __riscv_vsll_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vsll_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            size_t rs1, size_t vl);
vint32m4_t __riscv_vsll_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vsll_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            size_t rs1, size_t vl);
vint32m8_t __riscv_vsll_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vuint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vsll_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            size_t rs1, size_t vl);
vint64m1_t __riscv_vsll_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vsll_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            size_t rs1, size_t vl);
vint64m2_t __riscv_vsll_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vsll_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            size_t rs1, size_t vl);
vint64m4_t __riscv_vsll_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vsll_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            size_t rs1, size_t vl);
vint64m8_t __riscv_vsll_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vuint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vsll_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            size_t rs1, size_t vl);
// __riscv_vsra_tum overloads, declared only for the signed vector types
// vint8mf8_t through vint64m8_t. The mask vm comes first, followed by vd and
// vs2 (both of the signed result type). The last data operand is either an
// unsigned vector vs1 with the same width/multiplier suffix as vs2, or a
// scalar size_t rs1.
// NOTE(review): vsra is presumably the arithmetic (sign-propagating) right
// shift, which would explain the signed-only overload set -- confirm against
// the RVV specification.
vint8mf8_t __riscv_vsra_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vsra_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            size_t rs1, size_t vl);
vint8mf4_t __riscv_vsra_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vsra_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            size_t rs1, size_t vl);
vint8mf2_t __riscv_vsra_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vsra_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            size_t rs1, size_t vl);
vint8m1_t __riscv_vsra_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vsra_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, size_t rs1,
                           size_t vl);
vint8m2_t __riscv_vsra_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vsra_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, size_t rs1,
                           size_t vl);
vint8m4_t __riscv_vsra_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vsra_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, size_t rs1,
                           size_t vl);
vint8m8_t __riscv_vsra_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vsra_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, size_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vsra_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vsra_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             size_t rs1, size_t vl);
vint16mf2_t __riscv_vsra_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vsra_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             size_t rs1, size_t vl);
vint16m1_t __riscv_vsra_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vsra_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            size_t rs1, size_t vl);
vint16m2_t __riscv_vsra_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vsra_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            size_t rs1, size_t vl);
vint16m4_t __riscv_vsra_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vsra_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            size_t rs1, size_t vl);
vint16m8_t __riscv_vsra_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vuint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vsra_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            size_t rs1, size_t vl);
vint32mf2_t __riscv_vsra_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vsra_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             size_t rs1, size_t vl);
vint32m1_t __riscv_vsra_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vsra_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            size_t rs1, size_t vl);
vint32m2_t __riscv_vsra_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vsra_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            size_t rs1, size_t vl);
vint32m4_t __riscv_vsra_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vsra_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            size_t rs1, size_t vl);
vint32m8_t __riscv_vsra_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vuint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vsra_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            size_t rs1, size_t vl);
vint64m1_t __riscv_vsra_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vsra_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            size_t rs1, size_t vl);
vint64m2_t __riscv_vsra_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vsra_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            size_t rs1, size_t vl);
vint64m4_t __riscv_vsra_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vsra_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            size_t rs1, size_t vl);
vint64m8_t __riscv_vsra_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vuint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vsra_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            size_t rs1, size_t vl);
// __riscv_vsll_tum overloads for the unsigned vector types vuint8mf8_t through
// vuint64m8_t. The mask vm comes first; vd, vs2 and the result share one
// unsigned vector type. The last data operand is either a vector vs1 of that
// same type or a scalar size_t rs1.
// NOTE(review): vsll is presumably the logical left shift, with vs1/rs1 as the
// shift amount -- confirm against the RVV specification.
vuint8mf8_t __riscv_vsll_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vsll_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             size_t rs1, size_t vl);
vuint8mf4_t __riscv_vsll_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vsll_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             size_t rs1, size_t vl);
vuint8mf2_t __riscv_vsll_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vsll_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             size_t rs1, size_t vl);
vuint8m1_t __riscv_vsll_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vsll_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            size_t rs1, size_t vl);
vuint8m2_t __riscv_vsll_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vsll_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            size_t rs1, size_t vl);
vuint8m4_t __riscv_vsll_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vsll_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            size_t rs1, size_t vl);
vuint8m8_t __riscv_vsll_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vsll_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            size_t rs1, size_t vl);
vuint16mf4_t __riscv_vsll_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vsll_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              size_t rs1, size_t vl);
vuint16mf2_t __riscv_vsll_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vsll_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              size_t rs1, size_t vl);
vuint16m1_t __riscv_vsll_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vsll_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             size_t rs1, size_t vl);
vuint16m2_t __riscv_vsll_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vsll_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             size_t rs1, size_t vl);
vuint16m4_t __riscv_vsll_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vsll_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             size_t rs1, size_t vl);
vuint16m8_t __riscv_vsll_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vsll_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             size_t rs1, size_t vl);
vuint32mf2_t __riscv_vsll_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vsll_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              size_t rs1, size_t vl);
vuint32m1_t __riscv_vsll_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vsll_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             size_t rs1, size_t vl);
vuint32m2_t __riscv_vsll_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vsll_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             size_t rs1, size_t vl);
vuint32m4_t __riscv_vsll_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vsll_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             size_t rs1, size_t vl);
vuint32m8_t __riscv_vsll_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vsll_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             size_t rs1, size_t vl);
vuint64m1_t __riscv_vsll_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vsll_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             size_t rs1, size_t vl);
vuint64m2_t __riscv_vsll_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vsll_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             size_t rs1, size_t vl);
vuint64m4_t __riscv_vsll_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vsll_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             size_t rs1, size_t vl);
vuint64m8_t __riscv_vsll_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vsll_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             size_t rs1, size_t vl);
// __riscv_vsrl_tum overloads, declared only for the unsigned vector types
// vuint8mf8_t through vuint64m8_t. The mask vm comes first; vd, vs2 and the
// result share one unsigned vector type. The last data operand is either a
// vector vs1 of that same type or a scalar size_t rs1.
// NOTE(review): vsrl is presumably the logical (zero-filling) right shift,
// which would explain the unsigned-only overload set -- confirm against the
// RVV specification.
vuint8mf8_t __riscv_vsrl_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vsrl_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             size_t rs1, size_t vl);
vuint8mf4_t __riscv_vsrl_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vsrl_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             size_t rs1, size_t vl);
vuint8mf2_t __riscv_vsrl_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vsrl_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             size_t rs1, size_t vl);
vuint8m1_t __riscv_vsrl_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vsrl_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            size_t rs1, size_t vl);
vuint8m2_t __riscv_vsrl_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vsrl_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            size_t rs1, size_t vl);
vuint8m4_t __riscv_vsrl_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vsrl_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            size_t rs1, size_t vl);
vuint8m8_t __riscv_vsrl_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vsrl_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            size_t rs1, size_t vl);
vuint16mf4_t __riscv_vsrl_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vsrl_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              size_t rs1, size_t vl);
vuint16mf2_t __riscv_vsrl_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vsrl_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              size_t rs1, size_t vl);
vuint16m1_t __riscv_vsrl_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vsrl_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             size_t rs1, size_t vl);
vuint16m2_t __riscv_vsrl_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vsrl_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             size_t rs1, size_t vl);
vuint16m4_t __riscv_vsrl_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vsrl_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             size_t rs1, size_t vl);
vuint16m8_t __riscv_vsrl_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vsrl_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             size_t rs1, size_t vl);
vuint32mf2_t __riscv_vsrl_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vsrl_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              size_t rs1, size_t vl);
vuint32m1_t __riscv_vsrl_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vsrl_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             size_t rs1, size_t vl);
vuint32m2_t __riscv_vsrl_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vsrl_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             size_t rs1, size_t vl);
vuint32m4_t __riscv_vsrl_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vsrl_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             size_t rs1, size_t vl);
vuint32m8_t __riscv_vsrl_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vsrl_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             size_t rs1, size_t vl);
vuint64m1_t __riscv_vsrl_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vsrl_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             size_t rs1, size_t vl);
vuint64m2_t __riscv_vsrl_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vsrl_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             size_t rs1, size_t vl);
vuint64m4_t __riscv_vsrl_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vsrl_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             size_t rs1, size_t vl);
vuint64m8_t __riscv_vsrl_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vsrl_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             size_t rs1, size_t vl);
// masked functions, tail-undisturbed and mask-undisturbed policy (_tumu suffix)
// __riscv_vsll_tumu, signed-integer overloads (vint8mf8_t through vint64m8_t).
// Every vector type is declared twice: once with a vector of shift amounts
// `vs1` (the unsigned vector type with the same element width and LMUL suffix
// as `vs2`), and once with a single scalar shift amount `rs1` of type size_t.
// `vm` is the vboolN_t mask operand, `vd` and `vs2` share the return type, and
// `vl` (size_t) is always the last parameter.
// NOTE(review): `vsll` is presumably the vector shift-left-logical operation
// and `vd` presumably supplies the elements left undisturbed under `_tumu` --
// confirm against the intrinsics naming documentation.
vint8mf8_t __riscv_vsll_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vsll_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             size_t rs1, size_t vl);
vint8mf4_t __riscv_vsll_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vsll_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             size_t rs1, size_t vl);
vint8mf2_t __riscv_vsll_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vsll_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             size_t rs1, size_t vl);
vint8m1_t __riscv_vsll_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vsll_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            size_t rs1, size_t vl);
vint8m2_t __riscv_vsll_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vsll_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            size_t rs1, size_t vl);
vint8m4_t __riscv_vsll_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vsll_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            size_t rs1, size_t vl);
vint8m8_t __riscv_vsll_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vsll_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            size_t rs1, size_t vl);
vint16mf4_t __riscv_vsll_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vsll_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              size_t rs1, size_t vl);
vint16mf2_t __riscv_vsll_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vsll_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              size_t rs1, size_t vl);
vint16m1_t __riscv_vsll_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vsll_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             size_t rs1, size_t vl);
vint16m2_t __riscv_vsll_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vsll_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             size_t rs1, size_t vl);
vint16m4_t __riscv_vsll_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vsll_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             size_t rs1, size_t vl);
vint16m8_t __riscv_vsll_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vsll_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             size_t rs1, size_t vl);
vint32mf2_t __riscv_vsll_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vsll_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              size_t rs1, size_t vl);
vint32m1_t __riscv_vsll_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vsll_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             size_t rs1, size_t vl);
vint32m2_t __riscv_vsll_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vsll_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             size_t rs1, size_t vl);
vint32m4_t __riscv_vsll_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vsll_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             size_t rs1, size_t vl);
vint32m8_t __riscv_vsll_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vsll_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             size_t rs1, size_t vl);
vint64m1_t __riscv_vsll_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vsll_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             size_t rs1, size_t vl);
vint64m2_t __riscv_vsll_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vsll_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             size_t rs1, size_t vl);
vint64m4_t __riscv_vsll_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vsll_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             size_t rs1, size_t vl);
vint64m8_t __riscv_vsll_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vsll_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             size_t rs1, size_t vl);
// __riscv_vsra_tumu, declared for signed-integer vectors only in this listing
// (vint8mf8_t through vint64m8_t). Every vector type is declared twice: once
// with a vector of shift amounts `vs1` (the unsigned vector type with the same
// element width and LMUL suffix as `vs2`), and once with a single scalar shift
// amount `rs1` of type size_t. `vm` is the vboolN_t mask operand, `vd` and
// `vs2` share the return type, and `vl` (size_t) is always the last parameter.
// NOTE(review): `vsra` is presumably the vector shift-right-arithmetic
// operation and `vd` presumably supplies the elements left undisturbed under
// `_tumu` -- confirm against the intrinsics naming documentation.
vint8mf8_t __riscv_vsra_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vsra_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             size_t rs1, size_t vl);
vint8mf4_t __riscv_vsra_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vsra_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             size_t rs1, size_t vl);
vint8mf2_t __riscv_vsra_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vsra_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             size_t rs1, size_t vl);
vint8m1_t __riscv_vsra_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vsra_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            size_t rs1, size_t vl);
vint8m2_t __riscv_vsra_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vsra_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            size_t rs1, size_t vl);
vint8m4_t __riscv_vsra_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vsra_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            size_t rs1, size_t vl);
vint8m8_t __riscv_vsra_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vsra_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            size_t rs1, size_t vl);
vint16mf4_t __riscv_vsra_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vsra_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              size_t rs1, size_t vl);
vint16mf2_t __riscv_vsra_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vsra_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              size_t rs1, size_t vl);
vint16m1_t __riscv_vsra_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vsra_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             size_t rs1, size_t vl);
vint16m2_t __riscv_vsra_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vsra_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             size_t rs1, size_t vl);
vint16m4_t __riscv_vsra_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vsra_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             size_t rs1, size_t vl);
vint16m8_t __riscv_vsra_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vsra_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             size_t rs1, size_t vl);
vint32mf2_t __riscv_vsra_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vsra_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              size_t rs1, size_t vl);
vint32m1_t __riscv_vsra_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vsra_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             size_t rs1, size_t vl);
vint32m2_t __riscv_vsra_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vsra_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             size_t rs1, size_t vl);
vint32m4_t __riscv_vsra_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vsra_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             size_t rs1, size_t vl);
vint32m8_t __riscv_vsra_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vsra_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             size_t rs1, size_t vl);
vint64m1_t __riscv_vsra_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vsra_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             size_t rs1, size_t vl);
vint64m2_t __riscv_vsra_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vsra_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             size_t rs1, size_t vl);
vint64m4_t __riscv_vsra_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vsra_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             size_t rs1, size_t vl);
vint64m8_t __riscv_vsra_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vsra_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             size_t rs1, size_t vl);
// __riscv_vsll_tumu, unsigned-integer overloads (vuint8mf8_t through
// vuint64m8_t). Every vector type is declared twice: once with a vector of
// shift amounts `vs1` (same vector type as `vs2`), and once with a single
// scalar shift amount `rs1` of type size_t. `vm` is the vboolN_t mask operand,
// `vd` and `vs2` share the return type, and `vl` (size_t) is always the last
// parameter.
// NOTE(review): `vsll` is presumably the vector shift-left-logical operation
// and `vd` presumably supplies the elements left undisturbed under `_tumu` --
// confirm against the intrinsics naming documentation.
vuint8mf8_t __riscv_vsll_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vsll_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              size_t rs1, size_t vl);
vuint8mf4_t __riscv_vsll_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vsll_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              size_t rs1, size_t vl);
vuint8mf2_t __riscv_vsll_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vsll_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              size_t rs1, size_t vl);
vuint8m1_t __riscv_vsll_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vsll_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             size_t rs1, size_t vl);
vuint8m2_t __riscv_vsll_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vsll_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             size_t rs1, size_t vl);
vuint8m4_t __riscv_vsll_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vsll_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             size_t rs1, size_t vl);
vuint8m8_t __riscv_vsll_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vsll_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             size_t rs1, size_t vl);
vuint16mf4_t __riscv_vsll_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vsll_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               size_t rs1, size_t vl);
vuint16mf2_t __riscv_vsll_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vsll_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               size_t rs1, size_t vl);
vuint16m1_t __riscv_vsll_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vsll_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              size_t rs1, size_t vl);
vuint16m2_t __riscv_vsll_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vsll_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              size_t rs1, size_t vl);
vuint16m4_t __riscv_vsll_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vsll_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              size_t rs1, size_t vl);
vuint16m8_t __riscv_vsll_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vsll_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              size_t rs1, size_t vl);
vuint32mf2_t __riscv_vsll_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vsll_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               size_t rs1, size_t vl);
vuint32m1_t __riscv_vsll_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vsll_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              size_t rs1, size_t vl);
vuint32m2_t __riscv_vsll_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vsll_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              size_t rs1, size_t vl);
vuint32m4_t __riscv_vsll_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vsll_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              size_t rs1, size_t vl);
vuint32m8_t __riscv_vsll_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vsll_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              size_t rs1, size_t vl);
vuint64m1_t __riscv_vsll_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vsll_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              size_t rs1, size_t vl);
vuint64m2_t __riscv_vsll_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vsll_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              size_t rs1, size_t vl);
vuint64m4_t __riscv_vsll_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vsll_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              size_t rs1, size_t vl);
vuint64m8_t __riscv_vsll_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vsll_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              size_t rs1, size_t vl);
// __riscv_vsrl_tumu, declared for unsigned-integer vectors only in this
// listing (vuint8mf8_t through vuint64m8_t). Every vector type is declared
// twice: once with a vector of shift amounts `vs1` (same vector type as
// `vs2`), and once with a single scalar shift amount `rs1` of type size_t.
// `vm` is the vboolN_t mask operand, `vd` and `vs2` share the return type, and
// `vl` (size_t) is always the last parameter.
// NOTE(review): `vsrl` is presumably the vector shift-right-logical operation
// and `vd` presumably supplies the elements left undisturbed under `_tumu` --
// confirm against the intrinsics naming documentation.
vuint8mf8_t __riscv_vsrl_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vsrl_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              size_t rs1, size_t vl);
vuint8mf4_t __riscv_vsrl_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vsrl_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              size_t rs1, size_t vl);
vuint8mf2_t __riscv_vsrl_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vsrl_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              size_t rs1, size_t vl);
vuint8m1_t __riscv_vsrl_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vsrl_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             size_t rs1, size_t vl);
vuint8m2_t __riscv_vsrl_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vsrl_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             size_t rs1, size_t vl);
vuint8m4_t __riscv_vsrl_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vsrl_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             size_t rs1, size_t vl);
vuint8m8_t __riscv_vsrl_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vsrl_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             size_t rs1, size_t vl);
vuint16mf4_t __riscv_vsrl_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vsrl_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               size_t rs1, size_t vl);
vuint16mf2_t __riscv_vsrl_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vsrl_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               size_t rs1, size_t vl);
vuint16m1_t __riscv_vsrl_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vsrl_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              size_t rs1, size_t vl);
vuint16m2_t __riscv_vsrl_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vsrl_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              size_t rs1, size_t vl);
vuint16m4_t __riscv_vsrl_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vsrl_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              size_t rs1, size_t vl);
vuint16m8_t __riscv_vsrl_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vsrl_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              size_t rs1, size_t vl);
vuint32mf2_t __riscv_vsrl_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vsrl_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               size_t rs1, size_t vl);
vuint32m1_t __riscv_vsrl_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vsrl_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              size_t rs1, size_t vl);
vuint32m2_t __riscv_vsrl_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vsrl_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              size_t rs1, size_t vl);
vuint32m4_t __riscv_vsrl_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vsrl_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              size_t rs1, size_t vl);
vuint32m8_t __riscv_vsrl_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vsrl_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              size_t rs1, size_t vl);
vuint64m1_t __riscv_vsrl_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vsrl_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              size_t rs1, size_t vl);
vuint64m2_t __riscv_vsrl_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vsrl_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              size_t rs1, size_t vl);
vuint64m4_t __riscv_vsrl_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vsrl_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              size_t rs1, size_t vl);
vuint64m8_t __riscv_vsrl_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vsrl_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              size_t rs1, size_t vl);
// masked functions, tail-agnostic and mask-undisturbed policy (_mu suffix)
// __riscv_vsll_mu, signed-integer overloads (vint8mf8_t through vint64m8_t).
// Every vector type is declared twice: once with a vector of shift amounts
// `vs1` (the unsigned vector type with the same element width and LMUL suffix
// as `vs2`), and once with a single scalar shift amount `rs1` of type size_t.
// `vm` is the vboolN_t mask operand, `vd` and `vs2` share the return type, and
// `vl` (size_t) is always the last parameter.
// NOTE(review): `vsll` is presumably the vector shift-left-logical operation
// and `vd` presumably supplies the masked-off elements left undisturbed under
// `_mu` -- confirm against the intrinsics naming documentation.
vint8mf8_t __riscv_vsll_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vsll_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           size_t rs1, size_t vl);
vint8mf4_t __riscv_vsll_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vsll_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           size_t rs1, size_t vl);
vint8mf2_t __riscv_vsll_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vsll_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           size_t rs1, size_t vl);
vint8m1_t __riscv_vsll_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                          vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vsll_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, size_t rs1,
                          size_t vl);
vint8m2_t __riscv_vsll_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                          vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vsll_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, size_t rs1,
                          size_t vl);
vint8m4_t __riscv_vsll_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                          vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vsll_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, size_t rs1,
                          size_t vl);
vint8m8_t __riscv_vsll_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                          vuint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vsll_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, size_t rs1,
                          size_t vl);
vint16mf4_t __riscv_vsll_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vsll_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            size_t rs1, size_t vl);
vint16mf2_t __riscv_vsll_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vsll_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            size_t rs1, size_t vl);
vint16m1_t __riscv_vsll_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vsll_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           size_t rs1, size_t vl);
vint16m2_t __riscv_vsll_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vsll_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           size_t rs1, size_t vl);
vint16m4_t __riscv_vsll_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vsll_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           size_t rs1, size_t vl);
vint16m8_t __riscv_vsll_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           vuint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vsll_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           size_t rs1, size_t vl);
vint32mf2_t __riscv_vsll_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vsll_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            size_t rs1, size_t vl);
vint32m1_t __riscv_vsll_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vsll_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           size_t rs1, size_t vl);
vint32m2_t __riscv_vsll_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vsll_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           size_t rs1, size_t vl);
vint32m4_t __riscv_vsll_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vsll_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           size_t rs1, size_t vl);
vint32m8_t __riscv_vsll_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           vuint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vsll_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           size_t rs1, size_t vl);
vint64m1_t __riscv_vsll_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           vuint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vsll_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           size_t rs1, size_t vl);
vint64m2_t __riscv_vsll_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           vuint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vsll_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           size_t rs1, size_t vl);
vint64m4_t __riscv_vsll_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           vuint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vsll_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           size_t rs1, size_t vl);
vint64m8_t __riscv_vsll_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           vuint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vsll_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           size_t rs1, size_t vl);
// __riscv_vsra_mu, signed-integer overloads (vint8mf8_t through vint64m8_t).
// Every vector type is declared in two forms: one with a vector of shift
// amounts `vs1` (the unsigned vector type with the same element width and LMUL
// suffix as `vs2`), and one with a single scalar shift amount `rs1` of type
// size_t. `vm` is the vboolN_t mask operand, `vd` and `vs2` share the return
// type, and `vl` (size_t) is always the last parameter.
// NOTE(review): `vsra` is presumably the vector shift-right-arithmetic
// operation and `vd` presumably supplies the masked-off elements left
// undisturbed under `_mu` -- confirm against the intrinsics naming
// documentation.
vint8mf8_t __riscv_vsra_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vsra_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           size_t rs1, size_t vl);
vint8mf4_t __riscv_vsra_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vsra_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           size_t rs1, size_t vl);
vint8mf2_t __riscv_vsra_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vsra_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           size_t rs1, size_t vl);
vint8m1_t __riscv_vsra_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                          vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vsra_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, size_t rs1,
                          size_t vl);
vint8m2_t __riscv_vsra_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                          vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vsra_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, size_t rs1,
                          size_t vl);
vint8m4_t __riscv_vsra_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                          vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vsra_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, size_t rs1,
                          size_t vl);
vint8m8_t __riscv_vsra_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                          vuint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vsra_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, size_t rs1,
                          size_t vl);
vint16mf4_t __riscv_vsra_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vsra_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            size_t rs1, size_t vl);
vint16mf2_t __riscv_vsra_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vsra_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            size_t rs1, size_t vl);
vint16m1_t __riscv_vsra_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vsra_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           size_t rs1, size_t vl);
vint16m2_t __riscv_vsra_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vsra_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           size_t rs1, size_t vl);
vint16m4_t __riscv_vsra_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vsra_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           size_t rs1, size_t vl);
vint16m8_t __riscv_vsra_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           vuint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vsra_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           size_t rs1, size_t vl);
vint32mf2_t __riscv_vsra_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vsra_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            size_t rs1, size_t vl);
vint32m1_t __riscv_vsra_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vsra_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           size_t rs1, size_t vl);
vint32m2_t __riscv_vsra_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vsra_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           size_t rs1, size_t vl);
vint32m4_t __riscv_vsra_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vsra_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           size_t rs1, size_t vl);
vint32m8_t __riscv_vsra_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           vuint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vsra_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           size_t rs1, size_t vl);
vint64m1_t __riscv_vsra_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           vuint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vsra_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           size_t rs1, size_t vl);
vint64m2_t __riscv_vsra_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           vuint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vsra_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           size_t rs1, size_t vl);
vint64m4_t __riscv_vsra_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           vuint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vsra_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           size_t rs1, size_t vl);
vint64m8_t __riscv_vsra_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           vuint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vsra_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           size_t rs1, size_t vl);
// __riscv_vsll_mu overloads for the unsigned vector types vuint8mf8_t
// through vuint64m8_t. Every type has a vector form (vs1 has the same type
// as vd and vs2) and a scalar form (size_t rs1); both return the type of vd.
// The vboolN_t type of the mask vm changes with the vector type.
// NOTE(review): the _mu suffix presumably selects the mask-undisturbed
// policy variant -- confirm against the intrinsics naming scheme.
vuint8mf8_t __riscv_vsll_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vsll_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            size_t rs1, size_t vl);
vuint8mf4_t __riscv_vsll_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vsll_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            size_t rs1, size_t vl);
vuint8mf2_t __riscv_vsll_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vsll_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            size_t rs1, size_t vl);
vuint8m1_t __riscv_vsll_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vsll_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           size_t rs1, size_t vl);
vuint8m2_t __riscv_vsll_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vsll_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           size_t rs1, size_t vl);
vuint8m4_t __riscv_vsll_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vsll_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           size_t rs1, size_t vl);
vuint8m8_t __riscv_vsll_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vsll_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           size_t rs1, size_t vl);
vuint16mf4_t __riscv_vsll_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vsll_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             size_t rs1, size_t vl);
vuint16mf2_t __riscv_vsll_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vsll_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             size_t rs1, size_t vl);
vuint16m1_t __riscv_vsll_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vsll_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            size_t rs1, size_t vl);
vuint16m2_t __riscv_vsll_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vsll_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            size_t rs1, size_t vl);
vuint16m4_t __riscv_vsll_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vsll_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            size_t rs1, size_t vl);
vuint16m8_t __riscv_vsll_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vsll_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            size_t rs1, size_t vl);
vuint32mf2_t __riscv_vsll_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vsll_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             size_t rs1, size_t vl);
vuint32m1_t __riscv_vsll_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vsll_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            size_t rs1, size_t vl);
vuint32m2_t __riscv_vsll_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vsll_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            size_t rs1, size_t vl);
vuint32m4_t __riscv_vsll_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vsll_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            size_t rs1, size_t vl);
vuint32m8_t __riscv_vsll_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vsll_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            size_t rs1, size_t vl);
vuint64m1_t __riscv_vsll_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vsll_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            size_t rs1, size_t vl);
vuint64m2_t __riscv_vsll_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vsll_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            size_t rs1, size_t vl);
vuint64m4_t __riscv_vsll_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vsll_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            size_t rs1, size_t vl);
vuint64m8_t __riscv_vsll_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vsll_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            size_t rs1, size_t vl);
// __riscv_vsrl_mu overloads for the unsigned vector types vuint8mf8_t
// through vuint64m8_t. The signatures mirror the __riscv_vsll_mu listing:
// a vector form (vs1 typed like vd and vs2) and a scalar form (size_t rs1)
// per type, each returning the type of vd and taking a vboolN_t mask vm.
// NOTE(review): the _mu suffix presumably selects the mask-undisturbed
// policy variant -- confirm against the intrinsics naming scheme.
vuint8mf8_t __riscv_vsrl_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vsrl_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            size_t rs1, size_t vl);
vuint8mf4_t __riscv_vsrl_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vsrl_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            size_t rs1, size_t vl);
vuint8mf2_t __riscv_vsrl_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vsrl_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            size_t rs1, size_t vl);
vuint8m1_t __riscv_vsrl_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vsrl_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           size_t rs1, size_t vl);
vuint8m2_t __riscv_vsrl_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vsrl_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           size_t rs1, size_t vl);
vuint8m4_t __riscv_vsrl_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vsrl_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           size_t rs1, size_t vl);
vuint8m8_t __riscv_vsrl_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vsrl_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           size_t rs1, size_t vl);
vuint16mf4_t __riscv_vsrl_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vsrl_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             size_t rs1, size_t vl);
vuint16mf2_t __riscv_vsrl_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vsrl_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             size_t rs1, size_t vl);
vuint16m1_t __riscv_vsrl_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vsrl_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            size_t rs1, size_t vl);
vuint16m2_t __riscv_vsrl_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vsrl_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            size_t rs1, size_t vl);
vuint16m4_t __riscv_vsrl_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vsrl_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            size_t rs1, size_t vl);
vuint16m8_t __riscv_vsrl_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vsrl_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            size_t rs1, size_t vl);
vuint32mf2_t __riscv_vsrl_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vsrl_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             size_t rs1, size_t vl);
vuint32m1_t __riscv_vsrl_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vsrl_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            size_t rs1, size_t vl);
vuint32m2_t __riscv_vsrl_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vsrl_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            size_t rs1, size_t vl);
vuint32m4_t __riscv_vsrl_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vsrl_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            size_t rs1, size_t vl);
vuint32m8_t __riscv_vsrl_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vsrl_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            size_t rs1, size_t vl);
vuint64m1_t __riscv_vsrl_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vsrl_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            size_t rs1, size_t vl);
vuint64m2_t __riscv_vsrl_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vsrl_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            size_t rs1, size_t vl);
vuint64m4_t __riscv_vsrl_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vsrl_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            size_t rs1, size_t vl);
vuint64m8_t __riscv_vsrl_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vsrl_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            size_t rs1, size_t vl);

Vector Narrowing Integer Right Shift Intrinsics

// __riscv_vnsra_tu overloads (no mask parameter) for signed result types
// vint8mf8_t through vint32m4_t. vs2 is the wider signed source type (for
// example vint16mf4_t for a vint8mf8_t result), while vd and the return
// value use the narrower type. The third parameter is either a vector vs1 of
// the unsigned type shaped like the result or a scalar size_t rs1.
// NOTE(review): the _tu suffix presumably selects the tail-undisturbed
// policy variant -- confirm against the intrinsics naming scheme.
vint8mf8_t __riscv_vnsra_tu(vint8mf8_t vd, vint16mf4_t vs2, vuint8mf8_t vs1,
                            size_t vl);
vint8mf8_t __riscv_vnsra_tu(vint8mf8_t vd, vint16mf4_t vs2, size_t rs1,
                            size_t vl);
vint8mf4_t __riscv_vnsra_tu(vint8mf4_t vd, vint16mf2_t vs2, vuint8mf4_t vs1,
                            size_t vl);
vint8mf4_t __riscv_vnsra_tu(vint8mf4_t vd, vint16mf2_t vs2, size_t rs1,
                            size_t vl);
vint8mf2_t __riscv_vnsra_tu(vint8mf2_t vd, vint16m1_t vs2, vuint8mf2_t vs1,
                            size_t vl);
vint8mf2_t __riscv_vnsra_tu(vint8mf2_t vd, vint16m1_t vs2, size_t rs1,
                            size_t vl);
vint8m1_t __riscv_vnsra_tu(vint8m1_t vd, vint16m2_t vs2, vuint8m1_t vs1,
                           size_t vl);
vint8m1_t __riscv_vnsra_tu(vint8m1_t vd, vint16m2_t vs2, size_t rs1, size_t vl);
vint8m2_t __riscv_vnsra_tu(vint8m2_t vd, vint16m4_t vs2, vuint8m2_t vs1,
                           size_t vl);
vint8m2_t __riscv_vnsra_tu(vint8m2_t vd, vint16m4_t vs2, size_t rs1, size_t vl);
vint8m4_t __riscv_vnsra_tu(vint8m4_t vd, vint16m8_t vs2, vuint8m4_t vs1,
                           size_t vl);
vint8m4_t __riscv_vnsra_tu(vint8m4_t vd, vint16m8_t vs2, size_t rs1, size_t vl);
vint16mf4_t __riscv_vnsra_tu(vint16mf4_t vd, vint32mf2_t vs2, vuint16mf4_t vs1,
                             size_t vl);
vint16mf4_t __riscv_vnsra_tu(vint16mf4_t vd, vint32mf2_t vs2, size_t rs1,
                             size_t vl);
vint16mf2_t __riscv_vnsra_tu(vint16mf2_t vd, vint32m1_t vs2, vuint16mf2_t vs1,
                             size_t vl);
vint16mf2_t __riscv_vnsra_tu(vint16mf2_t vd, vint32m1_t vs2, size_t rs1,
                             size_t vl);
vint16m1_t __riscv_vnsra_tu(vint16m1_t vd, vint32m2_t vs2, vuint16m1_t vs1,
                            size_t vl);
vint16m1_t __riscv_vnsra_tu(vint16m1_t vd, vint32m2_t vs2, size_t rs1,
                            size_t vl);
vint16m2_t __riscv_vnsra_tu(vint16m2_t vd, vint32m4_t vs2, vuint16m2_t vs1,
                            size_t vl);
vint16m2_t __riscv_vnsra_tu(vint16m2_t vd, vint32m4_t vs2, size_t rs1,
                            size_t vl);
vint16m4_t __riscv_vnsra_tu(vint16m4_t vd, vint32m8_t vs2, vuint16m4_t vs1,
                            size_t vl);
vint16m4_t __riscv_vnsra_tu(vint16m4_t vd, vint32m8_t vs2, size_t rs1,
                            size_t vl);
vint32mf2_t __riscv_vnsra_tu(vint32mf2_t vd, vint64m1_t vs2, vuint32mf2_t vs1,
                             size_t vl);
vint32mf2_t __riscv_vnsra_tu(vint32mf2_t vd, vint64m1_t vs2, size_t rs1,
                             size_t vl);
vint32m1_t __riscv_vnsra_tu(vint32m1_t vd, vint64m2_t vs2, vuint32m1_t vs1,
                            size_t vl);
vint32m1_t __riscv_vnsra_tu(vint32m1_t vd, vint64m2_t vs2, size_t rs1,
                            size_t vl);
vint32m2_t __riscv_vnsra_tu(vint32m2_t vd, vint64m4_t vs2, vuint32m2_t vs1,
                            size_t vl);
vint32m2_t __riscv_vnsra_tu(vint32m2_t vd, vint64m4_t vs2, size_t rs1,
                            size_t vl);
vint32m4_t __riscv_vnsra_tu(vint32m4_t vd, vint64m8_t vs2, vuint32m4_t vs1,
                            size_t vl);
vint32m4_t __riscv_vnsra_tu(vint32m4_t vd, vint64m8_t vs2, size_t rs1,
                            size_t vl);
// __riscv_vnsrl_tu overloads (no mask parameter) for unsigned result types
// vuint8mf8_t through vuint32m4_t. vs2 is the wider unsigned source type
// (for example vuint16mf4_t for a vuint8mf8_t result); vd, the vector vs1
// and the return value all use the narrower type. The scalar form replaces
// vs1 with a size_t rs1.
// NOTE(review): the _tu suffix presumably selects the tail-undisturbed
// policy variant -- confirm against the intrinsics naming scheme.
vuint8mf8_t __riscv_vnsrl_tu(vuint8mf8_t vd, vuint16mf4_t vs2, vuint8mf8_t vs1,
                             size_t vl);
vuint8mf8_t __riscv_vnsrl_tu(vuint8mf8_t vd, vuint16mf4_t vs2, size_t rs1,
                             size_t vl);
vuint8mf4_t __riscv_vnsrl_tu(vuint8mf4_t vd, vuint16mf2_t vs2, vuint8mf4_t vs1,
                             size_t vl);
vuint8mf4_t __riscv_vnsrl_tu(vuint8mf4_t vd, vuint16mf2_t vs2, size_t rs1,
                             size_t vl);
vuint8mf2_t __riscv_vnsrl_tu(vuint8mf2_t vd, vuint16m1_t vs2, vuint8mf2_t vs1,
                             size_t vl);
vuint8mf2_t __riscv_vnsrl_tu(vuint8mf2_t vd, vuint16m1_t vs2, size_t rs1,
                             size_t vl);
vuint8m1_t __riscv_vnsrl_tu(vuint8m1_t vd, vuint16m2_t vs2, vuint8m1_t vs1,
                            size_t vl);
vuint8m1_t __riscv_vnsrl_tu(vuint8m1_t vd, vuint16m2_t vs2, size_t rs1,
                            size_t vl);
vuint8m2_t __riscv_vnsrl_tu(vuint8m2_t vd, vuint16m4_t vs2, vuint8m2_t vs1,
                            size_t vl);
vuint8m2_t __riscv_vnsrl_tu(vuint8m2_t vd, vuint16m4_t vs2, size_t rs1,
                            size_t vl);
vuint8m4_t __riscv_vnsrl_tu(vuint8m4_t vd, vuint16m8_t vs2, vuint8m4_t vs1,
                            size_t vl);
vuint8m4_t __riscv_vnsrl_tu(vuint8m4_t vd, vuint16m8_t vs2, size_t rs1,
                            size_t vl);
vuint16mf4_t __riscv_vnsrl_tu(vuint16mf4_t vd, vuint32mf2_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vnsrl_tu(vuint16mf4_t vd, vuint32mf2_t vs2, size_t rs1,
                              size_t vl);
vuint16mf2_t __riscv_vnsrl_tu(vuint16mf2_t vd, vuint32m1_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vnsrl_tu(vuint16mf2_t vd, vuint32m1_t vs2, size_t rs1,
                              size_t vl);
vuint16m1_t __riscv_vnsrl_tu(vuint16m1_t vd, vuint32m2_t vs2, vuint16m1_t vs1,
                             size_t vl);
vuint16m1_t __riscv_vnsrl_tu(vuint16m1_t vd, vuint32m2_t vs2, size_t rs1,
                             size_t vl);
vuint16m2_t __riscv_vnsrl_tu(vuint16m2_t vd, vuint32m4_t vs2, vuint16m2_t vs1,
                             size_t vl);
vuint16m2_t __riscv_vnsrl_tu(vuint16m2_t vd, vuint32m4_t vs2, size_t rs1,
                             size_t vl);
vuint16m4_t __riscv_vnsrl_tu(vuint16m4_t vd, vuint32m8_t vs2, vuint16m4_t vs1,
                             size_t vl);
vuint16m4_t __riscv_vnsrl_tu(vuint16m4_t vd, vuint32m8_t vs2, size_t rs1,
                             size_t vl);
vuint32mf2_t __riscv_vnsrl_tu(vuint32mf2_t vd, vuint64m1_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vnsrl_tu(vuint32mf2_t vd, vuint64m1_t vs2, size_t rs1,
                              size_t vl);
vuint32m1_t __riscv_vnsrl_tu(vuint32m1_t vd, vuint64m2_t vs2, vuint32m1_t vs1,
                             size_t vl);
vuint32m1_t __riscv_vnsrl_tu(vuint32m1_t vd, vuint64m2_t vs2, size_t rs1,
                             size_t vl);
vuint32m2_t __riscv_vnsrl_tu(vuint32m2_t vd, vuint64m4_t vs2, vuint32m2_t vs1,
                             size_t vl);
vuint32m2_t __riscv_vnsrl_tu(vuint32m2_t vd, vuint64m4_t vs2, size_t rs1,
                             size_t vl);
vuint32m4_t __riscv_vnsrl_tu(vuint32m4_t vd, vuint64m8_t vs2, vuint32m4_t vs1,
                             size_t vl);
vuint32m4_t __riscv_vnsrl_tu(vuint32m4_t vd, vuint64m8_t vs2, size_t rs1,
                             size_t vl);
// masked functions (_tum variants)
// __riscv_vnsra_tum overloads: the same narrowing signatures as the unmasked
// _tu listing, with a leading vboolN_t mask parameter vm whose type changes
// with the result type (vbool64_t for vint8mf8_t down to vbool2_t for
// vint8m4_t, and so on for the 16- and 32-bit results).
// NOTE(review): the _tum suffix presumably means tail-undisturbed with
// mask-agnostic inactive elements -- confirm against the naming scheme.
vint8mf8_t __riscv_vnsra_tum(vbool64_t vm, vint8mf8_t vd, vint16mf4_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vnsra_tum(vbool64_t vm, vint8mf8_t vd, vint16mf4_t vs2,
                             size_t rs1, size_t vl);
vint8mf4_t __riscv_vnsra_tum(vbool32_t vm, vint8mf4_t vd, vint16mf2_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vnsra_tum(vbool32_t vm, vint8mf4_t vd, vint16mf2_t vs2,
                             size_t rs1, size_t vl);
vint8mf2_t __riscv_vnsra_tum(vbool16_t vm, vint8mf2_t vd, vint16m1_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vnsra_tum(vbool16_t vm, vint8mf2_t vd, vint16m1_t vs2,
                             size_t rs1, size_t vl);
vint8m1_t __riscv_vnsra_tum(vbool8_t vm, vint8m1_t vd, vint16m2_t vs2,
                            vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vnsra_tum(vbool8_t vm, vint8m1_t vd, vint16m2_t vs2,
                            size_t rs1, size_t vl);
vint8m2_t __riscv_vnsra_tum(vbool4_t vm, vint8m2_t vd, vint16m4_t vs2,
                            vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vnsra_tum(vbool4_t vm, vint8m2_t vd, vint16m4_t vs2,
                            size_t rs1, size_t vl);
vint8m4_t __riscv_vnsra_tum(vbool2_t vm, vint8m4_t vd, vint16m8_t vs2,
                            vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vnsra_tum(vbool2_t vm, vint8m4_t vd, vint16m8_t vs2,
                            size_t rs1, size_t vl);
vint16mf4_t __riscv_vnsra_tum(vbool64_t vm, vint16mf4_t vd, vint32mf2_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vnsra_tum(vbool64_t vm, vint16mf4_t vd, vint32mf2_t vs2,
                              size_t rs1, size_t vl);
vint16mf2_t __riscv_vnsra_tum(vbool32_t vm, vint16mf2_t vd, vint32m1_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vnsra_tum(vbool32_t vm, vint16mf2_t vd, vint32m1_t vs2,
                              size_t rs1, size_t vl);
vint16m1_t __riscv_vnsra_tum(vbool16_t vm, vint16m1_t vd, vint32m2_t vs2,
                             vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vnsra_tum(vbool16_t vm, vint16m1_t vd, vint32m2_t vs2,
                             size_t rs1, size_t vl);
vint16m2_t __riscv_vnsra_tum(vbool8_t vm, vint16m2_t vd, vint32m4_t vs2,
                             vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vnsra_tum(vbool8_t vm, vint16m2_t vd, vint32m4_t vs2,
                             size_t rs1, size_t vl);
vint16m4_t __riscv_vnsra_tum(vbool4_t vm, vint16m4_t vd, vint32m8_t vs2,
                             vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vnsra_tum(vbool4_t vm, vint16m4_t vd, vint32m8_t vs2,
                             size_t rs1, size_t vl);
vint32mf2_t __riscv_vnsra_tum(vbool64_t vm, vint32mf2_t vd, vint64m1_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vnsra_tum(vbool64_t vm, vint32mf2_t vd, vint64m1_t vs2,
                              size_t rs1, size_t vl);
vint32m1_t __riscv_vnsra_tum(vbool32_t vm, vint32m1_t vd, vint64m2_t vs2,
                             vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vnsra_tum(vbool32_t vm, vint32m1_t vd, vint64m2_t vs2,
                             size_t rs1, size_t vl);
vint32m2_t __riscv_vnsra_tum(vbool16_t vm, vint32m2_t vd, vint64m4_t vs2,
                             vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vnsra_tum(vbool16_t vm, vint32m2_t vd, vint64m4_t vs2,
                             size_t rs1, size_t vl);
vint32m4_t __riscv_vnsra_tum(vbool8_t vm, vint32m4_t vd, vint64m8_t vs2,
                             vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vnsra_tum(vbool8_t vm, vint32m4_t vd, vint64m8_t vs2,
                             size_t rs1, size_t vl);
// __riscv_vnsrl_tum overloads: masked narrowing signatures for unsigned
// result types vuint8mf8_t through vuint32m4_t. Each takes a vboolN_t mask
// vm, vd of the result type, the wider unsigned source vs2, then either a
// vector vs1 of the result type or a scalar size_t rs1, and finally vl.
// NOTE(review): the _tum suffix presumably means tail-undisturbed with
// mask-agnostic inactive elements -- confirm against the naming scheme.
vuint8mf8_t __riscv_vnsrl_tum(vbool64_t vm, vuint8mf8_t vd, vuint16mf4_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vnsrl_tum(vbool64_t vm, vuint8mf8_t vd, vuint16mf4_t vs2,
                              size_t rs1, size_t vl);
vuint8mf4_t __riscv_vnsrl_tum(vbool32_t vm, vuint8mf4_t vd, vuint16mf2_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vnsrl_tum(vbool32_t vm, vuint8mf4_t vd, vuint16mf2_t vs2,
                              size_t rs1, size_t vl);
vuint8mf2_t __riscv_vnsrl_tum(vbool16_t vm, vuint8mf2_t vd, vuint16m1_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vnsrl_tum(vbool16_t vm, vuint8mf2_t vd, vuint16m1_t vs2,
                              size_t rs1, size_t vl);
vuint8m1_t __riscv_vnsrl_tum(vbool8_t vm, vuint8m1_t vd, vuint16m2_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vnsrl_tum(vbool8_t vm, vuint8m1_t vd, vuint16m2_t vs2,
                             size_t rs1, size_t vl);
vuint8m2_t __riscv_vnsrl_tum(vbool4_t vm, vuint8m2_t vd, vuint16m4_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vnsrl_tum(vbool4_t vm, vuint8m2_t vd, vuint16m4_t vs2,
                             size_t rs1, size_t vl);
vuint8m4_t __riscv_vnsrl_tum(vbool2_t vm, vuint8m4_t vd, vuint16m8_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vnsrl_tum(vbool2_t vm, vuint8m4_t vd, vuint16m8_t vs2,
                             size_t rs1, size_t vl);
vuint16mf4_t __riscv_vnsrl_tum(vbool64_t vm, vuint16mf4_t vd, vuint32mf2_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vnsrl_tum(vbool64_t vm, vuint16mf4_t vd, vuint32mf2_t vs2,
                               size_t rs1, size_t vl);
vuint16mf2_t __riscv_vnsrl_tum(vbool32_t vm, vuint16mf2_t vd, vuint32m1_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vnsrl_tum(vbool32_t vm, vuint16mf2_t vd, vuint32m1_t vs2,
                               size_t rs1, size_t vl);
vuint16m1_t __riscv_vnsrl_tum(vbool16_t vm, vuint16m1_t vd, vuint32m2_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vnsrl_tum(vbool16_t vm, vuint16m1_t vd, vuint32m2_t vs2,
                              size_t rs1, size_t vl);
vuint16m2_t __riscv_vnsrl_tum(vbool8_t vm, vuint16m2_t vd, vuint32m4_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vnsrl_tum(vbool8_t vm, vuint16m2_t vd, vuint32m4_t vs2,
                              size_t rs1, size_t vl);
vuint16m4_t __riscv_vnsrl_tum(vbool4_t vm, vuint16m4_t vd, vuint32m8_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vnsrl_tum(vbool4_t vm, vuint16m4_t vd, vuint32m8_t vs2,
                              size_t rs1, size_t vl);
vuint32mf2_t __riscv_vnsrl_tum(vbool64_t vm, vuint32mf2_t vd, vuint64m1_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vnsrl_tum(vbool64_t vm, vuint32mf2_t vd, vuint64m1_t vs2,
                               size_t rs1, size_t vl);
vuint32m1_t __riscv_vnsrl_tum(vbool32_t vm, vuint32m1_t vd, vuint64m2_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vnsrl_tum(vbool32_t vm, vuint32m1_t vd, vuint64m2_t vs2,
                              size_t rs1, size_t vl);
vuint32m2_t __riscv_vnsrl_tum(vbool16_t vm, vuint32m2_t vd, vuint64m4_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vnsrl_tum(vbool16_t vm, vuint32m2_t vd, vuint64m4_t vs2,
                              size_t rs1, size_t vl);
vuint32m4_t __riscv_vnsrl_tum(vbool8_t vm, vuint32m4_t vd, vuint64m8_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vnsrl_tum(vbool8_t vm, vuint32m4_t vd, vuint64m8_t vs2,
                              size_t rs1, size_t vl);
// masked functions, _tumu policy (tail undisturbed, mask undisturbed)
vint8mf8_t __riscv_vnsra_tumu(vbool64_t vm, vint8mf8_t vd, vint16mf4_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vnsra_tumu(vbool64_t vm, vint8mf8_t vd, vint16mf4_t vs2,
                              size_t rs1, size_t vl);
vint8mf4_t __riscv_vnsra_tumu(vbool32_t vm, vint8mf4_t vd, vint16mf2_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vnsra_tumu(vbool32_t vm, vint8mf4_t vd, vint16mf2_t vs2,
                              size_t rs1, size_t vl);
vint8mf2_t __riscv_vnsra_tumu(vbool16_t vm, vint8mf2_t vd, vint16m1_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vnsra_tumu(vbool16_t vm, vint8mf2_t vd, vint16m1_t vs2,
                              size_t rs1, size_t vl);
vint8m1_t __riscv_vnsra_tumu(vbool8_t vm, vint8m1_t vd, vint16m2_t vs2,
                             vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vnsra_tumu(vbool8_t vm, vint8m1_t vd, vint16m2_t vs2,
                             size_t rs1, size_t vl);
vint8m2_t __riscv_vnsra_tumu(vbool4_t vm, vint8m2_t vd, vint16m4_t vs2,
                             vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vnsra_tumu(vbool4_t vm, vint8m2_t vd, vint16m4_t vs2,
                             size_t rs1, size_t vl);
vint8m4_t __riscv_vnsra_tumu(vbool2_t vm, vint8m4_t vd, vint16m8_t vs2,
                             vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vnsra_tumu(vbool2_t vm, vint8m4_t vd, vint16m8_t vs2,
                             size_t rs1, size_t vl);
vint16mf4_t __riscv_vnsra_tumu(vbool64_t vm, vint16mf4_t vd, vint32mf2_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vnsra_tumu(vbool64_t vm, vint16mf4_t vd, vint32mf2_t vs2,
                               size_t rs1, size_t vl);
vint16mf2_t __riscv_vnsra_tumu(vbool32_t vm, vint16mf2_t vd, vint32m1_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vnsra_tumu(vbool32_t vm, vint16mf2_t vd, vint32m1_t vs2,
                               size_t rs1, size_t vl);
vint16m1_t __riscv_vnsra_tumu(vbool16_t vm, vint16m1_t vd, vint32m2_t vs2,
                              vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vnsra_tumu(vbool16_t vm, vint16m1_t vd, vint32m2_t vs2,
                              size_t rs1, size_t vl);
vint16m2_t __riscv_vnsra_tumu(vbool8_t vm, vint16m2_t vd, vint32m4_t vs2,
                              vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vnsra_tumu(vbool8_t vm, vint16m2_t vd, vint32m4_t vs2,
                              size_t rs1, size_t vl);
vint16m4_t __riscv_vnsra_tumu(vbool4_t vm, vint16m4_t vd, vint32m8_t vs2,
                              vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vnsra_tumu(vbool4_t vm, vint16m4_t vd, vint32m8_t vs2,
                              size_t rs1, size_t vl);
vint32mf2_t __riscv_vnsra_tumu(vbool64_t vm, vint32mf2_t vd, vint64m1_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vnsra_tumu(vbool64_t vm, vint32mf2_t vd, vint64m1_t vs2,
                               size_t rs1, size_t vl);
vint32m1_t __riscv_vnsra_tumu(vbool32_t vm, vint32m1_t vd, vint64m2_t vs2,
                              vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vnsra_tumu(vbool32_t vm, vint32m1_t vd, vint64m2_t vs2,
                              size_t rs1, size_t vl);
vint32m2_t __riscv_vnsra_tumu(vbool16_t vm, vint32m2_t vd, vint64m4_t vs2,
                              vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vnsra_tumu(vbool16_t vm, vint32m2_t vd, vint64m4_t vs2,
                              size_t rs1, size_t vl);
vint32m4_t __riscv_vnsra_tumu(vbool8_t vm, vint32m4_t vd, vint64m8_t vs2,
                              vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vnsra_tumu(vbool8_t vm, vint32m4_t vd, vint64m8_t vs2,
                              size_t rs1, size_t vl);
vuint8mf8_t __riscv_vnsrl_tumu(vbool64_t vm, vuint8mf8_t vd, vuint16mf4_t vs2,
                               vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vnsrl_tumu(vbool64_t vm, vuint8mf8_t vd, vuint16mf4_t vs2,
                               size_t rs1, size_t vl);
vuint8mf4_t __riscv_vnsrl_tumu(vbool32_t vm, vuint8mf4_t vd, vuint16mf2_t vs2,
                               vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vnsrl_tumu(vbool32_t vm, vuint8mf4_t vd, vuint16mf2_t vs2,
                               size_t rs1, size_t vl);
vuint8mf2_t __riscv_vnsrl_tumu(vbool16_t vm, vuint8mf2_t vd, vuint16m1_t vs2,
                               vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vnsrl_tumu(vbool16_t vm, vuint8mf2_t vd, vuint16m1_t vs2,
                               size_t rs1, size_t vl);
vuint8m1_t __riscv_vnsrl_tumu(vbool8_t vm, vuint8m1_t vd, vuint16m2_t vs2,
                              vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vnsrl_tumu(vbool8_t vm, vuint8m1_t vd, vuint16m2_t vs2,
                              size_t rs1, size_t vl);
vuint8m2_t __riscv_vnsrl_tumu(vbool4_t vm, vuint8m2_t vd, vuint16m4_t vs2,
                              vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vnsrl_tumu(vbool4_t vm, vuint8m2_t vd, vuint16m4_t vs2,
                              size_t rs1, size_t vl);
vuint8m4_t __riscv_vnsrl_tumu(vbool2_t vm, vuint8m4_t vd, vuint16m8_t vs2,
                              vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vnsrl_tumu(vbool2_t vm, vuint8m4_t vd, vuint16m8_t vs2,
                              size_t rs1, size_t vl);
vuint16mf4_t __riscv_vnsrl_tumu(vbool64_t vm, vuint16mf4_t vd, vuint32mf2_t vs2,
                                vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vnsrl_tumu(vbool64_t vm, vuint16mf4_t vd, vuint32mf2_t vs2,
                                size_t rs1, size_t vl);
vuint16mf2_t __riscv_vnsrl_tumu(vbool32_t vm, vuint16mf2_t vd, vuint32m1_t vs2,
                                vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vnsrl_tumu(vbool32_t vm, vuint16mf2_t vd, vuint32m1_t vs2,
                                size_t rs1, size_t vl);
vuint16m1_t __riscv_vnsrl_tumu(vbool16_t vm, vuint16m1_t vd, vuint32m2_t vs2,
                               vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vnsrl_tumu(vbool16_t vm, vuint16m1_t vd, vuint32m2_t vs2,
                               size_t rs1, size_t vl);
vuint16m2_t __riscv_vnsrl_tumu(vbool8_t vm, vuint16m2_t vd, vuint32m4_t vs2,
                               vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vnsrl_tumu(vbool8_t vm, vuint16m2_t vd, vuint32m4_t vs2,
                               size_t rs1, size_t vl);
vuint16m4_t __riscv_vnsrl_tumu(vbool4_t vm, vuint16m4_t vd, vuint32m8_t vs2,
                               vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vnsrl_tumu(vbool4_t vm, vuint16m4_t vd, vuint32m8_t vs2,
                               size_t rs1, size_t vl);
vuint32mf2_t __riscv_vnsrl_tumu(vbool64_t vm, vuint32mf2_t vd, vuint64m1_t vs2,
                                vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vnsrl_tumu(vbool64_t vm, vuint32mf2_t vd, vuint64m1_t vs2,
                                size_t rs1, size_t vl);
vuint32m1_t __riscv_vnsrl_tumu(vbool32_t vm, vuint32m1_t vd, vuint64m2_t vs2,
                               vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vnsrl_tumu(vbool32_t vm, vuint32m1_t vd, vuint64m2_t vs2,
                               size_t rs1, size_t vl);
vuint32m2_t __riscv_vnsrl_tumu(vbool16_t vm, vuint32m2_t vd, vuint64m4_t vs2,
                               vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vnsrl_tumu(vbool16_t vm, vuint32m2_t vd, vuint64m4_t vs2,
                               size_t rs1, size_t vl);
vuint32m4_t __riscv_vnsrl_tumu(vbool8_t vm, vuint32m4_t vd, vuint64m8_t vs2,
                               vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vnsrl_tumu(vbool8_t vm, vuint32m4_t vd, vuint64m8_t vs2,
                               size_t rs1, size_t vl);
// masked functions, _mu policy (tail agnostic, mask undisturbed)
vint8mf8_t __riscv_vnsra_mu(vbool64_t vm, vint8mf8_t vd, vint16mf4_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vnsra_mu(vbool64_t vm, vint8mf8_t vd, vint16mf4_t vs2,
                            size_t rs1, size_t vl);
vint8mf4_t __riscv_vnsra_mu(vbool32_t vm, vint8mf4_t vd, vint16mf2_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vnsra_mu(vbool32_t vm, vint8mf4_t vd, vint16mf2_t vs2,
                            size_t rs1, size_t vl);
vint8mf2_t __riscv_vnsra_mu(vbool16_t vm, vint8mf2_t vd, vint16m1_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vnsra_mu(vbool16_t vm, vint8mf2_t vd, vint16m1_t vs2,
                            size_t rs1, size_t vl);
vint8m1_t __riscv_vnsra_mu(vbool8_t vm, vint8m1_t vd, vint16m2_t vs2,
                           vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vnsra_mu(vbool8_t vm, vint8m1_t vd, vint16m2_t vs2,
                           size_t rs1, size_t vl);
vint8m2_t __riscv_vnsra_mu(vbool4_t vm, vint8m2_t vd, vint16m4_t vs2,
                           vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vnsra_mu(vbool4_t vm, vint8m2_t vd, vint16m4_t vs2,
                           size_t rs1, size_t vl);
vint8m4_t __riscv_vnsra_mu(vbool2_t vm, vint8m4_t vd, vint16m8_t vs2,
                           vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vnsra_mu(vbool2_t vm, vint8m4_t vd, vint16m8_t vs2,
                           size_t rs1, size_t vl);
vint16mf4_t __riscv_vnsra_mu(vbool64_t vm, vint16mf4_t vd, vint32mf2_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vnsra_mu(vbool64_t vm, vint16mf4_t vd, vint32mf2_t vs2,
                             size_t rs1, size_t vl);
vint16mf2_t __riscv_vnsra_mu(vbool32_t vm, vint16mf2_t vd, vint32m1_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vnsra_mu(vbool32_t vm, vint16mf2_t vd, vint32m1_t vs2,
                             size_t rs1, size_t vl);
vint16m1_t __riscv_vnsra_mu(vbool16_t vm, vint16m1_t vd, vint32m2_t vs2,
                            vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vnsra_mu(vbool16_t vm, vint16m1_t vd, vint32m2_t vs2,
                            size_t rs1, size_t vl);
vint16m2_t __riscv_vnsra_mu(vbool8_t vm, vint16m2_t vd, vint32m4_t vs2,
                            vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vnsra_mu(vbool8_t vm, vint16m2_t vd, vint32m4_t vs2,
                            size_t rs1, size_t vl);
vint16m4_t __riscv_vnsra_mu(vbool4_t vm, vint16m4_t vd, vint32m8_t vs2,
                            vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vnsra_mu(vbool4_t vm, vint16m4_t vd, vint32m8_t vs2,
                            size_t rs1, size_t vl);
vint32mf2_t __riscv_vnsra_mu(vbool64_t vm, vint32mf2_t vd, vint64m1_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vnsra_mu(vbool64_t vm, vint32mf2_t vd, vint64m1_t vs2,
                             size_t rs1, size_t vl);
vint32m1_t __riscv_vnsra_mu(vbool32_t vm, vint32m1_t vd, vint64m2_t vs2,
                            vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vnsra_mu(vbool32_t vm, vint32m1_t vd, vint64m2_t vs2,
                            size_t rs1, size_t vl);
vint32m2_t __riscv_vnsra_mu(vbool16_t vm, vint32m2_t vd, vint64m4_t vs2,
                            vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vnsra_mu(vbool16_t vm, vint32m2_t vd, vint64m4_t vs2,
                            size_t rs1, size_t vl);
vint32m4_t __riscv_vnsra_mu(vbool8_t vm, vint32m4_t vd, vint64m8_t vs2,
                            vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vnsra_mu(vbool8_t vm, vint32m4_t vd, vint64m8_t vs2,
                            size_t rs1, size_t vl);
vuint8mf8_t __riscv_vnsrl_mu(vbool64_t vm, vuint8mf8_t vd, vuint16mf4_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vnsrl_mu(vbool64_t vm, vuint8mf8_t vd, vuint16mf4_t vs2,
                             size_t rs1, size_t vl);
vuint8mf4_t __riscv_vnsrl_mu(vbool32_t vm, vuint8mf4_t vd, vuint16mf2_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vnsrl_mu(vbool32_t vm, vuint8mf4_t vd, vuint16mf2_t vs2,
                             size_t rs1, size_t vl);
vuint8mf2_t __riscv_vnsrl_mu(vbool16_t vm, vuint8mf2_t vd, vuint16m1_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vnsrl_mu(vbool16_t vm, vuint8mf2_t vd, vuint16m1_t vs2,
                             size_t rs1, size_t vl);
vuint8m1_t __riscv_vnsrl_mu(vbool8_t vm, vuint8m1_t vd, vuint16m2_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vnsrl_mu(vbool8_t vm, vuint8m1_t vd, vuint16m2_t vs2,
                            size_t rs1, size_t vl);
vuint8m2_t __riscv_vnsrl_mu(vbool4_t vm, vuint8m2_t vd, vuint16m4_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vnsrl_mu(vbool4_t vm, vuint8m2_t vd, vuint16m4_t vs2,
                            size_t rs1, size_t vl);
vuint8m4_t __riscv_vnsrl_mu(vbool2_t vm, vuint8m4_t vd, vuint16m8_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vnsrl_mu(vbool2_t vm, vuint8m4_t vd, vuint16m8_t vs2,
                            size_t rs1, size_t vl);
vuint16mf4_t __riscv_vnsrl_mu(vbool64_t vm, vuint16mf4_t vd, vuint32mf2_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vnsrl_mu(vbool64_t vm, vuint16mf4_t vd, vuint32mf2_t vs2,
                              size_t rs1, size_t vl);
vuint16mf2_t __riscv_vnsrl_mu(vbool32_t vm, vuint16mf2_t vd, vuint32m1_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vnsrl_mu(vbool32_t vm, vuint16mf2_t vd, vuint32m1_t vs2,
                              size_t rs1, size_t vl);
vuint16m1_t __riscv_vnsrl_mu(vbool16_t vm, vuint16m1_t vd, vuint32m2_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vnsrl_mu(vbool16_t vm, vuint16m1_t vd, vuint32m2_t vs2,
                             size_t rs1, size_t vl);
vuint16m2_t __riscv_vnsrl_mu(vbool8_t vm, vuint16m2_t vd, vuint32m4_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vnsrl_mu(vbool8_t vm, vuint16m2_t vd, vuint32m4_t vs2,
                             size_t rs1, size_t vl);
vuint16m4_t __riscv_vnsrl_mu(vbool4_t vm, vuint16m4_t vd, vuint32m8_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vnsrl_mu(vbool4_t vm, vuint16m4_t vd, vuint32m8_t vs2,
                             size_t rs1, size_t vl);
vuint32mf2_t __riscv_vnsrl_mu(vbool64_t vm, vuint32mf2_t vd, vuint64m1_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vnsrl_mu(vbool64_t vm, vuint32mf2_t vd, vuint64m1_t vs2,
                              size_t rs1, size_t vl);
vuint32m1_t __riscv_vnsrl_mu(vbool32_t vm, vuint32m1_t vd, vuint64m2_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vnsrl_mu(vbool32_t vm, vuint32m1_t vd, vuint64m2_t vs2,
                             size_t rs1, size_t vl);
vuint32m2_t __riscv_vnsrl_mu(vbool16_t vm, vuint32m2_t vd, vuint64m4_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vnsrl_mu(vbool16_t vm, vuint32m2_t vd, vuint64m4_t vs2,
                             size_t rs1, size_t vl);
vuint32m4_t __riscv_vnsrl_mu(vbool8_t vm, vuint32m4_t vd, vuint64m8_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vnsrl_mu(vbool8_t vm, vuint32m4_t vd, vuint64m8_t vs2,
                             size_t rs1, size_t vl);

Vector Integer Narrowing Intrinsics

vint8mf8_t __riscv_vncvt_x_tu(vint8mf8_t vd, vint16mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vncvt_x_tu(vint8mf4_t vd, vint16mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vncvt_x_tu(vint8mf2_t vd, vint16m1_t vs2, size_t vl);
vint8m1_t __riscv_vncvt_x_tu(vint8m1_t vd, vint16m2_t vs2, size_t vl);
vint8m2_t __riscv_vncvt_x_tu(vint8m2_t vd, vint16m4_t vs2, size_t vl);
vint8m4_t __riscv_vncvt_x_tu(vint8m4_t vd, vint16m8_t vs2, size_t vl);
vuint8mf8_t __riscv_vncvt_x_tu(vuint8mf8_t vd, vuint16mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vncvt_x_tu(vuint8mf4_t vd, vuint16mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vncvt_x_tu(vuint8mf2_t vd, vuint16m1_t vs2, size_t vl);
vuint8m1_t __riscv_vncvt_x_tu(vuint8m1_t vd, vuint16m2_t vs2, size_t vl);
vuint8m2_t __riscv_vncvt_x_tu(vuint8m2_t vd, vuint16m4_t vs2, size_t vl);
vuint8m4_t __riscv_vncvt_x_tu(vuint8m4_t vd, vuint16m8_t vs2, size_t vl);
vint16mf4_t __riscv_vncvt_x_tu(vint16mf4_t vd, vint32mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vncvt_x_tu(vint16mf2_t vd, vint32m1_t vs2, size_t vl);
vint16m1_t __riscv_vncvt_x_tu(vint16m1_t vd, vint32m2_t vs2, size_t vl);
vint16m2_t __riscv_vncvt_x_tu(vint16m2_t vd, vint32m4_t vs2, size_t vl);
vint16m4_t __riscv_vncvt_x_tu(vint16m4_t vd, vint32m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vncvt_x_tu(vuint16mf4_t vd, vuint32mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vncvt_x_tu(vuint16mf2_t vd, vuint32m1_t vs2, size_t vl);
vuint16m1_t __riscv_vncvt_x_tu(vuint16m1_t vd, vuint32m2_t vs2, size_t vl);
vuint16m2_t __riscv_vncvt_x_tu(vuint16m2_t vd, vuint32m4_t vs2, size_t vl);
vuint16m4_t __riscv_vncvt_x_tu(vuint16m4_t vd, vuint32m8_t vs2, size_t vl);
vint32mf2_t __riscv_vncvt_x_tu(vint32mf2_t vd, vint64m1_t vs2, size_t vl);
vint32m1_t __riscv_vncvt_x_tu(vint32m1_t vd, vint64m2_t vs2, size_t vl);
vint32m2_t __riscv_vncvt_x_tu(vint32m2_t vd, vint64m4_t vs2, size_t vl);
vint32m4_t __riscv_vncvt_x_tu(vint32m4_t vd, vint64m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vncvt_x_tu(vuint32mf2_t vd, vuint64m1_t vs2, size_t vl);
vuint32m1_t __riscv_vncvt_x_tu(vuint32m1_t vd, vuint64m2_t vs2, size_t vl);
vuint32m2_t __riscv_vncvt_x_tu(vuint32m2_t vd, vuint64m4_t vs2, size_t vl);
vuint32m4_t __riscv_vncvt_x_tu(vuint32m4_t vd, vuint64m8_t vs2, size_t vl);
// masked functions, _tum policy (tail undisturbed, mask agnostic)
vint8mf8_t __riscv_vncvt_x_tum(vbool64_t vm, vint8mf8_t vd, vint16mf4_t vs2,
                               size_t vl);
vint8mf4_t __riscv_vncvt_x_tum(vbool32_t vm, vint8mf4_t vd, vint16mf2_t vs2,
                               size_t vl);
vint8mf2_t __riscv_vncvt_x_tum(vbool16_t vm, vint8mf2_t vd, vint16m1_t vs2,
                               size_t vl);
vint8m1_t __riscv_vncvt_x_tum(vbool8_t vm, vint8m1_t vd, vint16m2_t vs2,
                              size_t vl);
vint8m2_t __riscv_vncvt_x_tum(vbool4_t vm, vint8m2_t vd, vint16m4_t vs2,
                              size_t vl);
vint8m4_t __riscv_vncvt_x_tum(vbool2_t vm, vint8m4_t vd, vint16m8_t vs2,
                              size_t vl);
vuint8mf8_t __riscv_vncvt_x_tum(vbool64_t vm, vuint8mf8_t vd, vuint16mf4_t vs2,
                                size_t vl);
vuint8mf4_t __riscv_vncvt_x_tum(vbool32_t vm, vuint8mf4_t vd, vuint16mf2_t vs2,
                                size_t vl);
vuint8mf2_t __riscv_vncvt_x_tum(vbool16_t vm, vuint8mf2_t vd, vuint16m1_t vs2,
                                size_t vl);
vuint8m1_t __riscv_vncvt_x_tum(vbool8_t vm, vuint8m1_t vd, vuint16m2_t vs2,
                               size_t vl);
vuint8m2_t __riscv_vncvt_x_tum(vbool4_t vm, vuint8m2_t vd, vuint16m4_t vs2,
                               size_t vl);
vuint8m4_t __riscv_vncvt_x_tum(vbool2_t vm, vuint8m4_t vd, vuint16m8_t vs2,
                               size_t vl);
vint16mf4_t __riscv_vncvt_x_tum(vbool64_t vm, vint16mf4_t vd, vint32mf2_t vs2,
                                size_t vl);
vint16mf2_t __riscv_vncvt_x_tum(vbool32_t vm, vint16mf2_t vd, vint32m1_t vs2,
                                size_t vl);
vint16m1_t __riscv_vncvt_x_tum(vbool16_t vm, vint16m1_t vd, vint32m2_t vs2,
                               size_t vl);
vint16m2_t __riscv_vncvt_x_tum(vbool8_t vm, vint16m2_t vd, vint32m4_t vs2,
                               size_t vl);
vint16m4_t __riscv_vncvt_x_tum(vbool4_t vm, vint16m4_t vd, vint32m8_t vs2,
                               size_t vl);
vuint16mf4_t __riscv_vncvt_x_tum(vbool64_t vm, vuint16mf4_t vd,
                                 vuint32mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vncvt_x_tum(vbool32_t vm, vuint16mf2_t vd, vuint32m1_t vs2,
                                 size_t vl);
vuint16m1_t __riscv_vncvt_x_tum(vbool16_t vm, vuint16m1_t vd, vuint32m2_t vs2,
                                size_t vl);
vuint16m2_t __riscv_vncvt_x_tum(vbool8_t vm, vuint16m2_t vd, vuint32m4_t vs2,
                                size_t vl);
vuint16m4_t __riscv_vncvt_x_tum(vbool4_t vm, vuint16m4_t vd, vuint32m8_t vs2,
                                size_t vl);
vint32mf2_t __riscv_vncvt_x_tum(vbool64_t vm, vint32mf2_t vd, vint64m1_t vs2,
                                size_t vl);
vint32m1_t __riscv_vncvt_x_tum(vbool32_t vm, vint32m1_t vd, vint64m2_t vs2,
                               size_t vl);
vint32m2_t __riscv_vncvt_x_tum(vbool16_t vm, vint32m2_t vd, vint64m4_t vs2,
                               size_t vl);
vint32m4_t __riscv_vncvt_x_tum(vbool8_t vm, vint32m4_t vd, vint64m8_t vs2,
                               size_t vl);
vuint32mf2_t __riscv_vncvt_x_tum(vbool64_t vm, vuint32mf2_t vd, vuint64m1_t vs2,
                                 size_t vl);
vuint32m1_t __riscv_vncvt_x_tum(vbool32_t vm, vuint32m1_t vd, vuint64m2_t vs2,
                                size_t vl);
vuint32m2_t __riscv_vncvt_x_tum(vbool16_t vm, vuint32m2_t vd, vuint64m4_t vs2,
                                size_t vl);
vuint32m4_t __riscv_vncvt_x_tum(vbool8_t vm, vuint32m4_t vd, vuint64m8_t vs2,
                                size_t vl);
// masked functions, _tumu policy (tail undisturbed, mask undisturbed)
vint8mf8_t __riscv_vncvt_x_tumu(vbool64_t vm, vint8mf8_t vd, vint16mf4_t vs2,
                                size_t vl);
vint8mf4_t __riscv_vncvt_x_tumu(vbool32_t vm, vint8mf4_t vd, vint16mf2_t vs2,
                                size_t vl);
vint8mf2_t __riscv_vncvt_x_tumu(vbool16_t vm, vint8mf2_t vd, vint16m1_t vs2,
                                size_t vl);
vint8m1_t __riscv_vncvt_x_tumu(vbool8_t vm, vint8m1_t vd, vint16m2_t vs2,
                               size_t vl);
vint8m2_t __riscv_vncvt_x_tumu(vbool4_t vm, vint8m2_t vd, vint16m4_t vs2,
                               size_t vl);
vint8m4_t __riscv_vncvt_x_tumu(vbool2_t vm, vint8m4_t vd, vint16m8_t vs2,
                               size_t vl);
vuint8mf8_t __riscv_vncvt_x_tumu(vbool64_t vm, vuint8mf8_t vd, vuint16mf4_t vs2,
                                 size_t vl);
vuint8mf4_t __riscv_vncvt_x_tumu(vbool32_t vm, vuint8mf4_t vd, vuint16mf2_t vs2,
                                 size_t vl);
vuint8mf2_t __riscv_vncvt_x_tumu(vbool16_t vm, vuint8mf2_t vd, vuint16m1_t vs2,
                                 size_t vl);
vuint8m1_t __riscv_vncvt_x_tumu(vbool8_t vm, vuint8m1_t vd, vuint16m2_t vs2,
                                size_t vl);
vuint8m2_t __riscv_vncvt_x_tumu(vbool4_t vm, vuint8m2_t vd, vuint16m4_t vs2,
                                size_t vl);
vuint8m4_t __riscv_vncvt_x_tumu(vbool2_t vm, vuint8m4_t vd, vuint16m8_t vs2,
                                size_t vl);
vint16mf4_t __riscv_vncvt_x_tumu(vbool64_t vm, vint16mf4_t vd, vint32mf2_t vs2,
                                 size_t vl);
vint16mf2_t __riscv_vncvt_x_tumu(vbool32_t vm, vint16mf2_t vd, vint32m1_t vs2,
                                 size_t vl);
vint16m1_t __riscv_vncvt_x_tumu(vbool16_t vm, vint16m1_t vd, vint32m2_t vs2,
                                size_t vl);
vint16m2_t __riscv_vncvt_x_tumu(vbool8_t vm, vint16m2_t vd, vint32m4_t vs2,
                                size_t vl);
vint16m4_t __riscv_vncvt_x_tumu(vbool4_t vm, vint16m4_t vd, vint32m8_t vs2,
                                size_t vl);
vuint16mf4_t __riscv_vncvt_x_tumu(vbool64_t vm, vuint16mf4_t vd,
                                  vuint32mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vncvt_x_tumu(vbool32_t vm, vuint16mf2_t vd,
                                  vuint32m1_t vs2, size_t vl);
vuint16m1_t __riscv_vncvt_x_tumu(vbool16_t vm, vuint16m1_t vd, vuint32m2_t vs2,
                                 size_t vl);
vuint16m2_t __riscv_vncvt_x_tumu(vbool8_t vm, vuint16m2_t vd, vuint32m4_t vs2,
                                 size_t vl);
vuint16m4_t __riscv_vncvt_x_tumu(vbool4_t vm, vuint16m4_t vd, vuint32m8_t vs2,
                                 size_t vl);
vint32mf2_t __riscv_vncvt_x_tumu(vbool64_t vm, vint32mf2_t vd, vint64m1_t vs2,
                                 size_t vl);
vint32m1_t __riscv_vncvt_x_tumu(vbool32_t vm, vint32m1_t vd, vint64m2_t vs2,
                                size_t vl);
vint32m2_t __riscv_vncvt_x_tumu(vbool16_t vm, vint32m2_t vd, vint64m4_t vs2,
                                size_t vl);
vint32m4_t __riscv_vncvt_x_tumu(vbool8_t vm, vint32m4_t vd, vint64m8_t vs2,
                                size_t vl);
vuint32mf2_t __riscv_vncvt_x_tumu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint64m1_t vs2, size_t vl);
vuint32m1_t __riscv_vncvt_x_tumu(vbool32_t vm, vuint32m1_t vd, vuint64m2_t vs2,
                                 size_t vl);
vuint32m2_t __riscv_vncvt_x_tumu(vbool16_t vm, vuint32m2_t vd, vuint64m4_t vs2,
                                 size_t vl);
vuint32m4_t __riscv_vncvt_x_tumu(vbool8_t vm, vuint32m4_t vd, vuint64m8_t vs2,
                                 size_t vl);
// masked functions, _mu policy (tail agnostic, mask undisturbed)
vint8mf8_t __riscv_vncvt_x_mu(vbool64_t vm, vint8mf8_t vd, vint16mf4_t vs2,
                              size_t vl);
vint8mf4_t __riscv_vncvt_x_mu(vbool32_t vm, vint8mf4_t vd, vint16mf2_t vs2,
                              size_t vl);
vint8mf2_t __riscv_vncvt_x_mu(vbool16_t vm, vint8mf2_t vd, vint16m1_t vs2,
                              size_t vl);
vint8m1_t __riscv_vncvt_x_mu(vbool8_t vm, vint8m1_t vd, vint16m2_t vs2,
                             size_t vl);
vint8m2_t __riscv_vncvt_x_mu(vbool4_t vm, vint8m2_t vd, vint16m4_t vs2,
                             size_t vl);
vint8m4_t __riscv_vncvt_x_mu(vbool2_t vm, vint8m4_t vd, vint16m8_t vs2,
                             size_t vl);
vuint8mf8_t __riscv_vncvt_x_mu(vbool64_t vm, vuint8mf8_t vd, vuint16mf4_t vs2,
                               size_t vl);
vuint8mf4_t __riscv_vncvt_x_mu(vbool32_t vm, vuint8mf4_t vd, vuint16mf2_t vs2,
                               size_t vl);
vuint8mf2_t __riscv_vncvt_x_mu(vbool16_t vm, vuint8mf2_t vd, vuint16m1_t vs2,
                               size_t vl);
vuint8m1_t __riscv_vncvt_x_mu(vbool8_t vm, vuint8m1_t vd, vuint16m2_t vs2,
                              size_t vl);
vuint8m2_t __riscv_vncvt_x_mu(vbool4_t vm, vuint8m2_t vd, vuint16m4_t vs2,
                              size_t vl);
vuint8m4_t __riscv_vncvt_x_mu(vbool2_t vm, vuint8m4_t vd, vuint16m8_t vs2,
                              size_t vl);
vint16mf4_t __riscv_vncvt_x_mu(vbool64_t vm, vint16mf4_t vd, vint32mf2_t vs2,
                               size_t vl);
vint16mf2_t __riscv_vncvt_x_mu(vbool32_t vm, vint16mf2_t vd, vint32m1_t vs2,
                               size_t vl);
vint16m1_t __riscv_vncvt_x_mu(vbool16_t vm, vint16m1_t vd, vint32m2_t vs2,
                              size_t vl);
vint16m2_t __riscv_vncvt_x_mu(vbool8_t vm, vint16m2_t vd, vint32m4_t vs2,
                              size_t vl);
vint16m4_t __riscv_vncvt_x_mu(vbool4_t vm, vint16m4_t vd, vint32m8_t vs2,
                              size_t vl);
vuint16mf4_t __riscv_vncvt_x_mu(vbool64_t vm, vuint16mf4_t vd, vuint32mf2_t vs2,
                                size_t vl);
vuint16mf2_t __riscv_vncvt_x_mu(vbool32_t vm, vuint16mf2_t vd, vuint32m1_t vs2,
                                size_t vl);
vuint16m1_t __riscv_vncvt_x_mu(vbool16_t vm, vuint16m1_t vd, vuint32m2_t vs2,
                               size_t vl);
vuint16m2_t __riscv_vncvt_x_mu(vbool8_t vm, vuint16m2_t vd, vuint32m4_t vs2,
                               size_t vl);
vuint16m4_t __riscv_vncvt_x_mu(vbool4_t vm, vuint16m4_t vd, vuint32m8_t vs2,
                               size_t vl);
vint32mf2_t __riscv_vncvt_x_mu(vbool64_t vm, vint32mf2_t vd, vint64m1_t vs2,
                               size_t vl);
vint32m1_t __riscv_vncvt_x_mu(vbool32_t vm, vint32m1_t vd, vint64m2_t vs2,
                              size_t vl);
vint32m2_t __riscv_vncvt_x_mu(vbool16_t vm, vint32m2_t vd, vint64m4_t vs2,
                              size_t vl);
vint32m4_t __riscv_vncvt_x_mu(vbool8_t vm, vint32m4_t vd, vint64m8_t vs2,
                              size_t vl);
vuint32mf2_t __riscv_vncvt_x_mu(vbool64_t vm, vuint32mf2_t vd, vuint64m1_t vs2,
                                size_t vl);
vuint32m1_t __riscv_vncvt_x_mu(vbool32_t vm, vuint32m1_t vd, vuint64m2_t vs2,
                               size_t vl);
vuint32m2_t __riscv_vncvt_x_mu(vbool16_t vm, vuint32m2_t vd, vuint64m4_t vs2,
                               size_t vl);
vuint32m4_t __riscv_vncvt_x_mu(vbool8_t vm, vuint32m4_t vd, vuint64m8_t vs2,
                               size_t vl);

Vector Integer Compare Intrinsics

// masked functions
// __riscv_vmseq_mu: masked overloads taking signed integer source vectors.
// Every element-type/LMUL pair is declared twice: a vector-vector form
// (second operand vs1) and a vector-scalar form (second operand rs1).
// Within one overload the mask vm, the vd argument and the return value all
// use the same vboolN_t type; vl is passed as size_t.
// NOTE(review): presumably the RVV "set mask if equal" compare, with the _mu
// suffix meaning mask-undisturbed (inactive elements take vd) -- confirm
// against the RVV intrinsics specification.
// 8-bit elements (vint8*, int8_t scalar).
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vbool4_t __riscv_vmseq_mu(vbool4_t vm, vbool4_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vbool4_t __riscv_vmseq_mu(vbool4_t vm, vbool4_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vbool2_t __riscv_vmseq_mu(vbool2_t vm, vbool2_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vbool2_t __riscv_vmseq_mu(vbool2_t vm, vbool2_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vbool1_t __riscv_vmseq_mu(vbool1_t vm, vbool1_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vbool1_t __riscv_vmseq_mu(vbool1_t vm, vbool1_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
// 16-bit elements (vint16*, int16_t scalar).
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vint16mf4_t vs2,
                           vint16mf4_t vs1, size_t vl);
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vint16mf4_t vs2,
                           int16_t rs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vint16mf2_t vs2,
                           vint16mf2_t vs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vint16mf2_t vs2,
                           int16_t rs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vint16m2_t vs2,
                          vint16m2_t vs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vint16m2_t vs2, int16_t rs1,
                          size_t vl);
vbool4_t __riscv_vmseq_mu(vbool4_t vm, vbool4_t vd, vint16m4_t vs2,
                          vint16m4_t vs1, size_t vl);
vbool4_t __riscv_vmseq_mu(vbool4_t vm, vbool4_t vd, vint16m4_t vs2, int16_t rs1,
                          size_t vl);
vbool2_t __riscv_vmseq_mu(vbool2_t vm, vbool2_t vd, vint16m8_t vs2,
                          vint16m8_t vs1, size_t vl);
vbool2_t __riscv_vmseq_mu(vbool2_t vm, vbool2_t vd, vint16m8_t vs2, int16_t rs1,
                          size_t vl);
// 32-bit elements (vint32*, int32_t scalar).
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vint32mf2_t vs2,
                           vint32mf2_t vs1, size_t vl);
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vint32mf2_t vs2,
                           int32_t rs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vint32m4_t vs2,
                          vint32m4_t vs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vint32m4_t vs2, int32_t rs1,
                          size_t vl);
vbool4_t __riscv_vmseq_mu(vbool4_t vm, vbool4_t vd, vint32m8_t vs2,
                          vint32m8_t vs1, size_t vl);
vbool4_t __riscv_vmseq_mu(vbool4_t vm, vbool4_t vd, vint32m8_t vs2, int32_t rs1,
                          size_t vl);
// 64-bit elements (vint64*, int64_t scalar).
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vint64m8_t vs2,
                          vint64m8_t vs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vint64m8_t vs2, int64_t rs1,
                          size_t vl);
// __riscv_vmsne_mu: masked overloads taking signed integer source vectors.
// Every element-type/LMUL pair is declared twice: a vector-vector form
// (second operand vs1) and a vector-scalar form (second operand rs1).
// Within one overload the mask vm, the vd argument and the return value all
// use the same vboolN_t type; vl is passed as size_t.
// NOTE(review): presumably the RVV "set mask if not equal" compare, with the
// _mu suffix meaning mask-undisturbed (inactive elements take vd) -- confirm
// against the RVV intrinsics specification.
// 8-bit elements (vint8*, int8_t scalar).
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsne_mu(vbool4_t vm, vbool4_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vbool4_t __riscv_vmsne_mu(vbool4_t vm, vbool4_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vbool2_t __riscv_vmsne_mu(vbool2_t vm, vbool2_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vbool2_t __riscv_vmsne_mu(vbool2_t vm, vbool2_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vbool1_t __riscv_vmsne_mu(vbool1_t vm, vbool1_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vbool1_t __riscv_vmsne_mu(vbool1_t vm, vbool1_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
// 16-bit elements (vint16*, int16_t scalar).
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vint16mf4_t vs2,
                           vint16mf4_t vs1, size_t vl);
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vint16mf4_t vs2,
                           int16_t rs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vint16mf2_t vs2,
                           vint16mf2_t vs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vint16mf2_t vs2,
                           int16_t rs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vint16m2_t vs2,
                          vint16m2_t vs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vint16m2_t vs2, int16_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsne_mu(vbool4_t vm, vbool4_t vd, vint16m4_t vs2,
                          vint16m4_t vs1, size_t vl);
vbool4_t __riscv_vmsne_mu(vbool4_t vm, vbool4_t vd, vint16m4_t vs2, int16_t rs1,
                          size_t vl);
vbool2_t __riscv_vmsne_mu(vbool2_t vm, vbool2_t vd, vint16m8_t vs2,
                          vint16m8_t vs1, size_t vl);
vbool2_t __riscv_vmsne_mu(vbool2_t vm, vbool2_t vd, vint16m8_t vs2, int16_t rs1,
                          size_t vl);
// 32-bit elements (vint32*, int32_t scalar).
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vint32mf2_t vs2,
                           vint32mf2_t vs1, size_t vl);
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vint32mf2_t vs2,
                           int32_t rs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vint32m4_t vs2,
                          vint32m4_t vs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vint32m4_t vs2, int32_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsne_mu(vbool4_t vm, vbool4_t vd, vint32m8_t vs2,
                          vint32m8_t vs1, size_t vl);
vbool4_t __riscv_vmsne_mu(vbool4_t vm, vbool4_t vd, vint32m8_t vs2, int32_t rs1,
                          size_t vl);
// 64-bit elements (vint64*, int64_t scalar).
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vint64m8_t vs2,
                          vint64m8_t vs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vint64m8_t vs2, int64_t rs1,
                          size_t vl);
// __riscv_vmslt_mu: masked overloads taking signed integer source vectors.
// Every element-type/LMUL pair is declared twice: a vector-vector form
// (second operand vs1) and a vector-scalar form (second operand rs1).
// Within one overload the mask vm, the vd argument and the return value all
// use the same vboolN_t type; vl is passed as size_t.
// NOTE(review): presumably the RVV signed "set mask if less than" compare,
// with the _mu suffix meaning mask-undisturbed (inactive elements take vd)
// -- confirm against the RVV intrinsics specification.
// 8-bit elements (vint8*, int8_t scalar).
vbool64_t __riscv_vmslt_mu(vbool64_t vm, vbool64_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vbool64_t __riscv_vmslt_mu(vbool64_t vm, vbool64_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vbool32_t __riscv_vmslt_mu(vbool32_t vm, vbool32_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vbool32_t __riscv_vmslt_mu(vbool32_t vm, vbool32_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vbool16_t __riscv_vmslt_mu(vbool16_t vm, vbool16_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vbool16_t __riscv_vmslt_mu(vbool16_t vm, vbool16_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vbool8_t __riscv_vmslt_mu(vbool8_t vm, vbool8_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vbool8_t __riscv_vmslt_mu(vbool8_t vm, vbool8_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vbool4_t __riscv_vmslt_mu(vbool4_t vm, vbool4_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vbool4_t __riscv_vmslt_mu(vbool4_t vm, vbool4_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vbool2_t __riscv_vmslt_mu(vbool2_t vm, vbool2_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vbool2_t __riscv_vmslt_mu(vbool2_t vm, vbool2_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vbool1_t __riscv_vmslt_mu(vbool1_t vm, vbool1_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vbool1_t __riscv_vmslt_mu(vbool1_t vm, vbool1_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
// 16-bit elements (vint16*, int16_t scalar).
vbool64_t __riscv_vmslt_mu(vbool64_t vm, vbool64_t vd, vint16mf4_t vs2,
                           vint16mf4_t vs1, size_t vl);
vbool64_t __riscv_vmslt_mu(vbool64_t vm, vbool64_t vd, vint16mf4_t vs2,
                           int16_t rs1, size_t vl);
vbool32_t __riscv_vmslt_mu(vbool32_t vm, vbool32_t vd, vint16mf2_t vs2,
                           vint16mf2_t vs1, size_t vl);
vbool32_t __riscv_vmslt_mu(vbool32_t vm, vbool32_t vd, vint16mf2_t vs2,
                           int16_t rs1, size_t vl);
vbool16_t __riscv_vmslt_mu(vbool16_t vm, vbool16_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vbool16_t __riscv_vmslt_mu(vbool16_t vm, vbool16_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vbool8_t __riscv_vmslt_mu(vbool8_t vm, vbool8_t vd, vint16m2_t vs2,
                          vint16m2_t vs1, size_t vl);
vbool8_t __riscv_vmslt_mu(vbool8_t vm, vbool8_t vd, vint16m2_t vs2, int16_t rs1,
                          size_t vl);
vbool4_t __riscv_vmslt_mu(vbool4_t vm, vbool4_t vd, vint16m4_t vs2,
                          vint16m4_t vs1, size_t vl);
vbool4_t __riscv_vmslt_mu(vbool4_t vm, vbool4_t vd, vint16m4_t vs2, int16_t rs1,
                          size_t vl);
vbool2_t __riscv_vmslt_mu(vbool2_t vm, vbool2_t vd, vint16m8_t vs2,
                          vint16m8_t vs1, size_t vl);
vbool2_t __riscv_vmslt_mu(vbool2_t vm, vbool2_t vd, vint16m8_t vs2, int16_t rs1,
                          size_t vl);
// 32-bit elements (vint32*, int32_t scalar).
vbool64_t __riscv_vmslt_mu(vbool64_t vm, vbool64_t vd, vint32mf2_t vs2,
                           vint32mf2_t vs1, size_t vl);
vbool64_t __riscv_vmslt_mu(vbool64_t vm, vbool64_t vd, vint32mf2_t vs2,
                           int32_t rs1, size_t vl);
vbool32_t __riscv_vmslt_mu(vbool32_t vm, vbool32_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vbool32_t __riscv_vmslt_mu(vbool32_t vm, vbool32_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vbool16_t __riscv_vmslt_mu(vbool16_t vm, vbool16_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vbool16_t __riscv_vmslt_mu(vbool16_t vm, vbool16_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vbool8_t __riscv_vmslt_mu(vbool8_t vm, vbool8_t vd, vint32m4_t vs2,
                          vint32m4_t vs1, size_t vl);
vbool8_t __riscv_vmslt_mu(vbool8_t vm, vbool8_t vd, vint32m4_t vs2, int32_t rs1,
                          size_t vl);
vbool4_t __riscv_vmslt_mu(vbool4_t vm, vbool4_t vd, vint32m8_t vs2,
                          vint32m8_t vs1, size_t vl);
vbool4_t __riscv_vmslt_mu(vbool4_t vm, vbool4_t vd, vint32m8_t vs2, int32_t rs1,
                          size_t vl);
// 64-bit elements (vint64*, int64_t scalar).
vbool64_t __riscv_vmslt_mu(vbool64_t vm, vbool64_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vbool64_t __riscv_vmslt_mu(vbool64_t vm, vbool64_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vbool32_t __riscv_vmslt_mu(vbool32_t vm, vbool32_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vbool32_t __riscv_vmslt_mu(vbool32_t vm, vbool32_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vbool16_t __riscv_vmslt_mu(vbool16_t vm, vbool16_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vbool16_t __riscv_vmslt_mu(vbool16_t vm, vbool16_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vbool8_t __riscv_vmslt_mu(vbool8_t vm, vbool8_t vd, vint64m8_t vs2,
                          vint64m8_t vs1, size_t vl);
vbool8_t __riscv_vmslt_mu(vbool8_t vm, vbool8_t vd, vint64m8_t vs2, int64_t rs1,
                          size_t vl);
// __riscv_vmsle_mu: masked overloads taking signed integer source vectors.
// Every element-type/LMUL pair is declared twice: a vector-vector form
// (second operand vs1) and a vector-scalar form (second operand rs1).
// Within one overload the mask vm, the vd argument and the return value all
// use the same vboolN_t type; vl is passed as size_t.
// NOTE(review): presumably the RVV signed "set mask if less than or equal"
// compare, with the _mu suffix meaning mask-undisturbed (inactive elements
// take vd) -- confirm against the RVV intrinsics specification.
// 8-bit elements (vint8*, int8_t scalar).
vbool64_t __riscv_vmsle_mu(vbool64_t vm, vbool64_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vbool64_t __riscv_vmsle_mu(vbool64_t vm, vbool64_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vbool32_t __riscv_vmsle_mu(vbool32_t vm, vbool32_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vbool32_t __riscv_vmsle_mu(vbool32_t vm, vbool32_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vbool16_t __riscv_vmsle_mu(vbool16_t vm, vbool16_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vbool16_t __riscv_vmsle_mu(vbool16_t vm, vbool16_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vbool8_t __riscv_vmsle_mu(vbool8_t vm, vbool8_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vbool8_t __riscv_vmsle_mu(vbool8_t vm, vbool8_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsle_mu(vbool4_t vm, vbool4_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vbool4_t __riscv_vmsle_mu(vbool4_t vm, vbool4_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vbool2_t __riscv_vmsle_mu(vbool2_t vm, vbool2_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vbool2_t __riscv_vmsle_mu(vbool2_t vm, vbool2_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vbool1_t __riscv_vmsle_mu(vbool1_t vm, vbool1_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vbool1_t __riscv_vmsle_mu(vbool1_t vm, vbool1_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
// 16-bit elements (vint16*, int16_t scalar).
vbool64_t __riscv_vmsle_mu(vbool64_t vm, vbool64_t vd, vint16mf4_t vs2,
                           vint16mf4_t vs1, size_t vl);
vbool64_t __riscv_vmsle_mu(vbool64_t vm, vbool64_t vd, vint16mf4_t vs2,
                           int16_t rs1, size_t vl);
vbool32_t __riscv_vmsle_mu(vbool32_t vm, vbool32_t vd, vint16mf2_t vs2,
                           vint16mf2_t vs1, size_t vl);
vbool32_t __riscv_vmsle_mu(vbool32_t vm, vbool32_t vd, vint16mf2_t vs2,
                           int16_t rs1, size_t vl);
vbool16_t __riscv_vmsle_mu(vbool16_t vm, vbool16_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vbool16_t __riscv_vmsle_mu(vbool16_t vm, vbool16_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vbool8_t __riscv_vmsle_mu(vbool8_t vm, vbool8_t vd, vint16m2_t vs2,
                          vint16m2_t vs1, size_t vl);
vbool8_t __riscv_vmsle_mu(vbool8_t vm, vbool8_t vd, vint16m2_t vs2, int16_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsle_mu(vbool4_t vm, vbool4_t vd, vint16m4_t vs2,
                          vint16m4_t vs1, size_t vl);
vbool4_t __riscv_vmsle_mu(vbool4_t vm, vbool4_t vd, vint16m4_t vs2, int16_t rs1,
                          size_t vl);
vbool2_t __riscv_vmsle_mu(vbool2_t vm, vbool2_t vd, vint16m8_t vs2,
                          vint16m8_t vs1, size_t vl);
vbool2_t __riscv_vmsle_mu(vbool2_t vm, vbool2_t vd, vint16m8_t vs2, int16_t rs1,
                          size_t vl);
// 32-bit elements (vint32*, int32_t scalar).
vbool64_t __riscv_vmsle_mu(vbool64_t vm, vbool64_t vd, vint32mf2_t vs2,
                           vint32mf2_t vs1, size_t vl);
vbool64_t __riscv_vmsle_mu(vbool64_t vm, vbool64_t vd, vint32mf2_t vs2,
                           int32_t rs1, size_t vl);
vbool32_t __riscv_vmsle_mu(vbool32_t vm, vbool32_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vbool32_t __riscv_vmsle_mu(vbool32_t vm, vbool32_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vbool16_t __riscv_vmsle_mu(vbool16_t vm, vbool16_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vbool16_t __riscv_vmsle_mu(vbool16_t vm, vbool16_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vbool8_t __riscv_vmsle_mu(vbool8_t vm, vbool8_t vd, vint32m4_t vs2,
                          vint32m4_t vs1, size_t vl);
vbool8_t __riscv_vmsle_mu(vbool8_t vm, vbool8_t vd, vint32m4_t vs2, int32_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsle_mu(vbool4_t vm, vbool4_t vd, vint32m8_t vs2,
                          vint32m8_t vs1, size_t vl);
vbool4_t __riscv_vmsle_mu(vbool4_t vm, vbool4_t vd, vint32m8_t vs2, int32_t rs1,
                          size_t vl);
// 64-bit elements (vint64*, int64_t scalar).
vbool64_t __riscv_vmsle_mu(vbool64_t vm, vbool64_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vbool64_t __riscv_vmsle_mu(vbool64_t vm, vbool64_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vbool32_t __riscv_vmsle_mu(vbool32_t vm, vbool32_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vbool32_t __riscv_vmsle_mu(vbool32_t vm, vbool32_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vbool16_t __riscv_vmsle_mu(vbool16_t vm, vbool16_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vbool16_t __riscv_vmsle_mu(vbool16_t vm, vbool16_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vbool8_t __riscv_vmsle_mu(vbool8_t vm, vbool8_t vd, vint64m8_t vs2,
                          vint64m8_t vs1, size_t vl);
vbool8_t __riscv_vmsle_mu(vbool8_t vm, vbool8_t vd, vint64m8_t vs2, int64_t rs1,
                          size_t vl);
// __riscv_vmsgt_mu: masked overloads taking signed integer source vectors.
// Every element-type/LMUL pair is declared twice: a vector-vector form
// (second operand vs1) and a vector-scalar form (second operand rs1).
// Within one overload the mask vm, the vd argument and the return value all
// use the same vboolN_t type; vl is passed as size_t.
// NOTE(review): presumably the RVV signed "set mask if greater than" compare,
// with the _mu suffix meaning mask-undisturbed (inactive elements take vd)
// -- confirm against the RVV intrinsics specification.
// 8-bit elements (vint8*, int8_t scalar).
vbool64_t __riscv_vmsgt_mu(vbool64_t vm, vbool64_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vbool64_t __riscv_vmsgt_mu(vbool64_t vm, vbool64_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vbool32_t __riscv_vmsgt_mu(vbool32_t vm, vbool32_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vbool32_t __riscv_vmsgt_mu(vbool32_t vm, vbool32_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vbool16_t __riscv_vmsgt_mu(vbool16_t vm, vbool16_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vbool16_t __riscv_vmsgt_mu(vbool16_t vm, vbool16_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vbool8_t __riscv_vmsgt_mu(vbool8_t vm, vbool8_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vbool8_t __riscv_vmsgt_mu(vbool8_t vm, vbool8_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsgt_mu(vbool4_t vm, vbool4_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vbool4_t __riscv_vmsgt_mu(vbool4_t vm, vbool4_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vbool2_t __riscv_vmsgt_mu(vbool2_t vm, vbool2_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vbool2_t __riscv_vmsgt_mu(vbool2_t vm, vbool2_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vbool1_t __riscv_vmsgt_mu(vbool1_t vm, vbool1_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vbool1_t __riscv_vmsgt_mu(vbool1_t vm, vbool1_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
// 16-bit elements (vint16*, int16_t scalar).
vbool64_t __riscv_vmsgt_mu(vbool64_t vm, vbool64_t vd, vint16mf4_t vs2,
                           vint16mf4_t vs1, size_t vl);
vbool64_t __riscv_vmsgt_mu(vbool64_t vm, vbool64_t vd, vint16mf4_t vs2,
                           int16_t rs1, size_t vl);
vbool32_t __riscv_vmsgt_mu(vbool32_t vm, vbool32_t vd, vint16mf2_t vs2,
                           vint16mf2_t vs1, size_t vl);
vbool32_t __riscv_vmsgt_mu(vbool32_t vm, vbool32_t vd, vint16mf2_t vs2,
                           int16_t rs1, size_t vl);
vbool16_t __riscv_vmsgt_mu(vbool16_t vm, vbool16_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vbool16_t __riscv_vmsgt_mu(vbool16_t vm, vbool16_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vbool8_t __riscv_vmsgt_mu(vbool8_t vm, vbool8_t vd, vint16m2_t vs2,
                          vint16m2_t vs1, size_t vl);
vbool8_t __riscv_vmsgt_mu(vbool8_t vm, vbool8_t vd, vint16m2_t vs2, int16_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsgt_mu(vbool4_t vm, vbool4_t vd, vint16m4_t vs2,
                          vint16m4_t vs1, size_t vl);
vbool4_t __riscv_vmsgt_mu(vbool4_t vm, vbool4_t vd, vint16m4_t vs2, int16_t rs1,
                          size_t vl);
vbool2_t __riscv_vmsgt_mu(vbool2_t vm, vbool2_t vd, vint16m8_t vs2,
                          vint16m8_t vs1, size_t vl);
vbool2_t __riscv_vmsgt_mu(vbool2_t vm, vbool2_t vd, vint16m8_t vs2, int16_t rs1,
                          size_t vl);
// 32-bit elements (vint32*, int32_t scalar).
vbool64_t __riscv_vmsgt_mu(vbool64_t vm, vbool64_t vd, vint32mf2_t vs2,
                           vint32mf2_t vs1, size_t vl);
vbool64_t __riscv_vmsgt_mu(vbool64_t vm, vbool64_t vd, vint32mf2_t vs2,
                           int32_t rs1, size_t vl);
vbool32_t __riscv_vmsgt_mu(vbool32_t vm, vbool32_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vbool32_t __riscv_vmsgt_mu(vbool32_t vm, vbool32_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vbool16_t __riscv_vmsgt_mu(vbool16_t vm, vbool16_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vbool16_t __riscv_vmsgt_mu(vbool16_t vm, vbool16_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vbool8_t __riscv_vmsgt_mu(vbool8_t vm, vbool8_t vd, vint32m4_t vs2,
                          vint32m4_t vs1, size_t vl);
vbool8_t __riscv_vmsgt_mu(vbool8_t vm, vbool8_t vd, vint32m4_t vs2, int32_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsgt_mu(vbool4_t vm, vbool4_t vd, vint32m8_t vs2,
                          vint32m8_t vs1, size_t vl);
vbool4_t __riscv_vmsgt_mu(vbool4_t vm, vbool4_t vd, vint32m8_t vs2, int32_t rs1,
                          size_t vl);
// 64-bit elements (vint64*, int64_t scalar).
vbool64_t __riscv_vmsgt_mu(vbool64_t vm, vbool64_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vbool64_t __riscv_vmsgt_mu(vbool64_t vm, vbool64_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vbool32_t __riscv_vmsgt_mu(vbool32_t vm, vbool32_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vbool32_t __riscv_vmsgt_mu(vbool32_t vm, vbool32_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vbool16_t __riscv_vmsgt_mu(vbool16_t vm, vbool16_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vbool16_t __riscv_vmsgt_mu(vbool16_t vm, vbool16_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vbool8_t __riscv_vmsgt_mu(vbool8_t vm, vbool8_t vd, vint64m8_t vs2,
                          vint64m8_t vs1, size_t vl);
vbool8_t __riscv_vmsgt_mu(vbool8_t vm, vbool8_t vd, vint64m8_t vs2, int64_t rs1,
                          size_t vl);
// __riscv_vmsge_mu: masked overloads taking signed integer source vectors.
// Every element-type/LMUL pair is declared twice: a vector-vector form
// (second operand vs1) and a vector-scalar form (second operand rs1).
// Within one overload the mask vm, the vd argument and the return value all
// use the same vboolN_t type; vl is passed as size_t.
// NOTE(review): presumably the RVV signed "set mask if greater than or equal"
// compare, with the _mu suffix meaning mask-undisturbed (inactive elements
// take vd) -- confirm against the RVV intrinsics specification.
// 8-bit elements (vint8*, int8_t scalar).
vbool64_t __riscv_vmsge_mu(vbool64_t vm, vbool64_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vbool64_t __riscv_vmsge_mu(vbool64_t vm, vbool64_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vbool32_t __riscv_vmsge_mu(vbool32_t vm, vbool32_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vbool32_t __riscv_vmsge_mu(vbool32_t vm, vbool32_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vbool16_t __riscv_vmsge_mu(vbool16_t vm, vbool16_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vbool16_t __riscv_vmsge_mu(vbool16_t vm, vbool16_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vbool8_t __riscv_vmsge_mu(vbool8_t vm, vbool8_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vbool8_t __riscv_vmsge_mu(vbool8_t vm, vbool8_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsge_mu(vbool4_t vm, vbool4_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vbool4_t __riscv_vmsge_mu(vbool4_t vm, vbool4_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vbool2_t __riscv_vmsge_mu(vbool2_t vm, vbool2_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vbool2_t __riscv_vmsge_mu(vbool2_t vm, vbool2_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vbool1_t __riscv_vmsge_mu(vbool1_t vm, vbool1_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vbool1_t __riscv_vmsge_mu(vbool1_t vm, vbool1_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
// 16-bit elements (vint16*, int16_t scalar).
vbool64_t __riscv_vmsge_mu(vbool64_t vm, vbool64_t vd, vint16mf4_t vs2,
                           vint16mf4_t vs1, size_t vl);
vbool64_t __riscv_vmsge_mu(vbool64_t vm, vbool64_t vd, vint16mf4_t vs2,
                           int16_t rs1, size_t vl);
vbool32_t __riscv_vmsge_mu(vbool32_t vm, vbool32_t vd, vint16mf2_t vs2,
                           vint16mf2_t vs1, size_t vl);
vbool32_t __riscv_vmsge_mu(vbool32_t vm, vbool32_t vd, vint16mf2_t vs2,
                           int16_t rs1, size_t vl);
vbool16_t __riscv_vmsge_mu(vbool16_t vm, vbool16_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vbool16_t __riscv_vmsge_mu(vbool16_t vm, vbool16_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vbool8_t __riscv_vmsge_mu(vbool8_t vm, vbool8_t vd, vint16m2_t vs2,
                          vint16m2_t vs1, size_t vl);
vbool8_t __riscv_vmsge_mu(vbool8_t vm, vbool8_t vd, vint16m2_t vs2, int16_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsge_mu(vbool4_t vm, vbool4_t vd, vint16m4_t vs2,
                          vint16m4_t vs1, size_t vl);
vbool4_t __riscv_vmsge_mu(vbool4_t vm, vbool4_t vd, vint16m4_t vs2, int16_t rs1,
                          size_t vl);
vbool2_t __riscv_vmsge_mu(vbool2_t vm, vbool2_t vd, vint16m8_t vs2,
                          vint16m8_t vs1, size_t vl);
vbool2_t __riscv_vmsge_mu(vbool2_t vm, vbool2_t vd, vint16m8_t vs2, int16_t rs1,
                          size_t vl);
// 32-bit elements (vint32*, int32_t scalar).
vbool64_t __riscv_vmsge_mu(vbool64_t vm, vbool64_t vd, vint32mf2_t vs2,
                           vint32mf2_t vs1, size_t vl);
vbool64_t __riscv_vmsge_mu(vbool64_t vm, vbool64_t vd, vint32mf2_t vs2,
                           int32_t rs1, size_t vl);
vbool32_t __riscv_vmsge_mu(vbool32_t vm, vbool32_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vbool32_t __riscv_vmsge_mu(vbool32_t vm, vbool32_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vbool16_t __riscv_vmsge_mu(vbool16_t vm, vbool16_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vbool16_t __riscv_vmsge_mu(vbool16_t vm, vbool16_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vbool8_t __riscv_vmsge_mu(vbool8_t vm, vbool8_t vd, vint32m4_t vs2,
                          vint32m4_t vs1, size_t vl);
vbool8_t __riscv_vmsge_mu(vbool8_t vm, vbool8_t vd, vint32m4_t vs2, int32_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsge_mu(vbool4_t vm, vbool4_t vd, vint32m8_t vs2,
                          vint32m8_t vs1, size_t vl);
vbool4_t __riscv_vmsge_mu(vbool4_t vm, vbool4_t vd, vint32m8_t vs2, int32_t rs1,
                          size_t vl);
// 64-bit elements (vint64*, int64_t scalar).
vbool64_t __riscv_vmsge_mu(vbool64_t vm, vbool64_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vbool64_t __riscv_vmsge_mu(vbool64_t vm, vbool64_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vbool32_t __riscv_vmsge_mu(vbool32_t vm, vbool32_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vbool32_t __riscv_vmsge_mu(vbool32_t vm, vbool32_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vbool16_t __riscv_vmsge_mu(vbool16_t vm, vbool16_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vbool16_t __riscv_vmsge_mu(vbool16_t vm, vbool16_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vbool8_t __riscv_vmsge_mu(vbool8_t vm, vbool8_t vd, vint64m8_t vs2,
                          vint64m8_t vs1, size_t vl);
vbool8_t __riscv_vmsge_mu(vbool8_t vm, vbool8_t vd, vint64m8_t vs2, int64_t rs1,
                          size_t vl);
/*
 * __riscv_vmseq_mu -- overloads for unsigned-element operand vectors.
 *
 * Every overload has the shape (vm, vd, vs2, <vs1 | rs1>, vl):
 *   vm, vd : mask values of the same vboolN_t type as the return value
 *   vs2    : vector operand
 *   vs1    : vector operand of the same type as vs2 (vector-vector form)
 *   rs1    : scalar of vs2's element type (vector-scalar form)
 *   vl     : size_t
 * In this listing N = element width / LMUL (e.g. vuint8mf8_t -> vbool64_t,
 * vuint64m8_t -> vbool8_t).
 *
 * NOTE(review): semantics are not visible in this listing. By RVV naming this
 * is presumably the masked "set mask if equal" compare with the
 * mask-undisturbed (_mu) policy, with vd supplying the inactive elements --
 * confirm against the RISC-V vector specification.
 */
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vuint8mf8_t vs2,
                           vuint8mf8_t vs1, size_t vl);
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vuint8mf8_t vs2,
                           uint8_t rs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vuint8mf4_t vs2,
                           vuint8mf4_t vs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vuint8mf4_t vs2,
                           uint8_t rs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vuint8mf2_t vs2,
                           vuint8mf2_t vs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vuint8mf2_t vs2,
                           uint8_t rs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vuint8m1_t vs2,
                          vuint8m1_t vs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vuint8m1_t vs2, uint8_t rs1,
                          size_t vl);
vbool4_t __riscv_vmseq_mu(vbool4_t vm, vbool4_t vd, vuint8m2_t vs2,
                          vuint8m2_t vs1, size_t vl);
vbool4_t __riscv_vmseq_mu(vbool4_t vm, vbool4_t vd, vuint8m2_t vs2, uint8_t rs1,
                          size_t vl);
vbool2_t __riscv_vmseq_mu(vbool2_t vm, vbool2_t vd, vuint8m4_t vs2,
                          vuint8m4_t vs1, size_t vl);
vbool2_t __riscv_vmseq_mu(vbool2_t vm, vbool2_t vd, vuint8m4_t vs2, uint8_t rs1,
                          size_t vl);
vbool1_t __riscv_vmseq_mu(vbool1_t vm, vbool1_t vd, vuint8m8_t vs2,
                          vuint8m8_t vs1, size_t vl);
vbool1_t __riscv_vmseq_mu(vbool1_t vm, vbool1_t vd, vuint8m8_t vs2, uint8_t rs1,
                          size_t vl);
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vuint16mf4_t vs2,
                           vuint16mf4_t vs1, size_t vl);
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vuint16mf4_t vs2,
                           uint16_t rs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vuint16mf2_t vs2,
                           vuint16mf2_t vs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vuint16mf2_t vs2,
                           uint16_t rs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vuint16m1_t vs2,
                           vuint16m1_t vs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vuint16m1_t vs2,
                           uint16_t rs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vuint16m2_t vs2,
                          vuint16m2_t vs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vuint16m2_t vs2,
                          uint16_t rs1, size_t vl);
vbool4_t __riscv_vmseq_mu(vbool4_t vm, vbool4_t vd, vuint16m4_t vs2,
                          vuint16m4_t vs1, size_t vl);
vbool4_t __riscv_vmseq_mu(vbool4_t vm, vbool4_t vd, vuint16m4_t vs2,
                          uint16_t rs1, size_t vl);
vbool2_t __riscv_vmseq_mu(vbool2_t vm, vbool2_t vd, vuint16m8_t vs2,
                          vuint16m8_t vs1, size_t vl);
vbool2_t __riscv_vmseq_mu(vbool2_t vm, vbool2_t vd, vuint16m8_t vs2,
                          uint16_t rs1, size_t vl);
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vuint32mf2_t vs2,
                           vuint32mf2_t vs1, size_t vl);
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vuint32mf2_t vs2,
                           uint32_t rs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vuint32m1_t vs2,
                           vuint32m1_t vs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vuint32m1_t vs2,
                           uint32_t rs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vuint32m2_t vs2,
                           vuint32m2_t vs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vuint32m2_t vs2,
                           uint32_t rs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vuint32m4_t vs2,
                          vuint32m4_t vs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vuint32m4_t vs2,
                          uint32_t rs1, size_t vl);
vbool4_t __riscv_vmseq_mu(vbool4_t vm, vbool4_t vd, vuint32m8_t vs2,
                          vuint32m8_t vs1, size_t vl);
vbool4_t __riscv_vmseq_mu(vbool4_t vm, vbool4_t vd, vuint32m8_t vs2,
                          uint32_t rs1, size_t vl);
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vuint64m1_t vs2,
                           vuint64m1_t vs1, size_t vl);
vbool64_t __riscv_vmseq_mu(vbool64_t vm, vbool64_t vd, vuint64m1_t vs2,
                           uint64_t rs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vuint64m2_t vs2,
                           vuint64m2_t vs1, size_t vl);
vbool32_t __riscv_vmseq_mu(vbool32_t vm, vbool32_t vd, vuint64m2_t vs2,
                           uint64_t rs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vuint64m4_t vs2,
                           vuint64m4_t vs1, size_t vl);
vbool16_t __riscv_vmseq_mu(vbool16_t vm, vbool16_t vd, vuint64m4_t vs2,
                           uint64_t rs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vuint64m8_t vs2,
                          vuint64m8_t vs1, size_t vl);
vbool8_t __riscv_vmseq_mu(vbool8_t vm, vbool8_t vd, vuint64m8_t vs2,
                          uint64_t rs1, size_t vl);
/*
 * __riscv_vmsne_mu -- overloads for unsigned-element operand vectors.
 *
 * Every overload has the shape (vm, vd, vs2, <vs1 | rs1>, vl):
 *   vm, vd : mask values of the same vboolN_t type as the return value
 *   vs2    : vector operand
 *   vs1    : vector operand of the same type as vs2 (vector-vector form)
 *   rs1    : scalar of vs2's element type (vector-scalar form)
 *   vl     : size_t
 * In this listing N = element width / LMUL (e.g. vuint8mf8_t -> vbool64_t,
 * vuint64m8_t -> vbool8_t).
 *
 * NOTE(review): semantics are not visible in this listing. By RVV naming this
 * is presumably the masked "set mask if not equal" compare with the
 * mask-undisturbed (_mu) policy, with vd supplying the inactive elements --
 * confirm against the RISC-V vector specification.
 */
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vuint8mf8_t vs2,
                           vuint8mf8_t vs1, size_t vl);
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vuint8mf8_t vs2,
                           uint8_t rs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vuint8mf4_t vs2,
                           vuint8mf4_t vs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vuint8mf4_t vs2,
                           uint8_t rs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vuint8mf2_t vs2,
                           vuint8mf2_t vs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vuint8mf2_t vs2,
                           uint8_t rs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vuint8m1_t vs2,
                          vuint8m1_t vs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vuint8m1_t vs2, uint8_t rs1,
                          size_t vl);
vbool4_t __riscv_vmsne_mu(vbool4_t vm, vbool4_t vd, vuint8m2_t vs2,
                          vuint8m2_t vs1, size_t vl);
vbool4_t __riscv_vmsne_mu(vbool4_t vm, vbool4_t vd, vuint8m2_t vs2, uint8_t rs1,
                          size_t vl);
vbool2_t __riscv_vmsne_mu(vbool2_t vm, vbool2_t vd, vuint8m4_t vs2,
                          vuint8m4_t vs1, size_t vl);
vbool2_t __riscv_vmsne_mu(vbool2_t vm, vbool2_t vd, vuint8m4_t vs2, uint8_t rs1,
                          size_t vl);
vbool1_t __riscv_vmsne_mu(vbool1_t vm, vbool1_t vd, vuint8m8_t vs2,
                          vuint8m8_t vs1, size_t vl);
vbool1_t __riscv_vmsne_mu(vbool1_t vm, vbool1_t vd, vuint8m8_t vs2, uint8_t rs1,
                          size_t vl);
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vuint16mf4_t vs2,
                           vuint16mf4_t vs1, size_t vl);
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vuint16mf4_t vs2,
                           uint16_t rs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vuint16mf2_t vs2,
                           vuint16mf2_t vs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vuint16mf2_t vs2,
                           uint16_t rs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vuint16m1_t vs2,
                           vuint16m1_t vs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vuint16m1_t vs2,
                           uint16_t rs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vuint16m2_t vs2,
                          vuint16m2_t vs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vuint16m2_t vs2,
                          uint16_t rs1, size_t vl);
vbool4_t __riscv_vmsne_mu(vbool4_t vm, vbool4_t vd, vuint16m4_t vs2,
                          vuint16m4_t vs1, size_t vl);
vbool4_t __riscv_vmsne_mu(vbool4_t vm, vbool4_t vd, vuint16m4_t vs2,
                          uint16_t rs1, size_t vl);
vbool2_t __riscv_vmsne_mu(vbool2_t vm, vbool2_t vd, vuint16m8_t vs2,
                          vuint16m8_t vs1, size_t vl);
vbool2_t __riscv_vmsne_mu(vbool2_t vm, vbool2_t vd, vuint16m8_t vs2,
                          uint16_t rs1, size_t vl);
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vuint32mf2_t vs2,
                           vuint32mf2_t vs1, size_t vl);
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vuint32mf2_t vs2,
                           uint32_t rs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vuint32m1_t vs2,
                           vuint32m1_t vs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vuint32m1_t vs2,
                           uint32_t rs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vuint32m2_t vs2,
                           vuint32m2_t vs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vuint32m2_t vs2,
                           uint32_t rs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vuint32m4_t vs2,
                          vuint32m4_t vs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vuint32m4_t vs2,
                          uint32_t rs1, size_t vl);
vbool4_t __riscv_vmsne_mu(vbool4_t vm, vbool4_t vd, vuint32m8_t vs2,
                          vuint32m8_t vs1, size_t vl);
vbool4_t __riscv_vmsne_mu(vbool4_t vm, vbool4_t vd, vuint32m8_t vs2,
                          uint32_t rs1, size_t vl);
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vuint64m1_t vs2,
                           vuint64m1_t vs1, size_t vl);
vbool64_t __riscv_vmsne_mu(vbool64_t vm, vbool64_t vd, vuint64m1_t vs2,
                           uint64_t rs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vuint64m2_t vs2,
                           vuint64m2_t vs1, size_t vl);
vbool32_t __riscv_vmsne_mu(vbool32_t vm, vbool32_t vd, vuint64m2_t vs2,
                           uint64_t rs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vuint64m4_t vs2,
                           vuint64m4_t vs1, size_t vl);
vbool16_t __riscv_vmsne_mu(vbool16_t vm, vbool16_t vd, vuint64m4_t vs2,
                           uint64_t rs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vuint64m8_t vs2,
                          vuint64m8_t vs1, size_t vl);
vbool8_t __riscv_vmsne_mu(vbool8_t vm, vbool8_t vd, vuint64m8_t vs2,
                          uint64_t rs1, size_t vl);
/*
 * __riscv_vmsltu_mu -- overloads over unsigned-element operand vectors.
 *
 * Every overload has the shape (vm, vd, vs2, <vs1 | rs1>, vl):
 *   vm, vd : mask values of the same vboolN_t type as the return value
 *   vs2    : vector operand
 *   vs1    : vector operand of the same type as vs2 (vector-vector form)
 *   rs1    : scalar of vs2's element type (vector-scalar form)
 *   vl     : size_t
 * In this listing N = element width / LMUL (e.g. vuint8mf8_t -> vbool64_t,
 * vuint64m8_t -> vbool8_t).
 *
 * NOTE(review): semantics are not visible in this listing. By RVV naming this
 * is presumably the masked unsigned "set mask if less than" compare with the
 * mask-undisturbed (_mu) policy, with vd supplying the inactive elements --
 * confirm against the RISC-V vector specification.
 */
vbool64_t __riscv_vmsltu_mu(vbool64_t vm, vbool64_t vd, vuint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vbool64_t __riscv_vmsltu_mu(vbool64_t vm, vbool64_t vd, vuint8mf8_t vs2,
                            uint8_t rs1, size_t vl);
vbool32_t __riscv_vmsltu_mu(vbool32_t vm, vbool32_t vd, vuint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vbool32_t __riscv_vmsltu_mu(vbool32_t vm, vbool32_t vd, vuint8mf4_t vs2,
                            uint8_t rs1, size_t vl);
vbool16_t __riscv_vmsltu_mu(vbool16_t vm, vbool16_t vd, vuint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vbool16_t __riscv_vmsltu_mu(vbool16_t vm, vbool16_t vd, vuint8mf2_t vs2,
                            uint8_t rs1, size_t vl);
vbool8_t __riscv_vmsltu_mu(vbool8_t vm, vbool8_t vd, vuint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vbool8_t __riscv_vmsltu_mu(vbool8_t vm, vbool8_t vd, vuint8m1_t vs2,
                           uint8_t rs1, size_t vl);
vbool4_t __riscv_vmsltu_mu(vbool4_t vm, vbool4_t vd, vuint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vbool4_t __riscv_vmsltu_mu(vbool4_t vm, vbool4_t vd, vuint8m2_t vs2,
                           uint8_t rs1, size_t vl);
vbool2_t __riscv_vmsltu_mu(vbool2_t vm, vbool2_t vd, vuint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vbool2_t __riscv_vmsltu_mu(vbool2_t vm, vbool2_t vd, vuint8m4_t vs2,
                           uint8_t rs1, size_t vl);
vbool1_t __riscv_vmsltu_mu(vbool1_t vm, vbool1_t vd, vuint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vbool1_t __riscv_vmsltu_mu(vbool1_t vm, vbool1_t vd, vuint8m8_t vs2,
                           uint8_t rs1, size_t vl);
vbool64_t __riscv_vmsltu_mu(vbool64_t vm, vbool64_t vd, vuint16mf4_t vs2,
                            vuint16mf4_t vs1, size_t vl);
vbool64_t __riscv_vmsltu_mu(vbool64_t vm, vbool64_t vd, vuint16mf4_t vs2,
                            uint16_t rs1, size_t vl);
vbool32_t __riscv_vmsltu_mu(vbool32_t vm, vbool32_t vd, vuint16mf2_t vs2,
                            vuint16mf2_t vs1, size_t vl);
vbool32_t __riscv_vmsltu_mu(vbool32_t vm, vbool32_t vd, vuint16mf2_t vs2,
                            uint16_t rs1, size_t vl);
vbool16_t __riscv_vmsltu_mu(vbool16_t vm, vbool16_t vd, vuint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vbool16_t __riscv_vmsltu_mu(vbool16_t vm, vbool16_t vd, vuint16m1_t vs2,
                            uint16_t rs1, size_t vl);
vbool8_t __riscv_vmsltu_mu(vbool8_t vm, vbool8_t vd, vuint16m2_t vs2,
                           vuint16m2_t vs1, size_t vl);
vbool8_t __riscv_vmsltu_mu(vbool8_t vm, vbool8_t vd, vuint16m2_t vs2,
                           uint16_t rs1, size_t vl);
vbool4_t __riscv_vmsltu_mu(vbool4_t vm, vbool4_t vd, vuint16m4_t vs2,
                           vuint16m4_t vs1, size_t vl);
vbool4_t __riscv_vmsltu_mu(vbool4_t vm, vbool4_t vd, vuint16m4_t vs2,
                           uint16_t rs1, size_t vl);
vbool2_t __riscv_vmsltu_mu(vbool2_t vm, vbool2_t vd, vuint16m8_t vs2,
                           vuint16m8_t vs1, size_t vl);
vbool2_t __riscv_vmsltu_mu(vbool2_t vm, vbool2_t vd, vuint16m8_t vs2,
                           uint16_t rs1, size_t vl);
vbool64_t __riscv_vmsltu_mu(vbool64_t vm, vbool64_t vd, vuint32mf2_t vs2,
                            vuint32mf2_t vs1, size_t vl);
vbool64_t __riscv_vmsltu_mu(vbool64_t vm, vbool64_t vd, vuint32mf2_t vs2,
                            uint32_t rs1, size_t vl);
vbool32_t __riscv_vmsltu_mu(vbool32_t vm, vbool32_t vd, vuint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vbool32_t __riscv_vmsltu_mu(vbool32_t vm, vbool32_t vd, vuint32m1_t vs2,
                            uint32_t rs1, size_t vl);
vbool16_t __riscv_vmsltu_mu(vbool16_t vm, vbool16_t vd, vuint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vbool16_t __riscv_vmsltu_mu(vbool16_t vm, vbool16_t vd, vuint32m2_t vs2,
                            uint32_t rs1, size_t vl);
vbool8_t __riscv_vmsltu_mu(vbool8_t vm, vbool8_t vd, vuint32m4_t vs2,
                           vuint32m4_t vs1, size_t vl);
vbool8_t __riscv_vmsltu_mu(vbool8_t vm, vbool8_t vd, vuint32m4_t vs2,
                           uint32_t rs1, size_t vl);
vbool4_t __riscv_vmsltu_mu(vbool4_t vm, vbool4_t vd, vuint32m8_t vs2,
                           vuint32m8_t vs1, size_t vl);
vbool4_t __riscv_vmsltu_mu(vbool4_t vm, vbool4_t vd, vuint32m8_t vs2,
                           uint32_t rs1, size_t vl);
vbool64_t __riscv_vmsltu_mu(vbool64_t vm, vbool64_t vd, vuint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vbool64_t __riscv_vmsltu_mu(vbool64_t vm, vbool64_t vd, vuint64m1_t vs2,
                            uint64_t rs1, size_t vl);
vbool32_t __riscv_vmsltu_mu(vbool32_t vm, vbool32_t vd, vuint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vbool32_t __riscv_vmsltu_mu(vbool32_t vm, vbool32_t vd, vuint64m2_t vs2,
                            uint64_t rs1, size_t vl);
vbool16_t __riscv_vmsltu_mu(vbool16_t vm, vbool16_t vd, vuint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vbool16_t __riscv_vmsltu_mu(vbool16_t vm, vbool16_t vd, vuint64m4_t vs2,
                            uint64_t rs1, size_t vl);
vbool8_t __riscv_vmsltu_mu(vbool8_t vm, vbool8_t vd, vuint64m8_t vs2,
                           vuint64m8_t vs1, size_t vl);
vbool8_t __riscv_vmsltu_mu(vbool8_t vm, vbool8_t vd, vuint64m8_t vs2,
                           uint64_t rs1, size_t vl);
/*
 * __riscv_vmsleu_mu -- overloads over unsigned-element operand vectors.
 *
 * Every overload has the shape (vm, vd, vs2, <vs1 | rs1>, vl):
 *   vm, vd : mask values of the same vboolN_t type as the return value
 *   vs2    : vector operand
 *   vs1    : vector operand of the same type as vs2 (vector-vector form)
 *   rs1    : scalar of vs2's element type (vector-scalar form)
 *   vl     : size_t
 * In this listing N = element width / LMUL (e.g. vuint8mf8_t -> vbool64_t,
 * vuint64m8_t -> vbool8_t).
 *
 * NOTE(review): semantics are not visible in this listing. By RVV naming this
 * is presumably the masked unsigned "set mask if less than or equal" compare
 * with the mask-undisturbed (_mu) policy, with vd supplying the inactive
 * elements -- confirm against the RISC-V vector specification.
 */
vbool64_t __riscv_vmsleu_mu(vbool64_t vm, vbool64_t vd, vuint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vbool64_t __riscv_vmsleu_mu(vbool64_t vm, vbool64_t vd, vuint8mf8_t vs2,
                            uint8_t rs1, size_t vl);
vbool32_t __riscv_vmsleu_mu(vbool32_t vm, vbool32_t vd, vuint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vbool32_t __riscv_vmsleu_mu(vbool32_t vm, vbool32_t vd, vuint8mf4_t vs2,
                            uint8_t rs1, size_t vl);
vbool16_t __riscv_vmsleu_mu(vbool16_t vm, vbool16_t vd, vuint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vbool16_t __riscv_vmsleu_mu(vbool16_t vm, vbool16_t vd, vuint8mf2_t vs2,
                            uint8_t rs1, size_t vl);
vbool8_t __riscv_vmsleu_mu(vbool8_t vm, vbool8_t vd, vuint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vbool8_t __riscv_vmsleu_mu(vbool8_t vm, vbool8_t vd, vuint8m1_t vs2,
                           uint8_t rs1, size_t vl);
vbool4_t __riscv_vmsleu_mu(vbool4_t vm, vbool4_t vd, vuint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vbool4_t __riscv_vmsleu_mu(vbool4_t vm, vbool4_t vd, vuint8m2_t vs2,
                           uint8_t rs1, size_t vl);
vbool2_t __riscv_vmsleu_mu(vbool2_t vm, vbool2_t vd, vuint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vbool2_t __riscv_vmsleu_mu(vbool2_t vm, vbool2_t vd, vuint8m4_t vs2,
                           uint8_t rs1, size_t vl);
vbool1_t __riscv_vmsleu_mu(vbool1_t vm, vbool1_t vd, vuint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vbool1_t __riscv_vmsleu_mu(vbool1_t vm, vbool1_t vd, vuint8m8_t vs2,
                           uint8_t rs1, size_t vl);
vbool64_t __riscv_vmsleu_mu(vbool64_t vm, vbool64_t vd, vuint16mf4_t vs2,
                            vuint16mf4_t vs1, size_t vl);
vbool64_t __riscv_vmsleu_mu(vbool64_t vm, vbool64_t vd, vuint16mf4_t vs2,
                            uint16_t rs1, size_t vl);
vbool32_t __riscv_vmsleu_mu(vbool32_t vm, vbool32_t vd, vuint16mf2_t vs2,
                            vuint16mf2_t vs1, size_t vl);
vbool32_t __riscv_vmsleu_mu(vbool32_t vm, vbool32_t vd, vuint16mf2_t vs2,
                            uint16_t rs1, size_t vl);
vbool16_t __riscv_vmsleu_mu(vbool16_t vm, vbool16_t vd, vuint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vbool16_t __riscv_vmsleu_mu(vbool16_t vm, vbool16_t vd, vuint16m1_t vs2,
                            uint16_t rs1, size_t vl);
vbool8_t __riscv_vmsleu_mu(vbool8_t vm, vbool8_t vd, vuint16m2_t vs2,
                           vuint16m2_t vs1, size_t vl);
vbool8_t __riscv_vmsleu_mu(vbool8_t vm, vbool8_t vd, vuint16m2_t vs2,
                           uint16_t rs1, size_t vl);
vbool4_t __riscv_vmsleu_mu(vbool4_t vm, vbool4_t vd, vuint16m4_t vs2,
                           vuint16m4_t vs1, size_t vl);
vbool4_t __riscv_vmsleu_mu(vbool4_t vm, vbool4_t vd, vuint16m4_t vs2,
                           uint16_t rs1, size_t vl);
vbool2_t __riscv_vmsleu_mu(vbool2_t vm, vbool2_t vd, vuint16m8_t vs2,
                           vuint16m8_t vs1, size_t vl);
vbool2_t __riscv_vmsleu_mu(vbool2_t vm, vbool2_t vd, vuint16m8_t vs2,
                           uint16_t rs1, size_t vl);
vbool64_t __riscv_vmsleu_mu(vbool64_t vm, vbool64_t vd, vuint32mf2_t vs2,
                            vuint32mf2_t vs1, size_t vl);
vbool64_t __riscv_vmsleu_mu(vbool64_t vm, vbool64_t vd, vuint32mf2_t vs2,
                            uint32_t rs1, size_t vl);
vbool32_t __riscv_vmsleu_mu(vbool32_t vm, vbool32_t vd, vuint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vbool32_t __riscv_vmsleu_mu(vbool32_t vm, vbool32_t vd, vuint32m1_t vs2,
                            uint32_t rs1, size_t vl);
vbool16_t __riscv_vmsleu_mu(vbool16_t vm, vbool16_t vd, vuint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vbool16_t __riscv_vmsleu_mu(vbool16_t vm, vbool16_t vd, vuint32m2_t vs2,
                            uint32_t rs1, size_t vl);
vbool8_t __riscv_vmsleu_mu(vbool8_t vm, vbool8_t vd, vuint32m4_t vs2,
                           vuint32m4_t vs1, size_t vl);
vbool8_t __riscv_vmsleu_mu(vbool8_t vm, vbool8_t vd, vuint32m4_t vs2,
                           uint32_t rs1, size_t vl);
vbool4_t __riscv_vmsleu_mu(vbool4_t vm, vbool4_t vd, vuint32m8_t vs2,
                           vuint32m8_t vs1, size_t vl);
vbool4_t __riscv_vmsleu_mu(vbool4_t vm, vbool4_t vd, vuint32m8_t vs2,
                           uint32_t rs1, size_t vl);
vbool64_t __riscv_vmsleu_mu(vbool64_t vm, vbool64_t vd, vuint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vbool64_t __riscv_vmsleu_mu(vbool64_t vm, vbool64_t vd, vuint64m1_t vs2,
                            uint64_t rs1, size_t vl);
vbool32_t __riscv_vmsleu_mu(vbool32_t vm, vbool32_t vd, vuint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vbool32_t __riscv_vmsleu_mu(vbool32_t vm, vbool32_t vd, vuint64m2_t vs2,
                            uint64_t rs1, size_t vl);
vbool16_t __riscv_vmsleu_mu(vbool16_t vm, vbool16_t vd, vuint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vbool16_t __riscv_vmsleu_mu(vbool16_t vm, vbool16_t vd, vuint64m4_t vs2,
                            uint64_t rs1, size_t vl);
vbool8_t __riscv_vmsleu_mu(vbool8_t vm, vbool8_t vd, vuint64m8_t vs2,
                           vuint64m8_t vs1, size_t vl);
vbool8_t __riscv_vmsleu_mu(vbool8_t vm, vbool8_t vd, vuint64m8_t vs2,
                           uint64_t rs1, size_t vl);
/*
 * __riscv_vmsgtu_mu -- overloads over unsigned-element operand vectors.
 *
 * Every overload has the shape (vm, vd, vs2, <vs1 | rs1>, vl):
 *   vm, vd : mask values of the same vboolN_t type as the return value
 *   vs2    : vector operand
 *   vs1    : vector operand of the same type as vs2 (vector-vector form)
 *   rs1    : scalar of vs2's element type (vector-scalar form)
 *   vl     : size_t
 * In this listing N = element width / LMUL (e.g. vuint8mf8_t -> vbool64_t,
 * vuint64m8_t -> vbool8_t).
 *
 * NOTE(review): semantics are not visible in this listing. By RVV naming this
 * is presumably the masked unsigned "set mask if greater than" compare with
 * the mask-undisturbed (_mu) policy, with vd supplying the inactive elements
 * -- confirm against the RISC-V vector specification.
 */
vbool64_t __riscv_vmsgtu_mu(vbool64_t vm, vbool64_t vd, vuint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vbool64_t __riscv_vmsgtu_mu(vbool64_t vm, vbool64_t vd, vuint8mf8_t vs2,
                            uint8_t rs1, size_t vl);
vbool32_t __riscv_vmsgtu_mu(vbool32_t vm, vbool32_t vd, vuint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vbool32_t __riscv_vmsgtu_mu(vbool32_t vm, vbool32_t vd, vuint8mf4_t vs2,
                            uint8_t rs1, size_t vl);
vbool16_t __riscv_vmsgtu_mu(vbool16_t vm, vbool16_t vd, vuint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vbool16_t __riscv_vmsgtu_mu(vbool16_t vm, vbool16_t vd, vuint8mf2_t vs2,
                            uint8_t rs1, size_t vl);
vbool8_t __riscv_vmsgtu_mu(vbool8_t vm, vbool8_t vd, vuint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vbool8_t __riscv_vmsgtu_mu(vbool8_t vm, vbool8_t vd, vuint8m1_t vs2,
                           uint8_t rs1, size_t vl);
vbool4_t __riscv_vmsgtu_mu(vbool4_t vm, vbool4_t vd, vuint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vbool4_t __riscv_vmsgtu_mu(vbool4_t vm, vbool4_t vd, vuint8m2_t vs2,
                           uint8_t rs1, size_t vl);
vbool2_t __riscv_vmsgtu_mu(vbool2_t vm, vbool2_t vd, vuint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vbool2_t __riscv_vmsgtu_mu(vbool2_t vm, vbool2_t vd, vuint8m4_t vs2,
                           uint8_t rs1, size_t vl);
vbool1_t __riscv_vmsgtu_mu(vbool1_t vm, vbool1_t vd, vuint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vbool1_t __riscv_vmsgtu_mu(vbool1_t vm, vbool1_t vd, vuint8m8_t vs2,
                           uint8_t rs1, size_t vl);
vbool64_t __riscv_vmsgtu_mu(vbool64_t vm, vbool64_t vd, vuint16mf4_t vs2,
                            vuint16mf4_t vs1, size_t vl);
vbool64_t __riscv_vmsgtu_mu(vbool64_t vm, vbool64_t vd, vuint16mf4_t vs2,
                            uint16_t rs1, size_t vl);
vbool32_t __riscv_vmsgtu_mu(vbool32_t vm, vbool32_t vd, vuint16mf2_t vs2,
                            vuint16mf2_t vs1, size_t vl);
vbool32_t __riscv_vmsgtu_mu(vbool32_t vm, vbool32_t vd, vuint16mf2_t vs2,
                            uint16_t rs1, size_t vl);
vbool16_t __riscv_vmsgtu_mu(vbool16_t vm, vbool16_t vd, vuint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vbool16_t __riscv_vmsgtu_mu(vbool16_t vm, vbool16_t vd, vuint16m1_t vs2,
                            uint16_t rs1, size_t vl);
vbool8_t __riscv_vmsgtu_mu(vbool8_t vm, vbool8_t vd, vuint16m2_t vs2,
                           vuint16m2_t vs1, size_t vl);
vbool8_t __riscv_vmsgtu_mu(vbool8_t vm, vbool8_t vd, vuint16m2_t vs2,
                           uint16_t rs1, size_t vl);
vbool4_t __riscv_vmsgtu_mu(vbool4_t vm, vbool4_t vd, vuint16m4_t vs2,
                           vuint16m4_t vs1, size_t vl);
vbool4_t __riscv_vmsgtu_mu(vbool4_t vm, vbool4_t vd, vuint16m4_t vs2,
                           uint16_t rs1, size_t vl);
vbool2_t __riscv_vmsgtu_mu(vbool2_t vm, vbool2_t vd, vuint16m8_t vs2,
                           vuint16m8_t vs1, size_t vl);
vbool2_t __riscv_vmsgtu_mu(vbool2_t vm, vbool2_t vd, vuint16m8_t vs2,
                           uint16_t rs1, size_t vl);
vbool64_t __riscv_vmsgtu_mu(vbool64_t vm, vbool64_t vd, vuint32mf2_t vs2,
                            vuint32mf2_t vs1, size_t vl);
vbool64_t __riscv_vmsgtu_mu(vbool64_t vm, vbool64_t vd, vuint32mf2_t vs2,
                            uint32_t rs1, size_t vl);
vbool32_t __riscv_vmsgtu_mu(vbool32_t vm, vbool32_t vd, vuint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vbool32_t __riscv_vmsgtu_mu(vbool32_t vm, vbool32_t vd, vuint32m1_t vs2,
                            uint32_t rs1, size_t vl);
vbool16_t __riscv_vmsgtu_mu(vbool16_t vm, vbool16_t vd, vuint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vbool16_t __riscv_vmsgtu_mu(vbool16_t vm, vbool16_t vd, vuint32m2_t vs2,
                            uint32_t rs1, size_t vl);
vbool8_t __riscv_vmsgtu_mu(vbool8_t vm, vbool8_t vd, vuint32m4_t vs2,
                           vuint32m4_t vs1, size_t vl);
vbool8_t __riscv_vmsgtu_mu(vbool8_t vm, vbool8_t vd, vuint32m4_t vs2,
                           uint32_t rs1, size_t vl);
vbool4_t __riscv_vmsgtu_mu(vbool4_t vm, vbool4_t vd, vuint32m8_t vs2,
                           vuint32m8_t vs1, size_t vl);
vbool4_t __riscv_vmsgtu_mu(vbool4_t vm, vbool4_t vd, vuint32m8_t vs2,
                           uint32_t rs1, size_t vl);
vbool64_t __riscv_vmsgtu_mu(vbool64_t vm, vbool64_t vd, vuint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vbool64_t __riscv_vmsgtu_mu(vbool64_t vm, vbool64_t vd, vuint64m1_t vs2,
                            uint64_t rs1, size_t vl);
vbool32_t __riscv_vmsgtu_mu(vbool32_t vm, vbool32_t vd, vuint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vbool32_t __riscv_vmsgtu_mu(vbool32_t vm, vbool32_t vd, vuint64m2_t vs2,
                            uint64_t rs1, size_t vl);
vbool16_t __riscv_vmsgtu_mu(vbool16_t vm, vbool16_t vd, vuint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vbool16_t __riscv_vmsgtu_mu(vbool16_t vm, vbool16_t vd, vuint64m4_t vs2,
                            uint64_t rs1, size_t vl);
vbool8_t __riscv_vmsgtu_mu(vbool8_t vm, vbool8_t vd, vuint64m8_t vs2,
                           vuint64m8_t vs1, size_t vl);
vbool8_t __riscv_vmsgtu_mu(vbool8_t vm, vbool8_t vd, vuint64m8_t vs2,
                           uint64_t rs1, size_t vl);
/*
 * __riscv_vmsgeu_mu -- overloads over unsigned-element operand vectors.
 *
 * Every overload has the shape (vm, vd, vs2, <vs1 | rs1>, vl):
 *   vm, vd : mask values of the same vboolN_t type as the return value
 *   vs2    : vector operand
 *   vs1    : vector operand of the same type as vs2 (vector-vector form)
 *   rs1    : scalar of vs2's element type (vector-scalar form)
 *   vl     : size_t
 * In this listing N = element width / LMUL (e.g. vuint8mf8_t -> vbool64_t,
 * vuint64m8_t -> vbool8_t).
 *
 * NOTE(review): semantics are not visible in this listing. By RVV naming this
 * is presumably the masked unsigned "set mask if greater than or equal"
 * compare with the mask-undisturbed (_mu) policy, with vd supplying the
 * inactive elements -- confirm against the RISC-V vector specification.
 */
vbool64_t __riscv_vmsgeu_mu(vbool64_t vm, vbool64_t vd, vuint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vbool64_t __riscv_vmsgeu_mu(vbool64_t vm, vbool64_t vd, vuint8mf8_t vs2,
                            uint8_t rs1, size_t vl);
vbool32_t __riscv_vmsgeu_mu(vbool32_t vm, vbool32_t vd, vuint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vbool32_t __riscv_vmsgeu_mu(vbool32_t vm, vbool32_t vd, vuint8mf4_t vs2,
                            uint8_t rs1, size_t vl);
vbool16_t __riscv_vmsgeu_mu(vbool16_t vm, vbool16_t vd, vuint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vbool16_t __riscv_vmsgeu_mu(vbool16_t vm, vbool16_t vd, vuint8mf2_t vs2,
                            uint8_t rs1, size_t vl);
vbool8_t __riscv_vmsgeu_mu(vbool8_t vm, vbool8_t vd, vuint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vbool8_t __riscv_vmsgeu_mu(vbool8_t vm, vbool8_t vd, vuint8m1_t vs2,
                           uint8_t rs1, size_t vl);
vbool4_t __riscv_vmsgeu_mu(vbool4_t vm, vbool4_t vd, vuint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vbool4_t __riscv_vmsgeu_mu(vbool4_t vm, vbool4_t vd, vuint8m2_t vs2,
                           uint8_t rs1, size_t vl);
vbool2_t __riscv_vmsgeu_mu(vbool2_t vm, vbool2_t vd, vuint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vbool2_t __riscv_vmsgeu_mu(vbool2_t vm, vbool2_t vd, vuint8m4_t vs2,
                           uint8_t rs1, size_t vl);
vbool1_t __riscv_vmsgeu_mu(vbool1_t vm, vbool1_t vd, vuint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vbool1_t __riscv_vmsgeu_mu(vbool1_t vm, vbool1_t vd, vuint8m8_t vs2,
                           uint8_t rs1, size_t vl);
vbool64_t __riscv_vmsgeu_mu(vbool64_t vm, vbool64_t vd, vuint16mf4_t vs2,
                            vuint16mf4_t vs1, size_t vl);
vbool64_t __riscv_vmsgeu_mu(vbool64_t vm, vbool64_t vd, vuint16mf4_t vs2,
                            uint16_t rs1, size_t vl);
vbool32_t __riscv_vmsgeu_mu(vbool32_t vm, vbool32_t vd, vuint16mf2_t vs2,
                            vuint16mf2_t vs1, size_t vl);
vbool32_t __riscv_vmsgeu_mu(vbool32_t vm, vbool32_t vd, vuint16mf2_t vs2,
                            uint16_t rs1, size_t vl);
vbool16_t __riscv_vmsgeu_mu(vbool16_t vm, vbool16_t vd, vuint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vbool16_t __riscv_vmsgeu_mu(vbool16_t vm, vbool16_t vd, vuint16m1_t vs2,
                            uint16_t rs1, size_t vl);
vbool8_t __riscv_vmsgeu_mu(vbool8_t vm, vbool8_t vd, vuint16m2_t vs2,
                           vuint16m2_t vs1, size_t vl);
vbool8_t __riscv_vmsgeu_mu(vbool8_t vm, vbool8_t vd, vuint16m2_t vs2,
                           uint16_t rs1, size_t vl);
vbool4_t __riscv_vmsgeu_mu(vbool4_t vm, vbool4_t vd, vuint16m4_t vs2,
                           vuint16m4_t vs1, size_t vl);
vbool4_t __riscv_vmsgeu_mu(vbool4_t vm, vbool4_t vd, vuint16m4_t vs2,
                           uint16_t rs1, size_t vl);
vbool2_t __riscv_vmsgeu_mu(vbool2_t vm, vbool2_t vd, vuint16m8_t vs2,
                           vuint16m8_t vs1, size_t vl);
vbool2_t __riscv_vmsgeu_mu(vbool2_t vm, vbool2_t vd, vuint16m8_t vs2,
                           uint16_t rs1, size_t vl);
vbool64_t __riscv_vmsgeu_mu(vbool64_t vm, vbool64_t vd, vuint32mf2_t vs2,
                            vuint32mf2_t vs1, size_t vl);
vbool64_t __riscv_vmsgeu_mu(vbool64_t vm, vbool64_t vd, vuint32mf2_t vs2,
                            uint32_t rs1, size_t vl);
vbool32_t __riscv_vmsgeu_mu(vbool32_t vm, vbool32_t vd, vuint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vbool32_t __riscv_vmsgeu_mu(vbool32_t vm, vbool32_t vd, vuint32m1_t vs2,
                            uint32_t rs1, size_t vl);
vbool16_t __riscv_vmsgeu_mu(vbool16_t vm, vbool16_t vd, vuint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vbool16_t __riscv_vmsgeu_mu(vbool16_t vm, vbool16_t vd, vuint32m2_t vs2,
                            uint32_t rs1, size_t vl);
vbool8_t __riscv_vmsgeu_mu(vbool8_t vm, vbool8_t vd, vuint32m4_t vs2,
                           vuint32m4_t vs1, size_t vl);
vbool8_t __riscv_vmsgeu_mu(vbool8_t vm, vbool8_t vd, vuint32m4_t vs2,
                           uint32_t rs1, size_t vl);
vbool4_t __riscv_vmsgeu_mu(vbool4_t vm, vbool4_t vd, vuint32m8_t vs2,
                           vuint32m8_t vs1, size_t vl);
vbool4_t __riscv_vmsgeu_mu(vbool4_t vm, vbool4_t vd, vuint32m8_t vs2,
                           uint32_t rs1, size_t vl);
vbool64_t __riscv_vmsgeu_mu(vbool64_t vm, vbool64_t vd, vuint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vbool64_t __riscv_vmsgeu_mu(vbool64_t vm, vbool64_t vd, vuint64m1_t vs2,
                            uint64_t rs1, size_t vl);
vbool32_t __riscv_vmsgeu_mu(vbool32_t vm, vbool32_t vd, vuint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vbool32_t __riscv_vmsgeu_mu(vbool32_t vm, vbool32_t vd, vuint64m2_t vs2,
                            uint64_t rs1, size_t vl);
vbool16_t __riscv_vmsgeu_mu(vbool16_t vm, vbool16_t vd, vuint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vbool16_t __riscv_vmsgeu_mu(vbool16_t vm, vbool16_t vd, vuint64m4_t vs2,
                            uint64_t rs1, size_t vl);
vbool8_t __riscv_vmsgeu_mu(vbool8_t vm, vbool8_t vd, vuint64m8_t vs2,
                           vuint64m8_t vs1, size_t vl);
vbool8_t __riscv_vmsgeu_mu(vbool8_t vm, vbool8_t vd, vuint64m8_t vs2,
                           uint64_t rs1, size_t vl);

Vector Integer Min/Max Intrinsics

// __riscv_vmin_tu -- signed integer minimum, "_tu" overloads (no mask operand).
// Overloads come in pairs, one pair per signed vector type from vint8mf8_t
// through vint64m8_t:
//   - vector-vector form: the last source operand is a vector `vs1` of the
//     same type as `vs2`;
//   - vector-scalar form: the last source operand is a scalar `rs1` (int8_t,
//     int16_t, int32_t or int64_t, matching the vector's element width).
// Every overload also takes `vd`, typed like the return value, and a
// `size_t vl`.
// NOTE(review): "_tu" presumably selects the tail-undisturbed policy, with
// `vd` supplying the preserved tail elements -- confirm against the RVV
// intrinsics specification.
vint8mf8_t __riscv_vmin_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                           size_t vl);
vint8mf8_t __riscv_vmin_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                           size_t vl);
vint8mf4_t __riscv_vmin_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                           size_t vl);
vint8mf4_t __riscv_vmin_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                           size_t vl);
vint8mf2_t __riscv_vmin_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                           size_t vl);
vint8mf2_t __riscv_vmin_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                           size_t vl);
vint8m1_t __riscv_vmin_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                          size_t vl);
vint8m1_t __riscv_vmin_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, size_t vl);
vint8m2_t __riscv_vmin_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                          size_t vl);
vint8m2_t __riscv_vmin_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, size_t vl);
vint8m4_t __riscv_vmin_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                          size_t vl);
vint8m4_t __riscv_vmin_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, size_t vl);
vint8m8_t __riscv_vmin_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                          size_t vl);
vint8m8_t __riscv_vmin_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, size_t vl);
vint16mf4_t __riscv_vmin_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                            size_t vl);
vint16mf4_t __riscv_vmin_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                            size_t vl);
vint16mf2_t __riscv_vmin_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                            size_t vl);
vint16mf2_t __riscv_vmin_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                            size_t vl);
vint16m1_t __riscv_vmin_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                           size_t vl);
vint16m1_t __riscv_vmin_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                           size_t vl);
vint16m2_t __riscv_vmin_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                           size_t vl);
vint16m2_t __riscv_vmin_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                           size_t vl);
vint16m4_t __riscv_vmin_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                           size_t vl);
vint16m4_t __riscv_vmin_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                           size_t vl);
vint16m8_t __riscv_vmin_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                           size_t vl);
vint16m8_t __riscv_vmin_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                           size_t vl);
vint32mf2_t __riscv_vmin_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                            size_t vl);
vint32mf2_t __riscv_vmin_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                            size_t vl);
vint32m1_t __riscv_vmin_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                           size_t vl);
vint32m1_t __riscv_vmin_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                           size_t vl);
vint32m2_t __riscv_vmin_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                           size_t vl);
vint32m2_t __riscv_vmin_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                           size_t vl);
vint32m4_t __riscv_vmin_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                           size_t vl);
vint32m4_t __riscv_vmin_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                           size_t vl);
vint32m8_t __riscv_vmin_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                           size_t vl);
vint32m8_t __riscv_vmin_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                           size_t vl);
vint64m1_t __riscv_vmin_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                           size_t vl);
vint64m1_t __riscv_vmin_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                           size_t vl);
vint64m2_t __riscv_vmin_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                           size_t vl);
vint64m2_t __riscv_vmin_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                           size_t vl);
vint64m4_t __riscv_vmin_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                           size_t vl);
vint64m4_t __riscv_vmin_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                           size_t vl);
vint64m8_t __riscv_vmin_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                           size_t vl);
vint64m8_t __riscv_vmin_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                           size_t vl);
// __riscv_vmax_tu -- signed integer maximum, "_tu" overloads (no mask operand).
// One pair of overloads per signed vector type from vint8mf8_t through
// vint64m8_t: a vector-vector form whose last source operand is a vector
// `vs1`, and a vector-scalar form whose last source operand is a scalar `rs1`
// (int8_t .. int64_t, matching the vector's element width). Every overload
// also takes `vd`, typed like the return value, and a `size_t vl`.
// NOTE(review): "_tu" presumably selects the tail-undisturbed policy, with
// `vd` supplying the preserved tail elements -- confirm against the RVV
// intrinsics specification.
vint8mf8_t __riscv_vmax_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                           size_t vl);
vint8mf8_t __riscv_vmax_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                           size_t vl);
vint8mf4_t __riscv_vmax_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                           size_t vl);
vint8mf4_t __riscv_vmax_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                           size_t vl);
vint8mf2_t __riscv_vmax_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                           size_t vl);
vint8mf2_t __riscv_vmax_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                           size_t vl);
vint8m1_t __riscv_vmax_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                          size_t vl);
vint8m1_t __riscv_vmax_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, size_t vl);
vint8m2_t __riscv_vmax_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                          size_t vl);
vint8m2_t __riscv_vmax_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, size_t vl);
vint8m4_t __riscv_vmax_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                          size_t vl);
vint8m4_t __riscv_vmax_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, size_t vl);
vint8m8_t __riscv_vmax_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                          size_t vl);
vint8m8_t __riscv_vmax_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, size_t vl);
vint16mf4_t __riscv_vmax_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                            size_t vl);
vint16mf4_t __riscv_vmax_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                            size_t vl);
vint16mf2_t __riscv_vmax_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                            size_t vl);
vint16mf2_t __riscv_vmax_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                            size_t vl);
vint16m1_t __riscv_vmax_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                           size_t vl);
vint16m1_t __riscv_vmax_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                           size_t vl);
vint16m2_t __riscv_vmax_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                           size_t vl);
vint16m2_t __riscv_vmax_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                           size_t vl);
vint16m4_t __riscv_vmax_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                           size_t vl);
vint16m4_t __riscv_vmax_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                           size_t vl);
vint16m8_t __riscv_vmax_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                           size_t vl);
vint16m8_t __riscv_vmax_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                           size_t vl);
vint32mf2_t __riscv_vmax_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                            size_t vl);
vint32mf2_t __riscv_vmax_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                            size_t vl);
vint32m1_t __riscv_vmax_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                           size_t vl);
vint32m1_t __riscv_vmax_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                           size_t vl);
vint32m2_t __riscv_vmax_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                           size_t vl);
vint32m2_t __riscv_vmax_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                           size_t vl);
vint32m4_t __riscv_vmax_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                           size_t vl);
vint32m4_t __riscv_vmax_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                           size_t vl);
vint32m8_t __riscv_vmax_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                           size_t vl);
vint32m8_t __riscv_vmax_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                           size_t vl);
vint64m1_t __riscv_vmax_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                           size_t vl);
vint64m1_t __riscv_vmax_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                           size_t vl);
vint64m2_t __riscv_vmax_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                           size_t vl);
vint64m2_t __riscv_vmax_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                           size_t vl);
vint64m4_t __riscv_vmax_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                           size_t vl);
vint64m4_t __riscv_vmax_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                           size_t vl);
vint64m8_t __riscv_vmax_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                           size_t vl);
vint64m8_t __riscv_vmax_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                           size_t vl);
// __riscv_vminu_tu -- unsigned integer minimum, "_tu" overloads (no mask
// operand). One pair of overloads per unsigned vector type from vuint8mf8_t
// through vuint64m8_t: a vector-vector form whose last source operand is a
// vector `vs1`, and a vector-scalar form whose last source operand is a scalar
// `rs1` (uint8_t .. uint64_t, matching the vector's element width). Every
// overload also takes `vd`, typed like the return value, and a `size_t vl`.
// NOTE(review): "_tu" presumably selects the tail-undisturbed policy, with
// `vd` supplying the preserved tail elements -- confirm against the RVV
// intrinsics specification.
vuint8mf8_t __riscv_vminu_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                             size_t vl);
vuint8mf8_t __riscv_vminu_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                             size_t vl);
vuint8mf4_t __riscv_vminu_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                             size_t vl);
vuint8mf4_t __riscv_vminu_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                             size_t vl);
vuint8mf2_t __riscv_vminu_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                             size_t vl);
vuint8mf2_t __riscv_vminu_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                             size_t vl);
vuint8m1_t __riscv_vminu_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                            size_t vl);
vuint8m1_t __riscv_vminu_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m2_t __riscv_vminu_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                            size_t vl);
vuint8m2_t __riscv_vminu_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m4_t __riscv_vminu_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                            size_t vl);
vuint8m4_t __riscv_vminu_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m8_t __riscv_vminu_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                            size_t vl);
vuint8m8_t __riscv_vminu_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                            size_t vl);
vuint16mf4_t __riscv_vminu_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vminu_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                              size_t vl);
vuint16mf2_t __riscv_vminu_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vminu_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                              size_t vl);
vuint16m1_t __riscv_vminu_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                             size_t vl);
vuint16m1_t __riscv_vminu_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m2_t __riscv_vminu_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                             size_t vl);
vuint16m2_t __riscv_vminu_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m4_t __riscv_vminu_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                             size_t vl);
vuint16m4_t __riscv_vminu_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m8_t __riscv_vminu_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                             size_t vl);
vuint16m8_t __riscv_vminu_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                             size_t vl);
vuint32mf2_t __riscv_vminu_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vminu_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                              size_t vl);
vuint32m1_t __riscv_vminu_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                             size_t vl);
vuint32m1_t __riscv_vminu_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m2_t __riscv_vminu_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                             size_t vl);
vuint32m2_t __riscv_vminu_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m4_t __riscv_vminu_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                             size_t vl);
vuint32m4_t __riscv_vminu_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m8_t __riscv_vminu_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                             size_t vl);
vuint32m8_t __riscv_vminu_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                             size_t vl);
vuint64m1_t __riscv_vminu_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                             size_t vl);
vuint64m1_t __riscv_vminu_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m2_t __riscv_vminu_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                             size_t vl);
vuint64m2_t __riscv_vminu_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m4_t __riscv_vminu_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                             size_t vl);
vuint64m4_t __riscv_vminu_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m8_t __riscv_vminu_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                             size_t vl);
vuint64m8_t __riscv_vminu_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                             size_t vl);
// __riscv_vmaxu_tu -- unsigned integer maximum, "_tu" overloads (no mask
// operand). One pair of overloads per unsigned vector type from vuint8mf8_t
// through vuint64m8_t: a vector-vector form whose last source operand is a
// vector `vs1`, and a vector-scalar form whose last source operand is a scalar
// `rs1` (uint8_t .. uint64_t, matching the vector's element width). Every
// overload also takes `vd`, typed like the return value, and a `size_t vl`.
// NOTE(review): "_tu" presumably selects the tail-undisturbed policy, with
// `vd` supplying the preserved tail elements -- confirm against the RVV
// intrinsics specification.
vuint8mf8_t __riscv_vmaxu_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                             size_t vl);
vuint8mf8_t __riscv_vmaxu_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                             size_t vl);
vuint8mf4_t __riscv_vmaxu_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                             size_t vl);
vuint8mf4_t __riscv_vmaxu_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                             size_t vl);
vuint8mf2_t __riscv_vmaxu_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                             size_t vl);
vuint8mf2_t __riscv_vmaxu_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                             size_t vl);
vuint8m1_t __riscv_vmaxu_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                            size_t vl);
vuint8m1_t __riscv_vmaxu_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m2_t __riscv_vmaxu_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                            size_t vl);
vuint8m2_t __riscv_vmaxu_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m4_t __riscv_vmaxu_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                            size_t vl);
vuint8m4_t __riscv_vmaxu_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m8_t __riscv_vmaxu_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                            size_t vl);
vuint8m8_t __riscv_vmaxu_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                            size_t vl);
vuint16mf4_t __riscv_vmaxu_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmaxu_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                              size_t vl);
vuint16mf2_t __riscv_vmaxu_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmaxu_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                              size_t vl);
vuint16m1_t __riscv_vmaxu_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                             size_t vl);
vuint16m1_t __riscv_vmaxu_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m2_t __riscv_vmaxu_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                             size_t vl);
vuint16m2_t __riscv_vmaxu_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m4_t __riscv_vmaxu_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                             size_t vl);
vuint16m4_t __riscv_vmaxu_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m8_t __riscv_vmaxu_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                             size_t vl);
vuint16m8_t __riscv_vmaxu_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                             size_t vl);
vuint32mf2_t __riscv_vmaxu_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmaxu_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                              size_t vl);
vuint32m1_t __riscv_vmaxu_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                             size_t vl);
vuint32m1_t __riscv_vmaxu_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m2_t __riscv_vmaxu_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                             size_t vl);
vuint32m2_t __riscv_vmaxu_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m4_t __riscv_vmaxu_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                             size_t vl);
vuint32m4_t __riscv_vmaxu_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m8_t __riscv_vmaxu_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                             size_t vl);
vuint32m8_t __riscv_vmaxu_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                             size_t vl);
vuint64m1_t __riscv_vmaxu_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                             size_t vl);
vuint64m1_t __riscv_vmaxu_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m2_t __riscv_vmaxu_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                             size_t vl);
vuint64m2_t __riscv_vmaxu_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m4_t __riscv_vmaxu_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                             size_t vl);
vuint64m4_t __riscv_vmaxu_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m8_t __riscv_vmaxu_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                             size_t vl);
vuint64m8_t __riscv_vmaxu_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                             size_t vl);
// masked functions
// __riscv_vmin_tum -- signed integer minimum, masked "_tum" overloads.
// Each overload takes a leading mask operand `vm`, then `vd` (typed like the
// return value), the vector source `vs2`, and either a vector `vs1` or a
// scalar `rs1` (int8_t .. int64_t, matching the vector's element width),
// followed by a `size_t vl`.
// The mask type varies with the vector type, e.g. vint8mf8_t pairs with
// vbool64_t, vint8m1_t with vbool8_t, vint8m8_t with vbool1_t and vint64m8_t
// with vbool8_t.
// NOTE(review): "_tum" presumably means tail-undisturbed / mask-agnostic
// policy -- confirm against the RVV intrinsics specification.
vint8mf8_t __riscv_vmin_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmin_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmin_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmin_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmin_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmin_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint8m1_t __riscv_vmin_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmin_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                           size_t vl);
vint8m2_t __riscv_vmin_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmin_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                           size_t vl);
vint8m4_t __riscv_vmin_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmin_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                           size_t vl);
vint8m8_t __riscv_vmin_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmin_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vmin_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmin_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmin_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmin_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint16m1_t __riscv_vmin_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmin_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint16m2_t __riscv_vmin_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmin_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint16m4_t __riscv_vmin_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmin_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint16m8_t __riscv_vmin_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmin_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmin_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmin_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint32m1_t __riscv_vmin_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmin_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint32m2_t __riscv_vmin_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmin_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint32m4_t __riscv_vmin_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmin_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint32m8_t __riscv_vmin_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmin_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            int32_t rs1, size_t vl);
vint64m1_t __riscv_vmin_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmin_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            int64_t rs1, size_t vl);
vint64m2_t __riscv_vmin_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmin_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            int64_t rs1, size_t vl);
vint64m4_t __riscv_vmin_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmin_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            int64_t rs1, size_t vl);
vint64m8_t __riscv_vmin_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmin_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            int64_t rs1, size_t vl);
// __riscv_vmax_tum -- signed integer maximum, masked "_tum" overloads.
// Each overload takes a leading mask operand `vm`, then `vd` (typed like the
// return value), the vector source `vs2`, and either a vector `vs1` or a
// scalar `rs1` (int8_t .. int64_t, matching the vector's element width),
// followed by a `size_t vl`.
// The mask type varies with the vector type, e.g. vint8mf8_t pairs with
// vbool64_t, vint8m1_t with vbool8_t, vint8m8_t with vbool1_t and vint64m8_t
// with vbool8_t.
// NOTE(review): "_tum" presumably means tail-undisturbed / mask-agnostic
// policy -- confirm against the RVV intrinsics specification.
vint8mf8_t __riscv_vmax_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmax_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmax_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmax_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmax_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmax_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint8m1_t __riscv_vmax_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmax_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                           size_t vl);
vint8m2_t __riscv_vmax_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmax_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                           size_t vl);
vint8m4_t __riscv_vmax_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmax_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                           size_t vl);
vint8m8_t __riscv_vmax_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmax_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vmax_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmax_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmax_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmax_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint16m1_t __riscv_vmax_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmax_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint16m2_t __riscv_vmax_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmax_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint16m4_t __riscv_vmax_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmax_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint16m8_t __riscv_vmax_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmax_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmax_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmax_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint32m1_t __riscv_vmax_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmax_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint32m2_t __riscv_vmax_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmax_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint32m4_t __riscv_vmax_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmax_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint32m8_t __riscv_vmax_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmax_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            int32_t rs1, size_t vl);
vint64m1_t __riscv_vmax_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmax_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            int64_t rs1, size_t vl);
vint64m2_t __riscv_vmax_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmax_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            int64_t rs1, size_t vl);
vint64m4_t __riscv_vmax_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmax_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            int64_t rs1, size_t vl);
vint64m8_t __riscv_vmax_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmax_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            int64_t rs1, size_t vl);
// __riscv_vminu_tum overloads for the unsigned vector types vuint8mf8_t
// through vuint64m8_t. Each vector type is listed twice: once with a vector
// second operand (vs1) and once with a scalar second operand (rs1) of the
// element type. The mask type of vm (vbool64_t .. vbool1_t) differs per
// vector type.
// NOTE(review): the _tum suffix presumably selects the tail-undisturbed,
// mask-agnostic policy of the RVV intrinsics naming scheme -- confirm against
// the intrinsics specification.
vuint8mf8_t __riscv_vminu_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vminu_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vminu_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vminu_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vminu_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vminu_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vminu_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vminu_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vminu_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vminu_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vminu_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vminu_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vminu_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vminu_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vminu_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vminu_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vminu_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vminu_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vminu_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vminu_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vminu_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vminu_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vminu_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vminu_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vminu_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vminu_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vminu_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vminu_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vminu_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vminu_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vminu_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vminu_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vminu_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vminu_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vminu_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vminu_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vminu_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vminu_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vminu_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vminu_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vminu_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vminu_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vminu_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vminu_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              uint64_t rs1, size_t vl);
// __riscv_vmaxu_tum overloads for the unsigned vector types vuint8mf8_t
// through vuint64m8_t. Each vector type is listed twice: once with a vector
// second operand (vs1) and once with a scalar second operand (rs1) of the
// element type. The mask type of vm (vbool64_t .. vbool1_t) differs per
// vector type.
// NOTE(review): the _tum suffix presumably selects the tail-undisturbed,
// mask-agnostic policy of the RVV intrinsics naming scheme -- confirm against
// the intrinsics specification.
vuint8mf8_t __riscv_vmaxu_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vmaxu_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vmaxu_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vmaxu_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vmaxu_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vmaxu_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vmaxu_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vmaxu_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vmaxu_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vmaxu_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vmaxu_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vmaxu_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vmaxu_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vmaxu_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vmaxu_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmaxu_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vmaxu_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmaxu_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vmaxu_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vmaxu_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vmaxu_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vmaxu_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vmaxu_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vmaxu_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vmaxu_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vmaxu_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vmaxu_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmaxu_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vmaxu_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vmaxu_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vmaxu_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vmaxu_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vmaxu_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vmaxu_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vmaxu_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vmaxu_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vmaxu_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vmaxu_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vmaxu_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vmaxu_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vmaxu_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vmaxu_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vmaxu_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vmaxu_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              uint64_t rs1, size_t vl);
// masked functions (_tumu policy variants)
// __riscv_vmin_tumu overloads for the signed vector types vint8mf8_t through
// vint64m8_t. Each vector type is listed twice: once with a vector second
// operand (vs1) and once with a scalar second operand (rs1) of the element
// type. The mask type of vm (vbool64_t .. vbool1_t) differs per vector type.
// NOTE(review): the _tumu suffix presumably selects the tail-undisturbed,
// mask-undisturbed policy of the RVV intrinsics naming scheme -- confirm
// against the intrinsics specification.
vint8mf8_t __riscv_vmin_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmin_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmin_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmin_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmin_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmin_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             int8_t rs1, size_t vl);
vint8m1_t __riscv_vmin_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmin_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            int8_t rs1, size_t vl);
vint8m2_t __riscv_vmin_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmin_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            int8_t rs1, size_t vl);
vint8m4_t __riscv_vmin_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmin_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            int8_t rs1, size_t vl);
vint8m8_t __riscv_vmin_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmin_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            int8_t rs1, size_t vl);
vint16mf4_t __riscv_vmin_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmin_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmin_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmin_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              int16_t rs1, size_t vl);
vint16m1_t __riscv_vmin_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmin_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             int16_t rs1, size_t vl);
vint16m2_t __riscv_vmin_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmin_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             int16_t rs1, size_t vl);
vint16m4_t __riscv_vmin_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmin_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             int16_t rs1, size_t vl);
vint16m8_t __riscv_vmin_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmin_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmin_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmin_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              int32_t rs1, size_t vl);
vint32m1_t __riscv_vmin_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmin_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             int32_t rs1, size_t vl);
vint32m2_t __riscv_vmin_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmin_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             int32_t rs1, size_t vl);
vint32m4_t __riscv_vmin_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmin_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             int32_t rs1, size_t vl);
vint32m8_t __riscv_vmin_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmin_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             int32_t rs1, size_t vl);
vint64m1_t __riscv_vmin_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmin_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             int64_t rs1, size_t vl);
vint64m2_t __riscv_vmin_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmin_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             int64_t rs1, size_t vl);
vint64m4_t __riscv_vmin_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmin_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             int64_t rs1, size_t vl);
vint64m8_t __riscv_vmin_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmin_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             int64_t rs1, size_t vl);
// __riscv_vmax_tumu overloads for the signed vector types vint8mf8_t through
// vint64m8_t. Each vector type is listed twice: once with a vector second
// operand (vs1) and once with a scalar second operand (rs1) of the element
// type. The mask type of vm (vbool64_t .. vbool1_t) differs per vector type.
// NOTE(review): the _tumu suffix presumably selects the tail-undisturbed,
// mask-undisturbed policy of the RVV intrinsics naming scheme -- confirm
// against the intrinsics specification.
vint8mf8_t __riscv_vmax_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmax_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmax_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmax_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmax_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmax_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             int8_t rs1, size_t vl);
vint8m1_t __riscv_vmax_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmax_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            int8_t rs1, size_t vl);
vint8m2_t __riscv_vmax_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmax_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            int8_t rs1, size_t vl);
vint8m4_t __riscv_vmax_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmax_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            int8_t rs1, size_t vl);
vint8m8_t __riscv_vmax_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmax_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            int8_t rs1, size_t vl);
vint16mf4_t __riscv_vmax_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmax_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmax_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmax_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              int16_t rs1, size_t vl);
vint16m1_t __riscv_vmax_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmax_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             int16_t rs1, size_t vl);
vint16m2_t __riscv_vmax_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmax_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             int16_t rs1, size_t vl);
vint16m4_t __riscv_vmax_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmax_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             int16_t rs1, size_t vl);
vint16m8_t __riscv_vmax_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmax_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmax_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmax_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              int32_t rs1, size_t vl);
vint32m1_t __riscv_vmax_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmax_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             int32_t rs1, size_t vl);
vint32m2_t __riscv_vmax_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmax_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             int32_t rs1, size_t vl);
vint32m4_t __riscv_vmax_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmax_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             int32_t rs1, size_t vl);
vint32m8_t __riscv_vmax_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmax_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             int32_t rs1, size_t vl);
vint64m1_t __riscv_vmax_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmax_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             int64_t rs1, size_t vl);
vint64m2_t __riscv_vmax_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmax_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             int64_t rs1, size_t vl);
vint64m4_t __riscv_vmax_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmax_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             int64_t rs1, size_t vl);
vint64m8_t __riscv_vmax_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmax_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             int64_t rs1, size_t vl);
// __riscv_vminu_tumu overloads for the unsigned vector types vuint8mf8_t
// through vuint64m8_t. Each vector type is listed twice: once with a vector
// second operand (vs1) and once with a scalar second operand (rs1) of the
// element type. The mask type of vm (vbool64_t .. vbool1_t) differs per
// vector type.
// NOTE(review): the _tumu suffix presumably selects the tail-undisturbed,
// mask-undisturbed policy of the RVV intrinsics naming scheme -- confirm
// against the intrinsics specification.
vuint8mf8_t __riscv_vminu_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                               vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vminu_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                               uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vminu_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                               vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vminu_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                               uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vminu_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                               vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vminu_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                               uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vminu_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                              vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vminu_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vminu_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                              vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vminu_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vminu_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                              vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vminu_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vminu_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                              vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vminu_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                              uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vminu_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                                vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vminu_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                                uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vminu_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                                vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vminu_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                                uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vminu_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                               vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vminu_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vminu_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                               vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vminu_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vminu_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                               vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vminu_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vminu_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                               vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vminu_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                               uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vminu_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                                vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vminu_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                                uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vminu_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                               vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vminu_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vminu_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                               vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vminu_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vminu_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                               vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vminu_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vminu_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                               vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vminu_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                               uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vminu_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                               vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vminu_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vminu_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                               vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vminu_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vminu_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                               vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vminu_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vminu_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                               vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vminu_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                               uint64_t rs1, size_t vl);
// __riscv_vmaxu_tumu overloads for the unsigned vector types vuint8mf8_t
// through vuint64m8_t. Each vector type is listed twice: once with a vector
// second operand (vs1) and once with a scalar second operand (rs1) of the
// element type. The mask type of vm (vbool64_t .. vbool1_t) differs per
// vector type.
// NOTE(review): the _tumu suffix presumably selects the tail-undisturbed,
// mask-undisturbed policy of the RVV intrinsics naming scheme -- confirm
// against the intrinsics specification.
vuint8mf8_t __riscv_vmaxu_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                               vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vmaxu_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                               uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vmaxu_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                               vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vmaxu_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                               uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vmaxu_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                               vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vmaxu_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                               uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vmaxu_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                              vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vmaxu_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vmaxu_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                              vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vmaxu_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vmaxu_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                              vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vmaxu_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vmaxu_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                              vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vmaxu_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                              uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vmaxu_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                                vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmaxu_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                                uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vmaxu_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                                vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmaxu_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                                uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vmaxu_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                               vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vmaxu_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vmaxu_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                               vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vmaxu_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vmaxu_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                               vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vmaxu_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vmaxu_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                               vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vmaxu_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                               uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vmaxu_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                                vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmaxu_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                                uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vmaxu_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                               vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vmaxu_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vmaxu_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                               vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vmaxu_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vmaxu_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                               vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vmaxu_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vmaxu_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                               vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vmaxu_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                               uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vmaxu_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                               vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vmaxu_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vmaxu_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                               vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vmaxu_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vmaxu_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                               vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vmaxu_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vmaxu_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                               vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vmaxu_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                               uint64_t rs1, size_t vl);
// masked functions (_mu variants)
// __riscv_vmin_mu -- signed-integer overloads taking a mask.
// Type coverage: int8 (mf8..m8), int16 (mf4..m8), int32 (mf2..m8),
// int64 (m1..m8). Every vector type is listed twice:
//   * vector-vector form: second source `vs1` has the same type as `vs2`;
//   * vector-scalar form: second source `rs1` is a scalar of the element type.
// Arguments: `vm` is the vboolN_t mask, where N = element width / LMUL
// (e.g. vint8m1_t -> vbool8_t, vint16mf4_t -> vbool64_t); `vd` has the
// result type; `vs2` is the first source; `vl` is a size_t.
// NOTE(review): presumably an element-wise signed minimum where `_mu` means
// mask-undisturbed (inactive elements keep the value from `vd`) and `vl` is
// the number of elements processed -- confirm against the RVV intrinsics spec.
vint8mf8_t __riscv_vmin_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmin_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmin_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmin_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmin_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmin_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vint8m1_t __riscv_vmin_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmin_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vint8m2_t __riscv_vmin_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmin_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vint8m4_t __riscv_vmin_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmin_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vint8m8_t __riscv_vmin_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmin_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
vint16mf4_t __riscv_vmin_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmin_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmin_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmin_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            int16_t rs1, size_t vl);
vint16m1_t __riscv_vmin_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmin_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vint16m2_t __riscv_vmin_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmin_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           int16_t rs1, size_t vl);
vint16m4_t __riscv_vmin_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmin_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           int16_t rs1, size_t vl);
vint16m8_t __riscv_vmin_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmin_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmin_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmin_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            int32_t rs1, size_t vl);
vint32m1_t __riscv_vmin_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmin_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vint32m2_t __riscv_vmin_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmin_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vint32m4_t __riscv_vmin_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmin_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           int32_t rs1, size_t vl);
vint32m8_t __riscv_vmin_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmin_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           int32_t rs1, size_t vl);
vint64m1_t __riscv_vmin_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmin_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vint64m2_t __riscv_vmin_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmin_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vint64m4_t __riscv_vmin_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmin_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vint64m8_t __riscv_vmin_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmin_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           int64_t rs1, size_t vl);
// __riscv_vmax_mu -- signed-integer overloads taking a mask.
// Type coverage: int8 (mf8..m8), int16 (mf4..m8), int32 (mf2..m8),
// int64 (m1..m8). Every vector type is listed twice:
//   * vector-vector form: second source `vs1` has the same type as `vs2`;
//   * vector-scalar form: second source `rs1` is a scalar of the element type.
// Arguments: `vm` is the vboolN_t mask, where N = element width / LMUL
// (e.g. vint8m1_t -> vbool8_t, vint16mf4_t -> vbool64_t); `vd` has the
// result type; `vs2` is the first source; `vl` is a size_t.
// NOTE(review): presumably an element-wise signed maximum where `_mu` means
// mask-undisturbed (inactive elements keep the value from `vd`) and `vl` is
// the number of elements processed -- confirm against the RVV intrinsics spec.
vint8mf8_t __riscv_vmax_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmax_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmax_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmax_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmax_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmax_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vint8m1_t __riscv_vmax_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmax_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vint8m2_t __riscv_vmax_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmax_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vint8m4_t __riscv_vmax_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmax_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vint8m8_t __riscv_vmax_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmax_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
vint16mf4_t __riscv_vmax_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmax_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmax_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmax_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            int16_t rs1, size_t vl);
vint16m1_t __riscv_vmax_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmax_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vint16m2_t __riscv_vmax_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmax_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           int16_t rs1, size_t vl);
vint16m4_t __riscv_vmax_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmax_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           int16_t rs1, size_t vl);
vint16m8_t __riscv_vmax_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmax_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmax_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmax_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            int32_t rs1, size_t vl);
vint32m1_t __riscv_vmax_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmax_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vint32m2_t __riscv_vmax_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmax_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vint32m4_t __riscv_vmax_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmax_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           int32_t rs1, size_t vl);
vint32m8_t __riscv_vmax_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmax_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           int32_t rs1, size_t vl);
vint64m1_t __riscv_vmax_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmax_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vint64m2_t __riscv_vmax_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmax_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vint64m4_t __riscv_vmax_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmax_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vint64m8_t __riscv_vmax_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmax_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           int64_t rs1, size_t vl);
// __riscv_vminu_mu -- unsigned-integer overloads taking a mask.
// Type coverage: uint8 (mf8..m8), uint16 (mf4..m8), uint32 (mf2..m8),
// uint64 (m1..m8). Every vector type is listed twice:
//   * vector-vector form: second source `vs1` has the same type as `vs2`;
//   * vector-scalar form: second source `rs1` is a scalar of the element type.
// Arguments: `vm` is the vboolN_t mask, where N = element width / LMUL
// (e.g. vuint8m1_t -> vbool8_t, vuint16mf4_t -> vbool64_t); `vd` has the
// result type; `vs2` is the first source; `vl` is a size_t.
// NOTE(review): presumably an element-wise unsigned minimum where `_mu` means
// mask-undisturbed (inactive elements keep the value from `vd`) and `vl` is
// the number of elements processed -- confirm against the RVV intrinsics spec.
vuint8mf8_t __riscv_vminu_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vminu_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vminu_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vminu_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vminu_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vminu_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vminu_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vminu_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vminu_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vminu_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vminu_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vminu_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vminu_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vminu_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vminu_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vminu_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vminu_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vminu_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vminu_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vminu_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vminu_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vminu_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vminu_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vminu_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vminu_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vminu_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vminu_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vminu_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vminu_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vminu_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vminu_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vminu_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vminu_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vminu_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vminu_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vminu_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vminu_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vminu_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vminu_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vminu_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vminu_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vminu_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vminu_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vminu_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             uint64_t rs1, size_t vl);
// __riscv_vmaxu_mu -- unsigned-integer overloads taking a mask.
// Type coverage: uint8 (mf8..m8), uint16 (mf4..m8), uint32 (mf2..m8),
// uint64 (m1..m8). Every vector type is listed twice:
//   * vector-vector form: second source `vs1` has the same type as `vs2`;
//   * vector-scalar form: second source `rs1` is a scalar of the element type.
// Arguments: `vm` is the vboolN_t mask, where N = element width / LMUL
// (e.g. vuint8m1_t -> vbool8_t, vuint16mf4_t -> vbool64_t); `vd` has the
// result type; `vs2` is the first source; `vl` is a size_t.
// NOTE(review): presumably an element-wise unsigned maximum where `_mu` means
// mask-undisturbed (inactive elements keep the value from `vd`) and `vl` is
// the number of elements processed -- confirm against the RVV intrinsics spec.
vuint8mf8_t __riscv_vmaxu_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vmaxu_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vmaxu_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vmaxu_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vmaxu_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vmaxu_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vmaxu_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vmaxu_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vmaxu_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vmaxu_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vmaxu_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vmaxu_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vmaxu_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vmaxu_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vmaxu_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmaxu_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vmaxu_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmaxu_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vmaxu_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vmaxu_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vmaxu_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vmaxu_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vmaxu_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vmaxu_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vmaxu_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vmaxu_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vmaxu_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmaxu_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vmaxu_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vmaxu_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vmaxu_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vmaxu_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vmaxu_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vmaxu_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vmaxu_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vmaxu_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vmaxu_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vmaxu_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vmaxu_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vmaxu_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vmaxu_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vmaxu_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vmaxu_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vmaxu_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             uint64_t rs1, size_t vl);

Vector Single-Width Integer Multiply Intrinsics

// __riscv_vmul_tu -- signed-integer overloads (no mask argument).
// Type coverage: int8 (mf8..m8), int16 (mf4..m8), int32 (mf2..m8),
// int64 (m1..m8). Every vector type is listed twice:
//   * vector-vector form: second source `vs1` has the same type as `vs2`;
//   * vector-scalar form: second source `rs1` is a scalar of the element type.
// Arguments: `vd` has the result type; `vs2` is the first source; `vl` is a
// size_t. The result type always equals the source vector type.
// NOTE(review): per this section's title these are single-width multiplies;
// `_tu` presumably means tail-undisturbed (elements past `vl` keep the value
// from `vd`) -- confirm against the RVV intrinsics spec.
vint8mf8_t __riscv_vmul_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                           size_t vl);
vint8mf8_t __riscv_vmul_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                           size_t vl);
vint8mf4_t __riscv_vmul_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                           size_t vl);
vint8mf4_t __riscv_vmul_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                           size_t vl);
vint8mf2_t __riscv_vmul_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                           size_t vl);
vint8mf2_t __riscv_vmul_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                           size_t vl);
vint8m1_t __riscv_vmul_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                          size_t vl);
vint8m1_t __riscv_vmul_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, size_t vl);
vint8m2_t __riscv_vmul_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                          size_t vl);
vint8m2_t __riscv_vmul_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, size_t vl);
vint8m4_t __riscv_vmul_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                          size_t vl);
vint8m4_t __riscv_vmul_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, size_t vl);
vint8m8_t __riscv_vmul_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                          size_t vl);
vint8m8_t __riscv_vmul_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, size_t vl);
vint16mf4_t __riscv_vmul_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                            size_t vl);
vint16mf4_t __riscv_vmul_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                            size_t vl);
vint16mf2_t __riscv_vmul_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                            size_t vl);
vint16mf2_t __riscv_vmul_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                            size_t vl);
vint16m1_t __riscv_vmul_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                           size_t vl);
vint16m1_t __riscv_vmul_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                           size_t vl);
vint16m2_t __riscv_vmul_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                           size_t vl);
vint16m2_t __riscv_vmul_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                           size_t vl);
vint16m4_t __riscv_vmul_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                           size_t vl);
vint16m4_t __riscv_vmul_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                           size_t vl);
vint16m8_t __riscv_vmul_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                           size_t vl);
vint16m8_t __riscv_vmul_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                           size_t vl);
vint32mf2_t __riscv_vmul_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                            size_t vl);
vint32mf2_t __riscv_vmul_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                            size_t vl);
vint32m1_t __riscv_vmul_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                           size_t vl);
vint32m1_t __riscv_vmul_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                           size_t vl);
vint32m2_t __riscv_vmul_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                           size_t vl);
vint32m2_t __riscv_vmul_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                           size_t vl);
vint32m4_t __riscv_vmul_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                           size_t vl);
vint32m4_t __riscv_vmul_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                           size_t vl);
vint32m8_t __riscv_vmul_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                           size_t vl);
vint32m8_t __riscv_vmul_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                           size_t vl);
vint64m1_t __riscv_vmul_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                           size_t vl);
vint64m1_t __riscv_vmul_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                           size_t vl);
vint64m2_t __riscv_vmul_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                           size_t vl);
vint64m2_t __riscv_vmul_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                           size_t vl);
vint64m4_t __riscv_vmul_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                           size_t vl);
vint64m4_t __riscv_vmul_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                           size_t vl);
vint64m8_t __riscv_vmul_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                           size_t vl);
vint64m8_t __riscv_vmul_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                           size_t vl);
// __riscv_vmulh_tu -- signed-integer overloads (no mask argument).
// Type coverage: int8 (mf8..m8), int16 (mf4..m8), int32 (mf2..m8),
// int64 (m1..m8). Every vector type is listed twice:
//   * vector-vector form: second source `vs1` has the same type as `vs2`;
//   * vector-scalar form: second source `rs1` is a scalar of the element type.
// Arguments: `vd` has the result type; `vs2` is the first source; `vl` is a
// size_t. Both sources are signed and the result type equals the source
// vector type.
// NOTE(review): by RVV naming, vmulh presumably returns the high half of the
// signed*signed product, and `_tu` presumably means tail-undisturbed (elements
// past `vl` keep the value from `vd`) -- confirm against the RVV intrinsics
// spec.
vint8mf8_t __riscv_vmulh_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                            size_t vl);
vint8mf8_t __riscv_vmulh_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                            size_t vl);
vint8mf4_t __riscv_vmulh_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                            size_t vl);
vint8mf4_t __riscv_vmulh_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                            size_t vl);
vint8mf2_t __riscv_vmulh_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                            size_t vl);
vint8mf2_t __riscv_vmulh_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                            size_t vl);
vint8m1_t __riscv_vmulh_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                           size_t vl);
vint8m1_t __riscv_vmulh_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, size_t vl);
vint8m2_t __riscv_vmulh_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                           size_t vl);
vint8m2_t __riscv_vmulh_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, size_t vl);
vint8m4_t __riscv_vmulh_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                           size_t vl);
vint8m4_t __riscv_vmulh_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, size_t vl);
vint8m8_t __riscv_vmulh_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                           size_t vl);
vint8m8_t __riscv_vmulh_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, size_t vl);
vint16mf4_t __riscv_vmulh_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                             size_t vl);
vint16mf4_t __riscv_vmulh_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                             size_t vl);
vint16mf2_t __riscv_vmulh_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                             size_t vl);
vint16mf2_t __riscv_vmulh_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                             size_t vl);
vint16m1_t __riscv_vmulh_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                            size_t vl);
vint16m1_t __riscv_vmulh_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                            size_t vl);
vint16m2_t __riscv_vmulh_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                            size_t vl);
vint16m2_t __riscv_vmulh_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                            size_t vl);
vint16m4_t __riscv_vmulh_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                            size_t vl);
vint16m4_t __riscv_vmulh_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                            size_t vl);
vint16m8_t __riscv_vmulh_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                            size_t vl);
vint16m8_t __riscv_vmulh_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                            size_t vl);
vint32mf2_t __riscv_vmulh_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                             size_t vl);
vint32mf2_t __riscv_vmulh_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                             size_t vl);
vint32m1_t __riscv_vmulh_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                            size_t vl);
vint32m1_t __riscv_vmulh_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                            size_t vl);
vint32m2_t __riscv_vmulh_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                            size_t vl);
vint32m2_t __riscv_vmulh_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                            size_t vl);
vint32m4_t __riscv_vmulh_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                            size_t vl);
vint32m4_t __riscv_vmulh_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                            size_t vl);
vint32m8_t __riscv_vmulh_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                            size_t vl);
vint32m8_t __riscv_vmulh_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                            size_t vl);
vint64m1_t __riscv_vmulh_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                            size_t vl);
vint64m1_t __riscv_vmulh_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                            size_t vl);
vint64m2_t __riscv_vmulh_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                            size_t vl);
vint64m2_t __riscv_vmulh_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                            size_t vl);
vint64m4_t __riscv_vmulh_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                            size_t vl);
vint64m4_t __riscv_vmulh_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                            size_t vl);
vint64m8_t __riscv_vmulh_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                            size_t vl);
vint64m8_t __riscv_vmulh_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                            size_t vl);
// __riscv_vmulhsu_tu overloads (mixed-sign operands).
// Each type has two forms: vector-vector (last operand `vs1`) and
// vector-scalar (last operand `rs1`). `vd` and `vs2` are signed vectors of
// the same type as the result; `vs1` is the unsigned vector type of the same
// element width and grouping, and `rs1` is the unsigned scalar of that element
// width. `vl` is passed as size_t.
// NOTE(review): presumably yields the high half of the signed(vs2) by
// unsigned(vs1/rs1) product, with `_tu` selecting the tail-undisturbed policy
// and `vd` supplying the preserved tail elements -- confirm against the spec.
vint8mf8_t __riscv_vmulhsu_tu(vint8mf8_t vd, vint8mf8_t vs2, vuint8mf8_t vs1,
                              size_t vl);
vint8mf8_t __riscv_vmulhsu_tu(vint8mf8_t vd, vint8mf8_t vs2, uint8_t rs1,
                              size_t vl);
vint8mf4_t __riscv_vmulhsu_tu(vint8mf4_t vd, vint8mf4_t vs2, vuint8mf4_t vs1,
                              size_t vl);
vint8mf4_t __riscv_vmulhsu_tu(vint8mf4_t vd, vint8mf4_t vs2, uint8_t rs1,
                              size_t vl);
vint8mf2_t __riscv_vmulhsu_tu(vint8mf2_t vd, vint8mf2_t vs2, vuint8mf2_t vs1,
                              size_t vl);
vint8mf2_t __riscv_vmulhsu_tu(vint8mf2_t vd, vint8mf2_t vs2, uint8_t rs1,
                              size_t vl);
vint8m1_t __riscv_vmulhsu_tu(vint8m1_t vd, vint8m1_t vs2, vuint8m1_t vs1,
                             size_t vl);
vint8m1_t __riscv_vmulhsu_tu(vint8m1_t vd, vint8m1_t vs2, uint8_t rs1,
                             size_t vl);
vint8m2_t __riscv_vmulhsu_tu(vint8m2_t vd, vint8m2_t vs2, vuint8m2_t vs1,
                             size_t vl);
vint8m2_t __riscv_vmulhsu_tu(vint8m2_t vd, vint8m2_t vs2, uint8_t rs1,
                             size_t vl);
vint8m4_t __riscv_vmulhsu_tu(vint8m4_t vd, vint8m4_t vs2, vuint8m4_t vs1,
                             size_t vl);
vint8m4_t __riscv_vmulhsu_tu(vint8m4_t vd, vint8m4_t vs2, uint8_t rs1,
                             size_t vl);
vint8m8_t __riscv_vmulhsu_tu(vint8m8_t vd, vint8m8_t vs2, vuint8m8_t vs1,
                             size_t vl);
vint8m8_t __riscv_vmulhsu_tu(vint8m8_t vd, vint8m8_t vs2, uint8_t rs1,
                             size_t vl);
vint16mf4_t __riscv_vmulhsu_tu(vint16mf4_t vd, vint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmulhsu_tu(vint16mf4_t vd, vint16mf4_t vs2, uint16_t rs1,
                               size_t vl);
vint16mf2_t __riscv_vmulhsu_tu(vint16mf2_t vd, vint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmulhsu_tu(vint16mf2_t vd, vint16mf2_t vs2, uint16_t rs1,
                               size_t vl);
vint16m1_t __riscv_vmulhsu_tu(vint16m1_t vd, vint16m1_t vs2, vuint16m1_t vs1,
                              size_t vl);
vint16m1_t __riscv_vmulhsu_tu(vint16m1_t vd, vint16m1_t vs2, uint16_t rs1,
                              size_t vl);
vint16m2_t __riscv_vmulhsu_tu(vint16m2_t vd, vint16m2_t vs2, vuint16m2_t vs1,
                              size_t vl);
vint16m2_t __riscv_vmulhsu_tu(vint16m2_t vd, vint16m2_t vs2, uint16_t rs1,
                              size_t vl);
vint16m4_t __riscv_vmulhsu_tu(vint16m4_t vd, vint16m4_t vs2, vuint16m4_t vs1,
                              size_t vl);
vint16m4_t __riscv_vmulhsu_tu(vint16m4_t vd, vint16m4_t vs2, uint16_t rs1,
                              size_t vl);
vint16m8_t __riscv_vmulhsu_tu(vint16m8_t vd, vint16m8_t vs2, vuint16m8_t vs1,
                              size_t vl);
vint16m8_t __riscv_vmulhsu_tu(vint16m8_t vd, vint16m8_t vs2, uint16_t rs1,
                              size_t vl);
vint32mf2_t __riscv_vmulhsu_tu(vint32mf2_t vd, vint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmulhsu_tu(vint32mf2_t vd, vint32mf2_t vs2, uint32_t rs1,
                               size_t vl);
vint32m1_t __riscv_vmulhsu_tu(vint32m1_t vd, vint32m1_t vs2, vuint32m1_t vs1,
                              size_t vl);
vint32m1_t __riscv_vmulhsu_tu(vint32m1_t vd, vint32m1_t vs2, uint32_t rs1,
                              size_t vl);
vint32m2_t __riscv_vmulhsu_tu(vint32m2_t vd, vint32m2_t vs2, vuint32m2_t vs1,
                              size_t vl);
vint32m2_t __riscv_vmulhsu_tu(vint32m2_t vd, vint32m2_t vs2, uint32_t rs1,
                              size_t vl);
vint32m4_t __riscv_vmulhsu_tu(vint32m4_t vd, vint32m4_t vs2, vuint32m4_t vs1,
                              size_t vl);
vint32m4_t __riscv_vmulhsu_tu(vint32m4_t vd, vint32m4_t vs2, uint32_t rs1,
                              size_t vl);
vint32m8_t __riscv_vmulhsu_tu(vint32m8_t vd, vint32m8_t vs2, vuint32m8_t vs1,
                              size_t vl);
vint32m8_t __riscv_vmulhsu_tu(vint32m8_t vd, vint32m8_t vs2, uint32_t rs1,
                              size_t vl);
vint64m1_t __riscv_vmulhsu_tu(vint64m1_t vd, vint64m1_t vs2, vuint64m1_t vs1,
                              size_t vl);
vint64m1_t __riscv_vmulhsu_tu(vint64m1_t vd, vint64m1_t vs2, uint64_t rs1,
                              size_t vl);
vint64m2_t __riscv_vmulhsu_tu(vint64m2_t vd, vint64m2_t vs2, vuint64m2_t vs1,
                              size_t vl);
vint64m2_t __riscv_vmulhsu_tu(vint64m2_t vd, vint64m2_t vs2, uint64_t rs1,
                              size_t vl);
vint64m4_t __riscv_vmulhsu_tu(vint64m4_t vd, vint64m4_t vs2, vuint64m4_t vs1,
                              size_t vl);
vint64m4_t __riscv_vmulhsu_tu(vint64m4_t vd, vint64m4_t vs2, uint64_t rs1,
                              size_t vl);
vint64m8_t __riscv_vmulhsu_tu(vint64m8_t vd, vint64m8_t vs2, vuint64m8_t vs1,
                              size_t vl);
vint64m8_t __riscv_vmulhsu_tu(vint64m8_t vd, vint64m8_t vs2, uint64_t rs1,
                              size_t vl);
// __riscv_vmul_tu overloads for unsigned element types.
// Each type has two forms: vector-vector (last operand `vs1`) and
// vector-scalar (last operand `rs1`). `vd`, `vs2`, `vs1` and the result all
// share one unsigned vector type; `rs1` is the unsigned scalar of the matching
// element width (uint8_t here, uint16_t/uint32_t/uint64_t for wider types).
// `vl` is passed as size_t.
// NOTE(review): `_tu` presumably selects the tail-undisturbed policy with `vd`
// supplying the preserved tail elements -- confirm against the spec.
vuint8mf8_t __riscv_vmul_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                            size_t vl);
vuint8mf8_t __riscv_vmul_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                            size_t vl);
vuint8mf4_t __riscv_vmul_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                            size_t vl);
vuint8mf4_t __riscv_vmul_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                            size_t vl);
vuint8mf2_t __riscv_vmul_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                            size_t vl);
vuint8mf2_t __riscv_vmul_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m1_t __riscv_vmul_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                           size_t vl);
vuint8m1_t __riscv_vmul_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m2_t __riscv_vmul_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                           size_t vl);
vuint8m2_t __riscv_vmul_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m4_t __riscv_vmul_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                           size_t vl);
vuint8m4_t __riscv_vmul_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                           size_t vl);
vuint8m8_t __riscv_vmul_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                           size_t vl);
vuint8m8_t __riscv_vmul_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                           size_t vl);
vuint16mf4_t __riscv_vmul_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmul_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                             size_t vl);
vuint16mf2_t __riscv_vmul_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmul_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m1_t __riscv_vmul_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                            size_t vl);
vuint16m1_t __riscv_vmul_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m2_t __riscv_vmul_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                            size_t vl);
vuint16m2_t __riscv_vmul_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m4_t __riscv_vmul_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                            size_t vl);
vuint16m4_t __riscv_vmul_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                            size_t vl);
vuint16m8_t __riscv_vmul_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                            size_t vl);
vuint16m8_t __riscv_vmul_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                            size_t vl);
vuint32mf2_t __riscv_vmul_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmul_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m1_t __riscv_vmul_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                            size_t vl);
vuint32m1_t __riscv_vmul_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m2_t __riscv_vmul_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                            size_t vl);
vuint32m2_t __riscv_vmul_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m4_t __riscv_vmul_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                            size_t vl);
vuint32m4_t __riscv_vmul_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                            size_t vl);
vuint32m8_t __riscv_vmul_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                            size_t vl);
vuint32m8_t __riscv_vmul_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                            size_t vl);
vuint64m1_t __riscv_vmul_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                            size_t vl);
vuint64m1_t __riscv_vmul_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m2_t __riscv_vmul_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                            size_t vl);
vuint64m2_t __riscv_vmul_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m4_t __riscv_vmul_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                            size_t vl);
vuint64m4_t __riscv_vmul_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                            size_t vl);
vuint64m8_t __riscv_vmul_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                            size_t vl);
vuint64m8_t __riscv_vmul_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                            size_t vl);
// __riscv_vmulhu_tu overloads (unsigned operands only).
// Each type has two forms: vector-vector (last operand `vs1`) and
// vector-scalar (last operand `rs1`). `vd`, `vs2`, `vs1` and the result all
// share one unsigned vector type; `rs1` is the unsigned scalar of the matching
// element width. `vl` is passed as size_t.
// NOTE(review): presumably yields the high half of the unsigned by unsigned
// product, with `_tu` selecting the tail-undisturbed policy -- confirm against
// the spec.
vuint8mf8_t __riscv_vmulhu_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                              size_t vl);
vuint8mf8_t __riscv_vmulhu_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                              size_t vl);
vuint8mf4_t __riscv_vmulhu_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                              size_t vl);
vuint8mf4_t __riscv_vmulhu_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                              size_t vl);
vuint8mf2_t __riscv_vmulhu_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                              size_t vl);
vuint8mf2_t __riscv_vmulhu_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                              size_t vl);
vuint8m1_t __riscv_vmulhu_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                             size_t vl);
vuint8m1_t __riscv_vmulhu_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                             size_t vl);
vuint8m2_t __riscv_vmulhu_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                             size_t vl);
vuint8m2_t __riscv_vmulhu_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                             size_t vl);
vuint8m4_t __riscv_vmulhu_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                             size_t vl);
vuint8m4_t __riscv_vmulhu_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                             size_t vl);
vuint8m8_t __riscv_vmulhu_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                             size_t vl);
vuint8m8_t __riscv_vmulhu_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                             size_t vl);
vuint16mf4_t __riscv_vmulhu_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmulhu_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                               size_t vl);
vuint16mf2_t __riscv_vmulhu_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmulhu_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                               size_t vl);
vuint16m1_t __riscv_vmulhu_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                              size_t vl);
vuint16m1_t __riscv_vmulhu_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                              size_t vl);
vuint16m2_t __riscv_vmulhu_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                              size_t vl);
vuint16m2_t __riscv_vmulhu_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                              size_t vl);
vuint16m4_t __riscv_vmulhu_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                              size_t vl);
vuint16m4_t __riscv_vmulhu_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                              size_t vl);
vuint16m8_t __riscv_vmulhu_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                              size_t vl);
vuint16m8_t __riscv_vmulhu_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                              size_t vl);
vuint32mf2_t __riscv_vmulhu_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmulhu_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                               size_t vl);
vuint32m1_t __riscv_vmulhu_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                              size_t vl);
vuint32m1_t __riscv_vmulhu_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                              size_t vl);
vuint32m2_t __riscv_vmulhu_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                              size_t vl);
vuint32m2_t __riscv_vmulhu_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                              size_t vl);
vuint32m4_t __riscv_vmulhu_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                              size_t vl);
vuint32m4_t __riscv_vmulhu_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                              size_t vl);
vuint32m8_t __riscv_vmulhu_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                              size_t vl);
vuint32m8_t __riscv_vmulhu_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                              size_t vl);
vuint64m1_t __riscv_vmulhu_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                              size_t vl);
vuint64m1_t __riscv_vmulhu_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                              size_t vl);
vuint64m2_t __riscv_vmulhu_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                              size_t vl);
vuint64m2_t __riscv_vmulhu_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                              size_t vl);
vuint64m4_t __riscv_vmulhu_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                              size_t vl);
vuint64m4_t __riscv_vmulhu_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                              size_t vl);
vuint64m8_t __riscv_vmulhu_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                              size_t vl);
vuint64m8_t __riscv_vmulhu_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                              size_t vl);
// masked functions
// __riscv_vmul_tum overloads for signed element types.
// Same operand layout as the unmasked `_tu` forms, with a mask operand `vm`
// added as the first parameter. The mask type varies with the vector type:
// e.g. vint8mf8_t pairs with vbool64_t, vint16m2_t with vbool8_t, and
// vint8m8_t with vbool1_t in this listing.
// Each type has a vector-vector form (`vs1`) and a vector-scalar form (`rs1`,
// a signed scalar of the matching element width).
// NOTE(review): `_tum` presumably means tail-undisturbed with masked-off
// elements handled per the mask policy, `vd` supplying the preserved values --
// confirm against the spec.
vint8mf8_t __riscv_vmul_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmul_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmul_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmul_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmul_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmul_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint8m1_t __riscv_vmul_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmul_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                           size_t vl);
vint8m2_t __riscv_vmul_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmul_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                           size_t vl);
vint8m4_t __riscv_vmul_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmul_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                           size_t vl);
vint8m8_t __riscv_vmul_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmul_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vmul_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmul_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmul_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmul_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint16m1_t __riscv_vmul_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmul_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint16m2_t __riscv_vmul_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmul_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint16m4_t __riscv_vmul_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmul_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint16m8_t __riscv_vmul_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmul_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmul_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmul_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint32m1_t __riscv_vmul_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmul_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint32m2_t __riscv_vmul_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmul_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint32m4_t __riscv_vmul_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmul_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint32m8_t __riscv_vmul_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmul_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            int32_t rs1, size_t vl);
vint64m1_t __riscv_vmul_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmul_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            int64_t rs1, size_t vl);
vint64m2_t __riscv_vmul_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmul_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            int64_t rs1, size_t vl);
vint64m4_t __riscv_vmul_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmul_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            int64_t rs1, size_t vl);
vint64m8_t __riscv_vmul_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmul_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            int64_t rs1, size_t vl);
// __riscv_vmulh_tum overloads (signed operands only).
// First parameter is the mask `vm`; `vd`, `vs2`, `vs1` and the result share
// one signed vector type. The vector-scalar form replaces `vs1` with `rs1`,
// a signed scalar of the matching element width. `vl` is passed as size_t.
// NOTE(review): presumably yields the high half of the signed by signed
// product under the `_tum` (tail-undisturbed, masked) policy -- confirm
// against the spec.
vint8mf8_t __riscv_vmulh_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmulh_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmulh_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmulh_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmulh_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmulh_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             int8_t rs1, size_t vl);
vint8m1_t __riscv_vmulh_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmulh_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            int8_t rs1, size_t vl);
vint8m2_t __riscv_vmulh_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmulh_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            int8_t rs1, size_t vl);
vint8m4_t __riscv_vmulh_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmulh_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            int8_t rs1, size_t vl);
vint8m8_t __riscv_vmulh_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmulh_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            int8_t rs1, size_t vl);
vint16mf4_t __riscv_vmulh_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmulh_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmulh_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmulh_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              int16_t rs1, size_t vl);
vint16m1_t __riscv_vmulh_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmulh_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             int16_t rs1, size_t vl);
vint16m2_t __riscv_vmulh_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmulh_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             int16_t rs1, size_t vl);
vint16m4_t __riscv_vmulh_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmulh_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             int16_t rs1, size_t vl);
vint16m8_t __riscv_vmulh_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmulh_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmulh_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmulh_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              int32_t rs1, size_t vl);
vint32m1_t __riscv_vmulh_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmulh_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             int32_t rs1, size_t vl);
vint32m2_t __riscv_vmulh_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmulh_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             int32_t rs1, size_t vl);
vint32m4_t __riscv_vmulh_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmulh_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             int32_t rs1, size_t vl);
vint32m8_t __riscv_vmulh_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmulh_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             int32_t rs1, size_t vl);
vint64m1_t __riscv_vmulh_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmulh_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             int64_t rs1, size_t vl);
vint64m2_t __riscv_vmulh_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmulh_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             int64_t rs1, size_t vl);
vint64m4_t __riscv_vmulh_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmulh_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             int64_t rs1, size_t vl);
vint64m8_t __riscv_vmulh_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmulh_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             int64_t rs1, size_t vl);
// __riscv_vmulhsu_tum overloads (mixed-sign operands, masked).
// First parameter is the mask `vm`. `vd` and `vs2` are signed vectors of the
// same type as the result, while the last multiplicand is unsigned: `vs1` is
// the unsigned vector type of the same element width and grouping, and `rs1`
// is the unsigned scalar of that element width. `vl` is passed as size_t.
// NOTE(review): presumably yields the high half of the signed(vs2) by
// unsigned(vs1/rs1) product under the `_tum` (tail-undisturbed, masked)
// policy -- confirm against the spec.
vint8mf8_t __riscv_vmulhsu_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                               vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmulhsu_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                               uint8_t rs1, size_t vl);
vint8mf4_t __riscv_vmulhsu_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                               vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmulhsu_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                               uint8_t rs1, size_t vl);
vint8mf2_t __riscv_vmulhsu_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                               vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmulhsu_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                               uint8_t rs1, size_t vl);
vint8m1_t __riscv_vmulhsu_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                              vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmulhsu_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                              uint8_t rs1, size_t vl);
vint8m2_t __riscv_vmulhsu_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                              vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmulhsu_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                              uint8_t rs1, size_t vl);
vint8m4_t __riscv_vmulhsu_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                              vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmulhsu_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                              uint8_t rs1, size_t vl);
vint8m8_t __riscv_vmulhsu_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                              vuint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmulhsu_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                              uint8_t rs1, size_t vl);
vint16mf4_t __riscv_vmulhsu_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmulhsu_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                uint16_t rs1, size_t vl);
vint16mf2_t __riscv_vmulhsu_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmulhsu_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                uint16_t rs1, size_t vl);
vint16m1_t __riscv_vmulhsu_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                               vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmulhsu_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                               uint16_t rs1, size_t vl);
vint16m2_t __riscv_vmulhsu_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                               vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmulhsu_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                               uint16_t rs1, size_t vl);
// __riscv_vmulhsu_tum overloads; this run covers the vint16m4_t through
// vint64m8_t result types. Each type has two forms: one whose last operand is
// an unsigned vector vs1 and one whose last operand is an unsigned scalar rs1
// of the same element width. vd, vs2 and the return value share the signed
// vector type. The vboolN_t type of vm changes with the data type (e.g.
// vbool4_t for vint16m4_t, vbool8_t for vint64m8_t).
// NOTE(review): presumably the signed-by-unsigned high-half multiply under the
// tail-undisturbed / mask-agnostic policy -- confirm against the RVV
// intrinsics specification.
vint16m4_t __riscv_vmulhsu_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                               vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmulhsu_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                               uint16_t rs1, size_t vl);
vint16m8_t __riscv_vmulhsu_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                               vuint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmulhsu_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                               uint16_t rs1, size_t vl);
vint32mf2_t __riscv_vmulhsu_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmulhsu_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                uint32_t rs1, size_t vl);
vint32m1_t __riscv_vmulhsu_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                               vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmulhsu_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                               uint32_t rs1, size_t vl);
vint32m2_t __riscv_vmulhsu_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                               vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmulhsu_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                               uint32_t rs1, size_t vl);
vint32m4_t __riscv_vmulhsu_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                               vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmulhsu_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                               uint32_t rs1, size_t vl);
vint32m8_t __riscv_vmulhsu_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                               vuint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmulhsu_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                               uint32_t rs1, size_t vl);
vint64m1_t __riscv_vmulhsu_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                               vuint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmulhsu_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                               uint64_t rs1, size_t vl);
vint64m2_t __riscv_vmulhsu_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                               vuint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmulhsu_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                               uint64_t rs1, size_t vl);
vint64m4_t __riscv_vmulhsu_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                               vuint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmulhsu_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                               uint64_t rs1, size_t vl);
vint64m8_t __riscv_vmulhsu_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                               vuint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmulhsu_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                               uint64_t rs1, size_t vl);
// Unsigned overloads of __riscv_vmul_tum, from vuint8mf8_t through
// vuint64m8_t. Each type has a vector-vector form (last operand vs1 of the
// same vector type) and a vector-scalar form (last operand rs1 of the matching
// uintN_t). vd, vs2 and the return value all share one unsigned vector type.
// The vboolN_t type of vm changes with the data type (e.g. vbool64_t for
// vuint8mf8_t, vbool1_t for vuint8m8_t).
// NOTE(review): presumably the element-wise multiply (low half of the product)
// under the tail-undisturbed / mask-agnostic policy -- confirm against the RVV
// intrinsics specification.
vuint8mf8_t __riscv_vmul_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vmul_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vmul_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vmul_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vmul_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vmul_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vmul_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vmul_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vmul_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vmul_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vmul_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vmul_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vmul_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vmul_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vmul_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmul_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vmul_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmul_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vmul_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vmul_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vmul_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vmul_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vmul_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vmul_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vmul_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vmul_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vmul_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmul_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vmul_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vmul_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vmul_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vmul_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vmul_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vmul_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vmul_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vmul_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vmul_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vmul_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vmul_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vmul_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vmul_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vmul_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vmul_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vmul_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             uint64_t rs1, size_t vl);
// __riscv_vmulhu_tum overloads, from vuint8mf8_t through vuint64m8_t. All
// operands are unsigned. Each type has a vector-vector form (last operand vs1)
// and a vector-scalar form (last operand rs1 of the matching uintN_t). vd, vs2
// and the return value share one unsigned vector type. The vboolN_t type of vm
// changes with the data type (e.g. vbool64_t for vuint8mf8_t, vbool8_t for
// vuint64m8_t).
// NOTE(review): presumably the unsigned high-half multiply under the
// tail-undisturbed / mask-agnostic policy -- confirm against the RVV
// intrinsics specification.
vuint8mf8_t __riscv_vmulhu_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                               vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vmulhu_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                               uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vmulhu_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                               vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vmulhu_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                               uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vmulhu_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                               vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vmulhu_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                               uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vmulhu_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                              vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vmulhu_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vmulhu_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                              vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vmulhu_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vmulhu_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                              vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vmulhu_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vmulhu_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                              vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vmulhu_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                              uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vmulhu_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                                vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmulhu_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                                uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vmulhu_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                                vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmulhu_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                                uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vmulhu_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                               vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vmulhu_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vmulhu_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                               vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vmulhu_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vmulhu_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                               vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vmulhu_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vmulhu_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                               vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vmulhu_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                               uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vmulhu_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                                vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmulhu_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                                uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vmulhu_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                               vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vmulhu_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vmulhu_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                               vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vmulhu_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vmulhu_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                               vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vmulhu_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vmulhu_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                               vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vmulhu_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                               uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vmulhu_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                               vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vmulhu_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vmulhu_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                               vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vmulhu_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vmulhu_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                               vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vmulhu_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vmulhu_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                               vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vmulhu_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                               uint64_t rs1, size_t vl);
// masked functions (_tumu suffix)
// Signed overloads of __riscv_vmul_tumu, from vint8mf8_t through vint64m8_t.
// Each type has a vector-vector form (last operand vs1 of the same vector
// type) and a vector-scalar form (last operand rs1 of the matching intN_t).
// vd, vs2 and the return value all share one signed vector type. The vboolN_t
// type of vm changes with the data type (e.g. vbool64_t for vint8mf8_t,
// vbool1_t for vint8m8_t).
// NOTE(review): presumably the element-wise multiply (low half of the product)
// under the tail-undisturbed / mask-undisturbed policy -- confirm against the
// RVV intrinsics specification.
vint8mf8_t __riscv_vmul_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmul_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmul_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmul_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmul_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmul_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             int8_t rs1, size_t vl);
vint8m1_t __riscv_vmul_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmul_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            int8_t rs1, size_t vl);
vint8m2_t __riscv_vmul_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmul_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            int8_t rs1, size_t vl);
vint8m4_t __riscv_vmul_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmul_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            int8_t rs1, size_t vl);
vint8m8_t __riscv_vmul_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmul_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            int8_t rs1, size_t vl);
vint16mf4_t __riscv_vmul_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmul_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmul_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmul_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              int16_t rs1, size_t vl);
vint16m1_t __riscv_vmul_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmul_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             int16_t rs1, size_t vl);
vint16m2_t __riscv_vmul_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmul_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             int16_t rs1, size_t vl);
vint16m4_t __riscv_vmul_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmul_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             int16_t rs1, size_t vl);
vint16m8_t __riscv_vmul_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmul_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmul_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmul_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              int32_t rs1, size_t vl);
vint32m1_t __riscv_vmul_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmul_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             int32_t rs1, size_t vl);
vint32m2_t __riscv_vmul_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmul_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             int32_t rs1, size_t vl);
vint32m4_t __riscv_vmul_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmul_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             int32_t rs1, size_t vl);
vint32m8_t __riscv_vmul_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmul_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             int32_t rs1, size_t vl);
vint64m1_t __riscv_vmul_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmul_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             int64_t rs1, size_t vl);
vint64m2_t __riscv_vmul_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmul_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             int64_t rs1, size_t vl);
vint64m4_t __riscv_vmul_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmul_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             int64_t rs1, size_t vl);
vint64m8_t __riscv_vmul_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmul_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             int64_t rs1, size_t vl);
// __riscv_vmulh_tumu overloads, from vint8mf8_t through vint64m8_t. All
// operands are signed. Each type has a vector-vector form (last operand vs1)
// and a vector-scalar form (last operand rs1 of the matching intN_t). vd, vs2
// and the return value share one signed vector type. The vboolN_t type of vm
// changes with the data type (e.g. vbool64_t for vint8mf8_t, vbool8_t for
// vint64m8_t).
// NOTE(review): presumably the signed high-half multiply under the
// tail-undisturbed / mask-undisturbed policy -- confirm against the RVV
// intrinsics specification.
vint8mf8_t __riscv_vmulh_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                              vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmulh_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                              int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmulh_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                              vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmulh_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                              int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmulh_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                              vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmulh_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                              int8_t rs1, size_t vl);
vint8m1_t __riscv_vmulh_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                             vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmulh_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                             int8_t rs1, size_t vl);
vint8m2_t __riscv_vmulh_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                             vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmulh_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                             int8_t rs1, size_t vl);
vint8m4_t __riscv_vmulh_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                             vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmulh_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                             int8_t rs1, size_t vl);
vint8m8_t __riscv_vmulh_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                             vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmulh_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                             int8_t rs1, size_t vl);
vint16mf4_t __riscv_vmulh_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                               vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmulh_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                               int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmulh_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                               vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmulh_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                               int16_t rs1, size_t vl);
vint16m1_t __riscv_vmulh_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                              vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmulh_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                              int16_t rs1, size_t vl);
vint16m2_t __riscv_vmulh_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                              vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmulh_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                              int16_t rs1, size_t vl);
vint16m4_t __riscv_vmulh_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                              vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmulh_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                              int16_t rs1, size_t vl);
vint16m8_t __riscv_vmulh_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                              vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmulh_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                              int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmulh_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                               vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmulh_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                               int32_t rs1, size_t vl);
vint32m1_t __riscv_vmulh_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                              vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmulh_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                              int32_t rs1, size_t vl);
vint32m2_t __riscv_vmulh_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                              vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmulh_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                              int32_t rs1, size_t vl);
vint32m4_t __riscv_vmulh_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                              vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmulh_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                              int32_t rs1, size_t vl);
vint32m8_t __riscv_vmulh_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                              vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmulh_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                              int32_t rs1, size_t vl);
vint64m1_t __riscv_vmulh_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                              vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmulh_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                              int64_t rs1, size_t vl);
vint64m2_t __riscv_vmulh_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                              vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmulh_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                              int64_t rs1, size_t vl);
vint64m4_t __riscv_vmulh_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                              vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmulh_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                              int64_t rs1, size_t vl);
vint64m8_t __riscv_vmulh_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                              vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmulh_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                              int64_t rs1, size_t vl);
// __riscv_vmulhsu_tumu overloads, from vint8mf8_t through vint64m8_t. The
// operand signedness is mixed: vd, vs2 and the return value are the signed
// vector type, while the last data operand is unsigned -- either a vector vs1
// of the same element width and LMUL or a scalar rs1 of the matching uintN_t.
// The vboolN_t type of vm changes with the data type (e.g. vbool64_t for
// vint8mf8_t, vbool8_t for vint64m8_t).
// NOTE(review): presumably the signed-by-unsigned high-half multiply under the
// tail-undisturbed / mask-undisturbed policy -- confirm against the RVV
// intrinsics specification.
vint8mf8_t __riscv_vmulhsu_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmulhsu_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                                uint8_t rs1, size_t vl);
vint8mf4_t __riscv_vmulhsu_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmulhsu_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                                uint8_t rs1, size_t vl);
vint8mf2_t __riscv_vmulhsu_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmulhsu_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                                uint8_t rs1, size_t vl);
vint8m1_t __riscv_vmulhsu_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                               vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmulhsu_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                               uint8_t rs1, size_t vl);
vint8m2_t __riscv_vmulhsu_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                               vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmulhsu_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                               uint8_t rs1, size_t vl);
vint8m4_t __riscv_vmulhsu_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                               vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmulhsu_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                               uint8_t rs1, size_t vl);
vint8m8_t __riscv_vmulhsu_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                               vuint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmulhsu_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                               uint8_t rs1, size_t vl);
vint16mf4_t __riscv_vmulhsu_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                 vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmulhsu_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                                 uint16_t rs1, size_t vl);
vint16mf2_t __riscv_vmulhsu_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                 vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmulhsu_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                                 uint16_t rs1, size_t vl);
vint16m1_t __riscv_vmulhsu_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmulhsu_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                                uint16_t rs1, size_t vl);
vint16m2_t __riscv_vmulhsu_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmulhsu_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                                uint16_t rs1, size_t vl);
vint16m4_t __riscv_vmulhsu_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmulhsu_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                                uint16_t rs1, size_t vl);
vint16m8_t __riscv_vmulhsu_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                vuint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmulhsu_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                                uint16_t rs1, size_t vl);
vint32mf2_t __riscv_vmulhsu_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                 vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmulhsu_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                                 uint32_t rs1, size_t vl);
vint32m1_t __riscv_vmulhsu_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmulhsu_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                                uint32_t rs1, size_t vl);
vint32m2_t __riscv_vmulhsu_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmulhsu_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                                uint32_t rs1, size_t vl);
vint32m4_t __riscv_vmulhsu_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmulhsu_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                                uint32_t rs1, size_t vl);
vint32m8_t __riscv_vmulhsu_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                vuint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmulhsu_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                                uint32_t rs1, size_t vl);
vint64m1_t __riscv_vmulhsu_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                vuint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmulhsu_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                                uint64_t rs1, size_t vl);
vint64m2_t __riscv_vmulhsu_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                vuint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmulhsu_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                                uint64_t rs1, size_t vl);
vint64m4_t __riscv_vmulhsu_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                vuint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmulhsu_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                                uint64_t rs1, size_t vl);
vint64m8_t __riscv_vmulhsu_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                vuint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmulhsu_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                                uint64_t rs1, size_t vl);
// Unsigned overloads of __riscv_vmul_tumu; this run starts at vuint8mf8_t and
// the list continues past it. Each type has a vector-vector form (last operand
// vs1 of the same vector type) and a vector-scalar form (last operand rs1 of
// the matching uintN_t). vd, vs2 and the return value all share one unsigned
// vector type. The vboolN_t type of vm changes with the data type (e.g.
// vbool64_t for vuint8mf8_t, vbool1_t for vuint8m8_t).
// NOTE(review): presumably the element-wise multiply (low half of the product)
// under the tail-undisturbed / mask-undisturbed policy -- confirm against the
// RVV intrinsics specification.
vuint8mf8_t __riscv_vmul_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vmul_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vmul_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vmul_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vmul_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vmul_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vmul_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vmul_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vmul_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vmul_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vmul_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vmul_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vmul_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vmul_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vmul_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmul_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vmul_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmul_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vmul_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vmul_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vmul_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vmul_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vmul_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vmul_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vmul_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vmul_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vmul_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmul_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vmul_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vmul_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vmul_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vmul_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vmul_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vmul_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vmul_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vmul_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vmul_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vmul_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vmul_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vmul_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vmul_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vmul_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vmul_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vmul_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              uint64_t rs1, size_t vl);
// __riscv_vmulhu_tumu overloads for unsigned element types (vuint8* ..
// vuint64*). Each vector type has a vector-vector form (vs1) and a
// vector-scalar form (rs1, a uintN_t matching the element width); all
// operands and the result are unsigned and share one vector type.
// Every overload takes a mask vm (vboolN_t, N = element bits / LMUL), an
// operand vd of the result type, the source vs2 and a length vl.
// NOTE(review): by instruction naming, vmulhu presumably yields the high half
// of the unsigned x unsigned product, and _tumu presumably means
// tail-undisturbed, mask-undisturbed -- confirm against the RVV spec.
vuint8mf8_t __riscv_vmulhu_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vmulhu_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                                uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vmulhu_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vmulhu_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                                uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vmulhu_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vmulhu_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                                uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vmulhu_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                               vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vmulhu_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                               uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vmulhu_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                               vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vmulhu_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                               uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vmulhu_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                               vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vmulhu_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                               uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vmulhu_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                               vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vmulhu_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                               uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vmulhu_tumu(vbool64_t vm, vuint16mf4_t vd,
                                 vuint16mf4_t vs2, vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmulhu_tumu(vbool64_t vm, vuint16mf4_t vd,
                                 vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vmulhu_tumu(vbool32_t vm, vuint16mf2_t vd,
                                 vuint16mf2_t vs2, vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmulhu_tumu(vbool32_t vm, vuint16mf2_t vd,
                                 vuint16mf2_t vs2, uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vmulhu_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vmulhu_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                                uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vmulhu_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vmulhu_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                                uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vmulhu_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vmulhu_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                                uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vmulhu_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vmulhu_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                                uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vmulhu_tumu(vbool64_t vm, vuint32mf2_t vd,
                                 vuint32mf2_t vs2, vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmulhu_tumu(vbool64_t vm, vuint32mf2_t vd,
                                 vuint32mf2_t vs2, uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vmulhu_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vmulhu_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                                uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vmulhu_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vmulhu_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                                uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vmulhu_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vmulhu_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                                uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vmulhu_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vmulhu_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                                uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vmulhu_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vmulhu_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                                uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vmulhu_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vmulhu_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                                uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vmulhu_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vmulhu_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                                uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vmulhu_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vmulhu_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                                uint64_t rs1, size_t vl);
// masked functions
// __riscv_vmul_mu overloads for signed element types (vint8* .. vint64*).
// Each vector type has a vector-vector form (vs1) and a vector-scalar form
// (rs1, an intN_t matching the element width). Every overload takes a mask
// vm (vboolN_t, N = element bits / LMUL, e.g. vint8mf8_t -> vbool64_t), an
// operand vd of the result type, the source vs2 and a length vl.
// NOTE(review): the _mu suffix presumably selects the mask-undisturbed
// policy, with vd supplying masked-off elements -- confirm against the
// intrinsics policy naming.
vint8mf8_t __riscv_vmul_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmul_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmul_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmul_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmul_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmul_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vint8m1_t __riscv_vmul_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmul_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vint8m2_t __riscv_vmul_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmul_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vint8m4_t __riscv_vmul_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmul_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vint8m8_t __riscv_vmul_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmul_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
vint16mf4_t __riscv_vmul_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmul_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmul_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmul_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            int16_t rs1, size_t vl);
vint16m1_t __riscv_vmul_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmul_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vint16m2_t __riscv_vmul_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmul_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           int16_t rs1, size_t vl);
vint16m4_t __riscv_vmul_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmul_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           int16_t rs1, size_t vl);
vint16m8_t __riscv_vmul_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmul_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmul_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmul_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            int32_t rs1, size_t vl);
vint32m1_t __riscv_vmul_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmul_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vint32m2_t __riscv_vmul_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmul_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vint32m4_t __riscv_vmul_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmul_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           int32_t rs1, size_t vl);
vint32m8_t __riscv_vmul_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmul_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           int32_t rs1, size_t vl);
vint64m1_t __riscv_vmul_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmul_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vint64m2_t __riscv_vmul_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmul_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vint64m4_t __riscv_vmul_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmul_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vint64m8_t __riscv_vmul_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmul_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           int64_t rs1, size_t vl);
// __riscv_vmulh_mu overloads for signed element types (vint8* .. vint64*).
// Each vector type has a vector-vector form (vs1) and a vector-scalar form
// (rs1, an intN_t matching the element width); both operands and the result
// are signed. Every overload takes a mask vm (vboolN_t, N = element bits /
// LMUL), an operand vd of the result type, the source vs2 and a length vl.
// NOTE(review): by instruction naming, vmulh presumably yields the high half
// of the signed x signed product, and _mu presumably means mask-undisturbed
// -- confirm against the RVV spec.
vint8mf8_t __riscv_vmulh_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmulh_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmulh_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmulh_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmulh_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmulh_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint8m1_t __riscv_vmulh_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmulh_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                           size_t vl);
vint8m2_t __riscv_vmulh_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmulh_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                           size_t vl);
vint8m4_t __riscv_vmulh_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmulh_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                           size_t vl);
vint8m8_t __riscv_vmulh_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmulh_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vmulh_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmulh_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmulh_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmulh_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint16m1_t __riscv_vmulh_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmulh_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint16m2_t __riscv_vmulh_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmulh_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint16m4_t __riscv_vmulh_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmulh_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint16m8_t __riscv_vmulh_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmulh_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmulh_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmulh_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint32m1_t __riscv_vmulh_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmulh_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint32m2_t __riscv_vmulh_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmulh_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint32m4_t __riscv_vmulh_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmulh_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint32m8_t __riscv_vmulh_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmulh_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            int32_t rs1, size_t vl);
vint64m1_t __riscv_vmulh_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmulh_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            int64_t rs1, size_t vl);
vint64m2_t __riscv_vmulh_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmulh_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            int64_t rs1, size_t vl);
vint64m4_t __riscv_vmulh_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmulh_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            int64_t rs1, size_t vl);
vint64m8_t __riscv_vmulh_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmulh_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            int64_t rs1, size_t vl);
// __riscv_vmulhsu_mu overloads: mixed-signedness operands. In every overload
// vd, vs2 and the result are signed (vintN*), while the second operand is
// unsigned -- a vuintN* vector of the same element width and LMUL (vs1) or a
// uintN_t scalar (rs1). Every overload also takes a mask vm (vboolN_t,
// N = element bits / LMUL) and a length vl.
// NOTE(review): by instruction naming, vmulhsu presumably yields the high
// half of the signed(vs2) x unsigned(vs1/rs1) product, and _mu presumably
// means mask-undisturbed -- confirm against the RVV spec.
vint8mf8_t __riscv_vmulhsu_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmulhsu_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                              uint8_t rs1, size_t vl);
vint8mf4_t __riscv_vmulhsu_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmulhsu_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                              uint8_t rs1, size_t vl);
vint8mf2_t __riscv_vmulhsu_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmulhsu_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vint8m1_t __riscv_vmulhsu_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmulhsu_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                             uint8_t rs1, size_t vl);
vint8m2_t __riscv_vmulhsu_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmulhsu_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                             uint8_t rs1, size_t vl);
vint8m4_t __riscv_vmulhsu_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmulhsu_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                             uint8_t rs1, size_t vl);
vint8m8_t __riscv_vmulhsu_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmulhsu_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                             uint8_t rs1, size_t vl);
vint16mf4_t __riscv_vmulhsu_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmulhsu_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vint16mf2_t __riscv_vmulhsu_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmulhsu_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vint16m1_t __riscv_vmulhsu_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmulhsu_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vint16m2_t __riscv_vmulhsu_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmulhsu_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vint16m4_t __riscv_vmulhsu_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmulhsu_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vint16m8_t __riscv_vmulhsu_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmulhsu_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                              uint16_t rs1, size_t vl);
vint32mf2_t __riscv_vmulhsu_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmulhsu_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vint32m1_t __riscv_vmulhsu_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmulhsu_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vint32m2_t __riscv_vmulhsu_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmulhsu_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vint32m4_t __riscv_vmulhsu_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmulhsu_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vint32m8_t __riscv_vmulhsu_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmulhsu_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                              uint32_t rs1, size_t vl);
vint64m1_t __riscv_vmulhsu_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmulhsu_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                              uint64_t rs1, size_t vl);
vint64m2_t __riscv_vmulhsu_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmulhsu_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                              uint64_t rs1, size_t vl);
vint64m4_t __riscv_vmulhsu_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmulhsu_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                              uint64_t rs1, size_t vl);
vint64m8_t __riscv_vmulhsu_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmulhsu_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                              uint64_t rs1, size_t vl);
// __riscv_vmul_mu overloads for unsigned element types (vuint8* .. vuint64*).
// Each vector type has a vector-vector form (vs1) and a vector-scalar form
// (rs1, a uintN_t matching the element width). Every overload takes a mask
// vm (vboolN_t, N = element bits / LMUL, e.g. vuint8mf8_t -> vbool64_t), an
// operand vd of the result type, the source vs2 and a length vl.
// NOTE(review): the _mu suffix presumably selects the mask-undisturbed
// policy -- confirm against the intrinsics policy naming.
vuint8mf8_t __riscv_vmul_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vmul_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                            uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vmul_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vmul_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vmul_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vmul_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vmul_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vmul_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vmul_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vmul_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vmul_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vmul_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                           uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vmul_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vmul_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                           uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vmul_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmul_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vmul_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmul_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vmul_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vmul_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vmul_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vmul_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vmul_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vmul_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                            uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vmul_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vmul_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                            uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vmul_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmul_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vmul_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vmul_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vmul_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vmul_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vmul_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vmul_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                            uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vmul_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vmul_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                            uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vmul_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vmul_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vmul_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vmul_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vmul_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vmul_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                            uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vmul_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vmul_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                            uint64_t rs1, size_t vl);
// __riscv_vmulhu_mu overloads for every unsigned vector type from
// vuint8mf8_t through vuint64m8_t. Each vector type has two prototypes:
//   (vm, vd, vs2, vs1, vl) - last operand is a vector of the same type
//   (vm, vd, vs2, rs1, vl) - last operand is a scalar uint8_t/uint16_t/
//                            uint32_t/uint64_t matching the element width
// vm is a vbool<N>_t mask whose N differs per vector type (e.g. vbool1_t with
// vuint8m8_t, vbool64_t with vuint64m1_t); vd has the same type as the
// return value; vl is a size_t.
// NOTE(review): "vmulhu" presumably returns the high half of the unsigned
// product, and "_mu" presumably selects the mask-undisturbed policy with vd
// supplying the masked-off elements - confirm against the RVV intrinsics
// specification.
vuint8mf8_t __riscv_vmulhu_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vmulhu_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vmulhu_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vmulhu_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vmulhu_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vmulhu_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vmulhu_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vmulhu_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vmulhu_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vmulhu_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vmulhu_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vmulhu_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vmulhu_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vmulhu_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vmulhu_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmulhu_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vmulhu_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmulhu_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vmulhu_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vmulhu_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vmulhu_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vmulhu_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vmulhu_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vmulhu_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vmulhu_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vmulhu_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vmulhu_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmulhu_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vmulhu_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vmulhu_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vmulhu_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vmulhu_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vmulhu_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vmulhu_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vmulhu_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vmulhu_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vmulhu_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vmulhu_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vmulhu_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vmulhu_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vmulhu_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vmulhu_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vmulhu_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vmulhu_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              uint64_t rs1, size_t vl);

Vector Integer Divide Intrinsics

// __riscv_vdiv_tu overloads (integer divide) for every signed vector type
// from vint8mf8_t through vint64m8_t. These take no mask argument. Each vector
// type has two prototypes:
//   (vd, vs2, vs1, vl) - last operand is a vector of the same type
//   (vd, vs2, rs1, vl) - last operand is a scalar int8_t/int16_t/int32_t/
//                        int64_t matching the element width
// vd has the same type as the return value; vl is a size_t.
// NOTE(review): presumably vs2 is the dividend and vs1/rs1 the divisor, and
// "_tu" selects the tail-undisturbed policy with vd supplying the tail
// elements - confirm against the RVV intrinsics specification.
vint8mf8_t __riscv_vdiv_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                           size_t vl);
vint8mf8_t __riscv_vdiv_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                           size_t vl);
vint8mf4_t __riscv_vdiv_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                           size_t vl);
vint8mf4_t __riscv_vdiv_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                           size_t vl);
vint8mf2_t __riscv_vdiv_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                           size_t vl);
vint8mf2_t __riscv_vdiv_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                           size_t vl);
vint8m1_t __riscv_vdiv_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                          size_t vl);
vint8m1_t __riscv_vdiv_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, size_t vl);
vint8m2_t __riscv_vdiv_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                          size_t vl);
vint8m2_t __riscv_vdiv_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, size_t vl);
vint8m4_t __riscv_vdiv_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                          size_t vl);
vint8m4_t __riscv_vdiv_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, size_t vl);
vint8m8_t __riscv_vdiv_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                          size_t vl);
vint8m8_t __riscv_vdiv_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, size_t vl);
vint16mf4_t __riscv_vdiv_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                            size_t vl);
vint16mf4_t __riscv_vdiv_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                            size_t vl);
vint16mf2_t __riscv_vdiv_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                            size_t vl);
vint16mf2_t __riscv_vdiv_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                            size_t vl);
vint16m1_t __riscv_vdiv_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                           size_t vl);
vint16m1_t __riscv_vdiv_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                           size_t vl);
vint16m2_t __riscv_vdiv_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                           size_t vl);
vint16m2_t __riscv_vdiv_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                           size_t vl);
vint16m4_t __riscv_vdiv_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                           size_t vl);
vint16m4_t __riscv_vdiv_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                           size_t vl);
vint16m8_t __riscv_vdiv_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                           size_t vl);
vint16m8_t __riscv_vdiv_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                           size_t vl);
vint32mf2_t __riscv_vdiv_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                            size_t vl);
vint32mf2_t __riscv_vdiv_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                            size_t vl);
vint32m1_t __riscv_vdiv_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                           size_t vl);
vint32m1_t __riscv_vdiv_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                           size_t vl);
vint32m2_t __riscv_vdiv_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                           size_t vl);
vint32m2_t __riscv_vdiv_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                           size_t vl);
vint32m4_t __riscv_vdiv_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                           size_t vl);
vint32m4_t __riscv_vdiv_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                           size_t vl);
vint32m8_t __riscv_vdiv_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                           size_t vl);
vint32m8_t __riscv_vdiv_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                           size_t vl);
vint64m1_t __riscv_vdiv_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                           size_t vl);
vint64m1_t __riscv_vdiv_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                           size_t vl);
vint64m2_t __riscv_vdiv_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                           size_t vl);
vint64m2_t __riscv_vdiv_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                           size_t vl);
vint64m4_t __riscv_vdiv_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                           size_t vl);
vint64m4_t __riscv_vdiv_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                           size_t vl);
vint64m8_t __riscv_vdiv_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                           size_t vl);
vint64m8_t __riscv_vdiv_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                           size_t vl);
// __riscv_vrem_tu overloads for every signed vector type from vint8mf8_t
// through vint64m8_t. These take no mask argument. Each vector type has two
// prototypes:
//   (vd, vs2, vs1, vl) - last operand is a vector of the same type
//   (vd, vs2, rs1, vl) - last operand is a scalar int8_t/int16_t/int32_t/
//                        int64_t matching the element width
// vd has the same type as the return value; vl is a size_t.
// NOTE(review): "vrem" is presumably the signed remainder of vs2 divided by
// vs1/rs1, and "_tu" presumably selects the tail-undisturbed policy with vd
// supplying the tail elements - confirm against the RVV intrinsics
// specification.
vint8mf8_t __riscv_vrem_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                           size_t vl);
vint8mf8_t __riscv_vrem_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                           size_t vl);
vint8mf4_t __riscv_vrem_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                           size_t vl);
vint8mf4_t __riscv_vrem_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                           size_t vl);
vint8mf2_t __riscv_vrem_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                           size_t vl);
vint8mf2_t __riscv_vrem_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                           size_t vl);
vint8m1_t __riscv_vrem_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                          size_t vl);
vint8m1_t __riscv_vrem_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1, size_t vl);
vint8m2_t __riscv_vrem_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                          size_t vl);
vint8m2_t __riscv_vrem_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1, size_t vl);
vint8m4_t __riscv_vrem_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                          size_t vl);
vint8m4_t __riscv_vrem_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1, size_t vl);
vint8m8_t __riscv_vrem_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                          size_t vl);
vint8m8_t __riscv_vrem_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1, size_t vl);
vint16mf4_t __riscv_vrem_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                            size_t vl);
vint16mf4_t __riscv_vrem_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                            size_t vl);
vint16mf2_t __riscv_vrem_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                            size_t vl);
vint16mf2_t __riscv_vrem_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                            size_t vl);
vint16m1_t __riscv_vrem_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                           size_t vl);
vint16m1_t __riscv_vrem_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                           size_t vl);
vint16m2_t __riscv_vrem_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                           size_t vl);
vint16m2_t __riscv_vrem_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                           size_t vl);
vint16m4_t __riscv_vrem_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                           size_t vl);
vint16m4_t __riscv_vrem_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                           size_t vl);
vint16m8_t __riscv_vrem_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                           size_t vl);
vint16m8_t __riscv_vrem_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                           size_t vl);
vint32mf2_t __riscv_vrem_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                            size_t vl);
vint32mf2_t __riscv_vrem_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                            size_t vl);
vint32m1_t __riscv_vrem_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                           size_t vl);
vint32m1_t __riscv_vrem_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                           size_t vl);
vint32m2_t __riscv_vrem_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                           size_t vl);
vint32m2_t __riscv_vrem_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                           size_t vl);
vint32m4_t __riscv_vrem_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                           size_t vl);
vint32m4_t __riscv_vrem_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                           size_t vl);
vint32m8_t __riscv_vrem_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                           size_t vl);
vint32m8_t __riscv_vrem_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                           size_t vl);
vint64m1_t __riscv_vrem_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                           size_t vl);
vint64m1_t __riscv_vrem_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                           size_t vl);
vint64m2_t __riscv_vrem_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                           size_t vl);
vint64m2_t __riscv_vrem_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                           size_t vl);
vint64m4_t __riscv_vrem_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                           size_t vl);
vint64m4_t __riscv_vrem_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                           size_t vl);
vint64m8_t __riscv_vrem_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                           size_t vl);
vint64m8_t __riscv_vrem_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                           size_t vl);
// __riscv_vdivu_tu overloads for every unsigned vector type from vuint8mf8_t
// through vuint64m8_t. These take no mask argument. Each vector type has two
// prototypes:
//   (vd, vs2, vs1, vl) - last operand is a vector of the same type
//   (vd, vs2, rs1, vl) - last operand is a scalar uint8_t/uint16_t/uint32_t/
//                        uint64_t matching the element width
// vd has the same type as the return value; vl is a size_t.
// NOTE(review): "vdivu" is presumably the unsigned quotient of vs2 divided by
// vs1/rs1, and "_tu" presumably selects the tail-undisturbed policy with vd
// supplying the tail elements - confirm against the RVV intrinsics
// specification.
vuint8mf8_t __riscv_vdivu_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                             size_t vl);
vuint8mf8_t __riscv_vdivu_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                             size_t vl);
vuint8mf4_t __riscv_vdivu_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                             size_t vl);
vuint8mf4_t __riscv_vdivu_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                             size_t vl);
vuint8mf2_t __riscv_vdivu_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                             size_t vl);
vuint8mf2_t __riscv_vdivu_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                             size_t vl);
vuint8m1_t __riscv_vdivu_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                            size_t vl);
vuint8m1_t __riscv_vdivu_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m2_t __riscv_vdivu_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                            size_t vl);
vuint8m2_t __riscv_vdivu_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m4_t __riscv_vdivu_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                            size_t vl);
vuint8m4_t __riscv_vdivu_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m8_t __riscv_vdivu_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                            size_t vl);
vuint8m8_t __riscv_vdivu_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                            size_t vl);
vuint16mf4_t __riscv_vdivu_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vdivu_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                              size_t vl);
vuint16mf2_t __riscv_vdivu_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vdivu_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                              size_t vl);
vuint16m1_t __riscv_vdivu_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                             size_t vl);
vuint16m1_t __riscv_vdivu_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m2_t __riscv_vdivu_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                             size_t vl);
vuint16m2_t __riscv_vdivu_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m4_t __riscv_vdivu_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                             size_t vl);
vuint16m4_t __riscv_vdivu_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m8_t __riscv_vdivu_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                             size_t vl);
vuint16m8_t __riscv_vdivu_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                             size_t vl);
vuint32mf2_t __riscv_vdivu_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vdivu_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                              size_t vl);
vuint32m1_t __riscv_vdivu_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                             size_t vl);
vuint32m1_t __riscv_vdivu_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m2_t __riscv_vdivu_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                             size_t vl);
vuint32m2_t __riscv_vdivu_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m4_t __riscv_vdivu_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                             size_t vl);
vuint32m4_t __riscv_vdivu_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m8_t __riscv_vdivu_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                             size_t vl);
vuint32m8_t __riscv_vdivu_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                             size_t vl);
vuint64m1_t __riscv_vdivu_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                             size_t vl);
vuint64m1_t __riscv_vdivu_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m2_t __riscv_vdivu_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                             size_t vl);
vuint64m2_t __riscv_vdivu_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m4_t __riscv_vdivu_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                             size_t vl);
vuint64m4_t __riscv_vdivu_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m8_t __riscv_vdivu_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                             size_t vl);
vuint64m8_t __riscv_vdivu_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                             size_t vl);
// __riscv_vremu_tu overloads for every unsigned vector type from vuint8mf8_t
// through vuint64m8_t. These take no mask argument. Each vector type has two
// prototypes:
//   (vd, vs2, vs1, vl) - last operand is a vector of the same type
//   (vd, vs2, rs1, vl) - last operand is a scalar uint8_t/uint16_t/uint32_t/
//                        uint64_t matching the element width
// vd has the same type as the return value; vl is a size_t.
// NOTE(review): "vremu" is presumably the unsigned remainder of vs2 divided
// by vs1/rs1, and "_tu" presumably selects the tail-undisturbed policy with
// vd supplying the tail elements - confirm against the RVV intrinsics
// specification.
vuint8mf8_t __riscv_vremu_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                             size_t vl);
vuint8mf8_t __riscv_vremu_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                             size_t vl);
vuint8mf4_t __riscv_vremu_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                             size_t vl);
vuint8mf4_t __riscv_vremu_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                             size_t vl);
vuint8mf2_t __riscv_vremu_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                             size_t vl);
vuint8mf2_t __riscv_vremu_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                             size_t vl);
vuint8m1_t __riscv_vremu_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                            size_t vl);
vuint8m1_t __riscv_vremu_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m2_t __riscv_vremu_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                            size_t vl);
vuint8m2_t __riscv_vremu_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m4_t __riscv_vremu_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                            size_t vl);
vuint8m4_t __riscv_vremu_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                            size_t vl);
vuint8m8_t __riscv_vremu_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                            size_t vl);
vuint8m8_t __riscv_vremu_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                            size_t vl);
vuint16mf4_t __riscv_vremu_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vremu_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                              size_t vl);
vuint16mf2_t __riscv_vremu_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vremu_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                              size_t vl);
vuint16m1_t __riscv_vremu_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                             size_t vl);
vuint16m1_t __riscv_vremu_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m2_t __riscv_vremu_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                             size_t vl);
vuint16m2_t __riscv_vremu_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m4_t __riscv_vremu_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                             size_t vl);
vuint16m4_t __riscv_vremu_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                             size_t vl);
vuint16m8_t __riscv_vremu_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                             size_t vl);
vuint16m8_t __riscv_vremu_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                             size_t vl);
vuint32mf2_t __riscv_vremu_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vremu_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                              size_t vl);
vuint32m1_t __riscv_vremu_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                             size_t vl);
vuint32m1_t __riscv_vremu_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m2_t __riscv_vremu_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                             size_t vl);
vuint32m2_t __riscv_vremu_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m4_t __riscv_vremu_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                             size_t vl);
vuint32m4_t __riscv_vremu_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                             size_t vl);
vuint32m8_t __riscv_vremu_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                             size_t vl);
vuint32m8_t __riscv_vremu_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                             size_t vl);
vuint64m1_t __riscv_vremu_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                             size_t vl);
vuint64m1_t __riscv_vremu_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m2_t __riscv_vremu_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                             size_t vl);
vuint64m2_t __riscv_vremu_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m4_t __riscv_vremu_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                             size_t vl);
vuint64m4_t __riscv_vremu_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                             size_t vl);
vuint64m8_t __riscv_vremu_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                             size_t vl);
vuint64m8_t __riscv_vremu_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                             size_t vl);
// masked functions (_tum policy variants: tail undisturbed, mask agnostic)
// __riscv_vdiv_tum: overloaded, masked prototypes on signed integer vectors.
// Each data type listed below (vint8mf8_t .. vint64m8_t) has two overloads:
// a vector-vector form taking `vs1` and a vector-scalar form taking `rs1`,
// whose scalar type matches the element width (int8_t .. int64_t).
//   vm  - mask operand; its vboolN_t type pairs with the data type as
//         N = SEW / LMUL (e.g. vint8mf8_t -> vbool64_t, vint8m8_t -> vbool1_t).
//   vd  - operand of the same type as the result.
//   vs2 - first source vector.
//   vs1 / rs1 - second source operand (vector / scalar).
//   vl  - size_t length operand.
// NOTE(review): per the RVV naming scheme this is presumably the element-wise
// signed quotient vs2 / vs1 (or vs2 / rs1), with `_tum` meaning tail
// undisturbed / mask agnostic and `vd` supplying the preserved elements --
// confirm against the RVV specification.
vint8mf8_t __riscv_vdiv_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vdiv_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            int8_t rs1, size_t vl);
vint8mf4_t __riscv_vdiv_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vdiv_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            int8_t rs1, size_t vl);
vint8mf2_t __riscv_vdiv_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vdiv_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint8m1_t __riscv_vdiv_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vdiv_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                           size_t vl);
vint8m2_t __riscv_vdiv_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vdiv_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                           size_t vl);
vint8m4_t __riscv_vdiv_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vdiv_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                           size_t vl);
vint8m8_t __riscv_vdiv_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vdiv_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vdiv_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vdiv_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint16mf2_t __riscv_vdiv_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vdiv_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint16m1_t __riscv_vdiv_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vdiv_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint16m2_t __riscv_vdiv_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vdiv_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint16m4_t __riscv_vdiv_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vdiv_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint16m8_t __riscv_vdiv_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vdiv_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            int16_t rs1, size_t vl);
vint32mf2_t __riscv_vdiv_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vdiv_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint32m1_t __riscv_vdiv_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vdiv_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint32m2_t __riscv_vdiv_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vdiv_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint32m4_t __riscv_vdiv_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vdiv_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint32m8_t __riscv_vdiv_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vdiv_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            int32_t rs1, size_t vl);
vint64m1_t __riscv_vdiv_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vdiv_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            int64_t rs1, size_t vl);
vint64m2_t __riscv_vdiv_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vdiv_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            int64_t rs1, size_t vl);
vint64m4_t __riscv_vdiv_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vdiv_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            int64_t rs1, size_t vl);
vint64m8_t __riscv_vdiv_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vdiv_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            int64_t rs1, size_t vl);
// __riscv_vrem_tum: overloaded, masked prototypes on signed integer vectors.
// Each data type listed below (vint8mf8_t .. vint64m8_t) has two overloads:
// a vector-vector form taking `vs1` and a vector-scalar form taking `rs1`,
// whose scalar type matches the element width (int8_t .. int64_t).
//   vm  - mask operand; its vboolN_t type pairs with the data type as
//         N = SEW / LMUL (e.g. vint8mf8_t -> vbool64_t, vint8m8_t -> vbool1_t).
//   vd  - operand of the same type as the result.
//   vs2 - first source vector.
//   vs1 / rs1 - second source operand (vector / scalar).
//   vl  - size_t length operand.
// NOTE(review): per the RVV naming scheme this is presumably the element-wise
// signed remainder of vs2 / vs1 (or vs2 / rs1), with `_tum` meaning tail
// undisturbed / mask agnostic and `vd` supplying the preserved elements --
// confirm against the RVV specification.
vint8mf8_t __riscv_vrem_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vrem_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                            int8_t rs1, size_t vl);
vint8mf4_t __riscv_vrem_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vrem_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                            int8_t rs1, size_t vl);
vint8mf2_t __riscv_vrem_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vrem_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint8m1_t __riscv_vrem_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                           vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vrem_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                           size_t vl);
vint8m2_t __riscv_vrem_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                           vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vrem_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                           size_t vl);
vint8m4_t __riscv_vrem_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                           vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vrem_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                           size_t vl);
vint8m8_t __riscv_vrem_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                           vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vrem_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                           size_t vl);
vint16mf4_t __riscv_vrem_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vrem_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint16mf2_t __riscv_vrem_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vrem_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint16m1_t __riscv_vrem_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vrem_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint16m2_t __riscv_vrem_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vrem_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint16m4_t __riscv_vrem_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vrem_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint16m8_t __riscv_vrem_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vrem_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                            int16_t rs1, size_t vl);
vint32mf2_t __riscv_vrem_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vrem_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint32m1_t __riscv_vrem_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vrem_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint32m2_t __riscv_vrem_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vrem_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint32m4_t __riscv_vrem_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vrem_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint32m8_t __riscv_vrem_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vrem_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                            int32_t rs1, size_t vl);
vint64m1_t __riscv_vrem_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vrem_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                            int64_t rs1, size_t vl);
vint64m2_t __riscv_vrem_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vrem_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                            int64_t rs1, size_t vl);
vint64m4_t __riscv_vrem_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vrem_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                            int64_t rs1, size_t vl);
vint64m8_t __riscv_vrem_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vrem_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                            int64_t rs1, size_t vl);
// __riscv_vdivu_tum: overloaded, masked prototypes on unsigned integer vectors.
// Each data type listed below (vuint8mf8_t .. vuint64m8_t) has two overloads:
// a vector-vector form taking `vs1` and a vector-scalar form taking `rs1`,
// whose scalar type matches the element width (uint8_t .. uint64_t).
//   vm  - mask operand; its vboolN_t type pairs with the data type as
//         N = SEW / LMUL (e.g. vuint8mf8_t -> vbool64_t,
//         vuint8m8_t -> vbool1_t).
//   vd  - operand of the same type as the result.
//   vs2 - first source vector.
//   vs1 / rs1 - second source operand (vector / scalar).
//   vl  - size_t length operand.
// NOTE(review): per the RVV naming scheme this is presumably the element-wise
// unsigned quotient vs2 / vs1 (or vs2 / rs1), with `_tum` meaning tail
// undisturbed / mask agnostic and `vd` supplying the preserved elements --
// confirm against the RVV specification.
vuint8mf8_t __riscv_vdivu_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vdivu_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vdivu_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vdivu_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vdivu_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vdivu_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vdivu_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vdivu_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vdivu_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vdivu_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vdivu_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vdivu_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vdivu_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vdivu_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vdivu_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vdivu_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vdivu_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vdivu_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vdivu_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vdivu_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vdivu_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vdivu_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vdivu_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vdivu_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vdivu_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vdivu_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vdivu_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vdivu_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vdivu_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vdivu_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vdivu_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vdivu_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vdivu_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vdivu_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vdivu_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vdivu_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vdivu_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vdivu_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vdivu_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vdivu_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vdivu_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vdivu_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vdivu_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vdivu_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              uint64_t rs1, size_t vl);
// __riscv_vremu_tum: overloaded, masked prototypes on unsigned integer vectors.
// Each data type listed below (vuint8mf8_t .. vuint64m8_t) has two overloads:
// a vector-vector form taking `vs1` and a vector-scalar form taking `rs1`,
// whose scalar type matches the element width (uint8_t .. uint64_t).
//   vm  - mask operand; its vboolN_t type pairs with the data type as
//         N = SEW / LMUL (e.g. vuint8mf8_t -> vbool64_t,
//         vuint8m8_t -> vbool1_t).
//   vd  - operand of the same type as the result.
//   vs2 - first source vector.
//   vs1 / rs1 - second source operand (vector / scalar).
//   vl  - size_t length operand.
// NOTE(review): per the RVV naming scheme this is presumably the element-wise
// unsigned remainder of vs2 / vs1 (or vs2 / rs1), with `_tum` meaning tail
// undisturbed / mask agnostic and `vd` supplying the preserved elements --
// confirm against the RVV specification.
vuint8mf8_t __riscv_vremu_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vremu_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vremu_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vremu_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vremu_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vremu_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vremu_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vremu_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vremu_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vremu_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vremu_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vremu_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vremu_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vremu_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                             uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vremu_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vremu_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vremu_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vremu_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vremu_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vremu_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vremu_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vremu_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vremu_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vremu_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vremu_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vremu_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                              uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vremu_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vremu_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vremu_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vremu_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vremu_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vremu_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vremu_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vremu_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vremu_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vremu_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vremu_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vremu_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vremu_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vremu_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vremu_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vremu_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                              uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vremu_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vremu_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                              uint64_t rs1, size_t vl);
// masked functions (_tumu policy variants: tail undisturbed, mask undisturbed)
// __riscv_vdiv_tumu: overloaded, masked prototypes on signed integer vectors.
// Each data type listed below (vint8mf8_t .. vint64m8_t) has two overloads:
// a vector-vector form taking `vs1` and a vector-scalar form taking `rs1`,
// whose scalar type matches the element width (int8_t .. int64_t).
//   vm  - mask operand; its vboolN_t type pairs with the data type as
//         N = SEW / LMUL (e.g. vint8mf8_t -> vbool64_t, vint8m8_t -> vbool1_t).
//   vd  - operand of the same type as the result.
//   vs2 - first source vector.
//   vs1 / rs1 - second source operand (vector / scalar).
//   vl  - size_t length operand.
// NOTE(review): per the RVV naming scheme this is presumably the element-wise
// signed quotient vs2 / vs1 (or vs2 / rs1), with `_tumu` meaning tail
// undisturbed / mask undisturbed and `vd` supplying the preserved elements --
// confirm against the RVV specification.
vint8mf8_t __riscv_vdiv_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vdiv_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             int8_t rs1, size_t vl);
vint8mf4_t __riscv_vdiv_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vdiv_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             int8_t rs1, size_t vl);
vint8mf2_t __riscv_vdiv_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vdiv_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             int8_t rs1, size_t vl);
vint8m1_t __riscv_vdiv_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vdiv_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            int8_t rs1, size_t vl);
vint8m2_t __riscv_vdiv_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vdiv_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            int8_t rs1, size_t vl);
vint8m4_t __riscv_vdiv_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vdiv_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            int8_t rs1, size_t vl);
vint8m8_t __riscv_vdiv_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vdiv_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            int8_t rs1, size_t vl);
vint16mf4_t __riscv_vdiv_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vdiv_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              int16_t rs1, size_t vl);
vint16mf2_t __riscv_vdiv_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vdiv_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              int16_t rs1, size_t vl);
vint16m1_t __riscv_vdiv_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vdiv_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             int16_t rs1, size_t vl);
vint16m2_t __riscv_vdiv_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vdiv_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             int16_t rs1, size_t vl);
vint16m4_t __riscv_vdiv_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vdiv_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             int16_t rs1, size_t vl);
vint16m8_t __riscv_vdiv_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vdiv_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             int16_t rs1, size_t vl);
vint32mf2_t __riscv_vdiv_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vdiv_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              int32_t rs1, size_t vl);
vint32m1_t __riscv_vdiv_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vdiv_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             int32_t rs1, size_t vl);
vint32m2_t __riscv_vdiv_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vdiv_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             int32_t rs1, size_t vl);
vint32m4_t __riscv_vdiv_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vdiv_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             int32_t rs1, size_t vl);
vint32m8_t __riscv_vdiv_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vdiv_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             int32_t rs1, size_t vl);
vint64m1_t __riscv_vdiv_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vdiv_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             int64_t rs1, size_t vl);
vint64m2_t __riscv_vdiv_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vdiv_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             int64_t rs1, size_t vl);
vint64m4_t __riscv_vdiv_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vdiv_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             int64_t rs1, size_t vl);
vint64m8_t __riscv_vdiv_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vdiv_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             int64_t rs1, size_t vl);
// __riscv_vrem_tumu: overloaded, masked prototypes on signed integer vectors.
// Each data type listed below has two overloads: a vector-vector form taking
// `vs1` and a vector-scalar form taking `rs1`, whose scalar type matches the
// element width (int8_t .. int64_t).
//   vm  - mask operand; its vboolN_t type pairs with the data type as
//         N = SEW / LMUL (e.g. vint8mf8_t -> vbool64_t, vint8m8_t -> vbool1_t).
//   vd  - operand of the same type as the result.
//   vs2 - first source vector.
//   vs1 / rs1 - second source operand (vector / scalar).
//   vl  - size_t length operand.
// NOTE(review): per the RVV naming scheme this is presumably the element-wise
// signed remainder of vs2 / vs1 (or vs2 / rs1), with `_tumu` meaning tail
// undisturbed / mask undisturbed and `vd` supplying the preserved elements --
// confirm against the RVV specification.
vint8mf8_t __riscv_vrem_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vrem_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                             int8_t rs1, size_t vl);
vint8mf4_t __riscv_vrem_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vrem_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                             int8_t rs1, size_t vl);
vint8mf2_t __riscv_vrem_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vrem_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                             int8_t rs1, size_t vl);
vint8m1_t __riscv_vrem_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vrem_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                            int8_t rs1, size_t vl);
vint8m2_t __riscv_vrem_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vrem_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                            int8_t rs1, size_t vl);
vint8m4_t __riscv_vrem_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vrem_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                            int8_t rs1, size_t vl);
vint8m8_t __riscv_vrem_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vrem_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                            int8_t rs1, size_t vl);
vint16mf4_t __riscv_vrem_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vrem_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                              int16_t rs1, size_t vl);
vint16mf2_t __riscv_vrem_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vrem_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                              int16_t rs1, size_t vl);
vint16m1_t __riscv_vrem_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vrem_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                             int16_t rs1, size_t vl);
vint16m2_t __riscv_vrem_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vrem_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                             int16_t rs1, size_t vl);
vint16m4_t __riscv_vrem_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vrem_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                             int16_t rs1, size_t vl);
vint16m8_t __riscv_vrem_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vrem_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                             int16_t rs1, size_t vl);
vint32mf2_t __riscv_vrem_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vrem_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                              int32_t rs1, size_t vl);
vint32m1_t __riscv_vrem_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vrem_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                             int32_t rs1, size_t vl);
vint32m2_t __riscv_vrem_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vrem_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                             int32_t rs1, size_t vl);
vint32m4_t __riscv_vrem_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vrem_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                             int32_t rs1, size_t vl);
vint32m8_t __riscv_vrem_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vrem_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                             int32_t rs1, size_t vl);
vint64m1_t __riscv_vrem_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vrem_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                             int64_t rs1, size_t vl);
vint64m2_t __riscv_vrem_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vrem_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                             int64_t rs1, size_t vl);
vint64m4_t __riscv_vrem_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vrem_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                             int64_t rs1, size_t vl);
vint64m8_t __riscv_vrem_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vrem_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                             int64_t rs1, size_t vl);
// __riscv_vdivu_tumu: overloaded prototypes, one pair per unsigned vector type
// from vuint8mf8_t to vuint64m8_t. Each pair has a vector-vector form (last
// operand vs1) and a vector-scalar form (last operand rs1, a uintN_t matching
// the element width). Every overload takes the mask vm first (its vboolN_t
// type varies with the vector type), then vd and vs2 of the returned vector
// type, and ends with size_t vl.
// NOTE(review): going by the RVV intrinsic naming scheme this is presumably
// unsigned divide with the tail-undisturbed, mask-undisturbed (_tumu) policy;
// confirm against the intrinsics specification.
vuint8mf8_t __riscv_vdivu_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                               vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vdivu_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                               uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vdivu_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                               vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vdivu_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                               uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vdivu_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                               vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vdivu_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                               uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vdivu_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                              vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vdivu_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vdivu_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                              vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vdivu_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vdivu_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                              vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vdivu_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vdivu_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                              vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vdivu_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                              uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vdivu_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                                vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vdivu_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                                uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vdivu_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                                vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vdivu_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                                uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vdivu_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                               vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vdivu_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vdivu_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                               vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vdivu_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vdivu_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                               vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vdivu_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vdivu_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                               vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vdivu_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                               uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vdivu_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                                vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vdivu_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                                uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vdivu_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                               vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vdivu_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vdivu_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                               vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vdivu_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vdivu_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                               vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vdivu_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vdivu_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                               vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vdivu_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                               uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vdivu_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                               vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vdivu_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vdivu_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                               vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vdivu_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vdivu_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                               vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vdivu_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vdivu_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                               vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vdivu_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                               uint64_t rs1, size_t vl);
// __riscv_vremu_tumu: overloaded prototypes, one pair per unsigned vector type
// from vuint8mf8_t to vuint64m8_t. Each pair has a vector-vector form (last
// operand vs1) and a vector-scalar form (last operand rs1, a uintN_t matching
// the element width). Every overload takes the mask vm first (its vboolN_t
// type varies with the vector type), then vd and vs2 of the returned vector
// type, and ends with size_t vl.
// NOTE(review): going by the RVV intrinsic naming scheme this is presumably
// unsigned remainder with the tail-undisturbed, mask-undisturbed (_tumu)
// policy; confirm against the intrinsics specification.
vuint8mf8_t __riscv_vremu_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                               vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vremu_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                               uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vremu_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                               vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vremu_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                               uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vremu_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                               vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vremu_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                               uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vremu_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                              vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vremu_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vremu_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                              vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vremu_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vremu_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                              vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vremu_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                              uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vremu_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                              vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vremu_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                              uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vremu_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                                vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vremu_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                                uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vremu_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                                vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vremu_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                                uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vremu_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                               vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vremu_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vremu_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                               vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vremu_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vremu_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                               vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vremu_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                               uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vremu_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                               vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vremu_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                               uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vremu_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                                vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vremu_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                                uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vremu_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                               vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vremu_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vremu_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                               vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vremu_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vremu_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                               vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vremu_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                               uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vremu_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                               vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vremu_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                               uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vremu_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                               vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vremu_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vremu_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                               vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vremu_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vremu_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                               vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vremu_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                               uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vremu_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                               vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vremu_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                               uint64_t rs1, size_t vl);
// masked functions: _mu variants (mask operand vm is the first parameter)
// __riscv_vdiv_mu: overloaded prototypes, one pair per signed vector type from
// vint8mf8_t to vint64m8_t. Each pair has a vector-vector form (last operand
// vs1) and a vector-scalar form (last operand rs1, an intN_t matching the
// element width). Every overload takes the mask vm first (its vboolN_t type
// varies with the vector type), then vd and vs2 of the returned vector type,
// and ends with size_t vl.
// NOTE(review): going by the RVV intrinsic naming scheme this is presumably
// signed divide with the mask-undisturbed (_mu) policy; confirm against the
// intrinsics specification.
vint8mf8_t __riscv_vdiv_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vdiv_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vint8mf4_t __riscv_vdiv_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vdiv_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vint8mf2_t __riscv_vdiv_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vdiv_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vint8m1_t __riscv_vdiv_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vdiv_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vint8m2_t __riscv_vdiv_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vdiv_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vint8m4_t __riscv_vdiv_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vdiv_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vint8m8_t __riscv_vdiv_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vdiv_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
vint16mf4_t __riscv_vdiv_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vdiv_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            int16_t rs1, size_t vl);
vint16mf2_t __riscv_vdiv_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vdiv_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            int16_t rs1, size_t vl);
vint16m1_t __riscv_vdiv_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vdiv_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vint16m2_t __riscv_vdiv_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vdiv_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           int16_t rs1, size_t vl);
vint16m4_t __riscv_vdiv_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vdiv_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           int16_t rs1, size_t vl);
vint16m8_t __riscv_vdiv_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vdiv_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           int16_t rs1, size_t vl);
vint32mf2_t __riscv_vdiv_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vdiv_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            int32_t rs1, size_t vl);
vint32m1_t __riscv_vdiv_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vdiv_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vint32m2_t __riscv_vdiv_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vdiv_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vint32m4_t __riscv_vdiv_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vdiv_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           int32_t rs1, size_t vl);
vint32m8_t __riscv_vdiv_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vdiv_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           int32_t rs1, size_t vl);
vint64m1_t __riscv_vdiv_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vdiv_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vint64m2_t __riscv_vdiv_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vdiv_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vint64m4_t __riscv_vdiv_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vdiv_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vint64m8_t __riscv_vdiv_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vdiv_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           int64_t rs1, size_t vl);
// __riscv_vrem_mu: overloaded prototypes, one pair per signed vector type from
// vint8mf8_t to vint64m8_t. Each pair has a vector-vector form (last operand
// vs1) and a vector-scalar form (last operand rs1, an intN_t matching the
// element width). Every overload takes the mask vm first (its vboolN_t type
// varies with the vector type), then vd and vs2 of the returned vector type,
// and ends with size_t vl.
// NOTE(review): going by the RVV intrinsic naming scheme this is presumably
// signed remainder with the mask-undisturbed (_mu) policy; confirm against the
// intrinsics specification.
vint8mf8_t __riscv_vrem_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vrem_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs2,
                           int8_t rs1, size_t vl);
vint8mf4_t __riscv_vrem_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vrem_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs2,
                           int8_t rs1, size_t vl);
vint8mf2_t __riscv_vrem_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vrem_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs2,
                           int8_t rs1, size_t vl);
vint8m1_t __riscv_vrem_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2,
                          vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vrem_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                          size_t vl);
vint8m2_t __riscv_vrem_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2,
                          vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vrem_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                          size_t vl);
vint8m4_t __riscv_vrem_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2,
                          vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vrem_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                          size_t vl);
vint8m8_t __riscv_vrem_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2,
                          vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vrem_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                          size_t vl);
vint16mf4_t __riscv_vrem_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vrem_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs2,
                            int16_t rs1, size_t vl);
vint16mf2_t __riscv_vrem_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vrem_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs2,
                            int16_t rs1, size_t vl);
vint16m1_t __riscv_vrem_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vrem_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs2,
                           int16_t rs1, size_t vl);
vint16m2_t __riscv_vrem_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vrem_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs2,
                           int16_t rs1, size_t vl);
vint16m4_t __riscv_vrem_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vrem_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs2,
                           int16_t rs1, size_t vl);
vint16m8_t __riscv_vrem_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vrem_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs2,
                           int16_t rs1, size_t vl);
vint32mf2_t __riscv_vrem_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vrem_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs2,
                            int32_t rs1, size_t vl);
vint32m1_t __riscv_vrem_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vrem_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs2,
                           int32_t rs1, size_t vl);
vint32m2_t __riscv_vrem_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vrem_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs2,
                           int32_t rs1, size_t vl);
vint32m4_t __riscv_vrem_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vrem_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs2,
                           int32_t rs1, size_t vl);
vint32m8_t __riscv_vrem_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vrem_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs2,
                           int32_t rs1, size_t vl);
vint64m1_t __riscv_vrem_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vrem_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs2,
                           int64_t rs1, size_t vl);
vint64m2_t __riscv_vrem_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vrem_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs2,
                           int64_t rs1, size_t vl);
vint64m4_t __riscv_vrem_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vrem_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs2,
                           int64_t rs1, size_t vl);
vint64m8_t __riscv_vrem_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vrem_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs2,
                           int64_t rs1, size_t vl);
// __riscv_vdivu_mu: overloaded prototypes, one pair per unsigned vector type
// from vuint8mf8_t to vuint64m8_t. Each pair has a vector-vector form (last
// operand vs1) and a vector-scalar form (last operand rs1, a uintN_t matching
// the element width). Every overload takes the mask vm first (its vboolN_t
// type varies with the vector type), then vd and vs2 of the returned vector
// type, and ends with size_t vl.
// NOTE(review): going by the RVV intrinsic naming scheme this is presumably
// unsigned divide with the mask-undisturbed (_mu) policy; confirm against the
// intrinsics specification.
vuint8mf8_t __riscv_vdivu_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vdivu_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vdivu_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vdivu_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vdivu_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vdivu_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vdivu_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vdivu_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vdivu_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vdivu_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vdivu_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vdivu_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vdivu_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vdivu_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vdivu_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vdivu_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vdivu_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vdivu_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vdivu_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vdivu_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vdivu_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vdivu_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vdivu_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vdivu_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vdivu_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vdivu_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vdivu_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vdivu_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vdivu_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vdivu_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vdivu_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vdivu_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vdivu_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vdivu_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vdivu_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vdivu_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vdivu_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vdivu_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vdivu_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vdivu_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vdivu_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vdivu_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vdivu_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vdivu_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             uint64_t rs1, size_t vl);
// __riscv_vremu_mu: overloads of the unsigned-remainder intrinsic with the
// `_mu` policy suffix (mask-undisturbed in the RVV intrinsics naming scheme).
// Every element-width/LMUL combination comes in two forms: vector-vector
// (`vs1`) and vector-scalar (`rs1`). `vm` is the mask, `vd` has the same type
// as the result, and `vl` is the vector length.
// 8-bit elements
vuint8mf8_t __riscv_vremu_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vremu_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vremu_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vremu_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs2,
                             uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vremu_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vremu_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs2,
                             uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vremu_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vremu_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vremu_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vremu_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vremu_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vremu_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs2,
                            uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vremu_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vremu_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs2,
                            uint8_t rs1, size_t vl);
// 16-bit elements
vuint16mf4_t __riscv_vremu_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vremu_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs2,
                              uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vremu_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vremu_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vremu_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vremu_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vremu_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vremu_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vremu_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vremu_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs2,
                             uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vremu_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vremu_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs2,
                             uint16_t rs1, size_t vl);
// 32-bit elements
vuint32mf2_t __riscv_vremu_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vremu_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vremu_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vremu_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vremu_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vremu_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vremu_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vremu_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs2,
                             uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vremu_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vremu_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs2,
                             uint32_t rs1, size_t vl);
// 64-bit elements
vuint64m1_t __riscv_vremu_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vremu_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vremu_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vremu_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vremu_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vremu_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs2,
                             uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vremu_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vremu_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs2,
                             uint64_t rs1, size_t vl);

Vector Widening Integer Multiply Intrinsics

// __riscv_vwmul_tu: overloads of the widening multiply with signed operands,
// `_tu` policy suffix (tail-undisturbed in the RVV intrinsics naming scheme),
// unmasked. The result type has twice the element width and twice the LMUL of
// the `vs2` source; the second operand is either a vector (`vs1`) or a scalar
// (`rs1`). `vd` has the result type and `vl` is the vector length.
// 8-bit sources -> 16-bit results
vint16mf4_t __riscv_vwmul_tu(vint16mf4_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                             size_t vl);
vint16mf4_t __riscv_vwmul_tu(vint16mf4_t vd, vint8mf8_t vs2, int8_t rs1,
                             size_t vl);
vint16mf2_t __riscv_vwmul_tu(vint16mf2_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                             size_t vl);
vint16mf2_t __riscv_vwmul_tu(vint16mf2_t vd, vint8mf4_t vs2, int8_t rs1,
                             size_t vl);
vint16m1_t __riscv_vwmul_tu(vint16m1_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                            size_t vl);
vint16m1_t __riscv_vwmul_tu(vint16m1_t vd, vint8mf2_t vs2, int8_t rs1,
                            size_t vl);
vint16m2_t __riscv_vwmul_tu(vint16m2_t vd, vint8m1_t vs2, vint8m1_t vs1,
                            size_t vl);
vint16m2_t __riscv_vwmul_tu(vint16m2_t vd, vint8m1_t vs2, int8_t rs1,
                            size_t vl);
vint16m4_t __riscv_vwmul_tu(vint16m4_t vd, vint8m2_t vs2, vint8m2_t vs1,
                            size_t vl);
vint16m4_t __riscv_vwmul_tu(vint16m4_t vd, vint8m2_t vs2, int8_t rs1,
                            size_t vl);
vint16m8_t __riscv_vwmul_tu(vint16m8_t vd, vint8m4_t vs2, vint8m4_t vs1,
                            size_t vl);
vint16m8_t __riscv_vwmul_tu(vint16m8_t vd, vint8m4_t vs2, int8_t rs1,
                            size_t vl);
// 16-bit sources -> 32-bit results
vint32mf2_t __riscv_vwmul_tu(vint32mf2_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                             size_t vl);
vint32mf2_t __riscv_vwmul_tu(vint32mf2_t vd, vint16mf4_t vs2, int16_t rs1,
                             size_t vl);
vint32m1_t __riscv_vwmul_tu(vint32m1_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                            size_t vl);
vint32m1_t __riscv_vwmul_tu(vint32m1_t vd, vint16mf2_t vs2, int16_t rs1,
                            size_t vl);
vint32m2_t __riscv_vwmul_tu(vint32m2_t vd, vint16m1_t vs2, vint16m1_t vs1,
                            size_t vl);
vint32m2_t __riscv_vwmul_tu(vint32m2_t vd, vint16m1_t vs2, int16_t rs1,
                            size_t vl);
vint32m4_t __riscv_vwmul_tu(vint32m4_t vd, vint16m2_t vs2, vint16m2_t vs1,
                            size_t vl);
vint32m4_t __riscv_vwmul_tu(vint32m4_t vd, vint16m2_t vs2, int16_t rs1,
                            size_t vl);
vint32m8_t __riscv_vwmul_tu(vint32m8_t vd, vint16m4_t vs2, vint16m4_t vs1,
                            size_t vl);
vint32m8_t __riscv_vwmul_tu(vint32m8_t vd, vint16m4_t vs2, int16_t rs1,
                            size_t vl);
// 32-bit sources -> 64-bit results
vint64m1_t __riscv_vwmul_tu(vint64m1_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                            size_t vl);
vint64m1_t __riscv_vwmul_tu(vint64m1_t vd, vint32mf2_t vs2, int32_t rs1,
                            size_t vl);
vint64m2_t __riscv_vwmul_tu(vint64m2_t vd, vint32m1_t vs2, vint32m1_t vs1,
                            size_t vl);
vint64m2_t __riscv_vwmul_tu(vint64m2_t vd, vint32m1_t vs2, int32_t rs1,
                            size_t vl);
vint64m4_t __riscv_vwmul_tu(vint64m4_t vd, vint32m2_t vs2, vint32m2_t vs1,
                            size_t vl);
vint64m4_t __riscv_vwmul_tu(vint64m4_t vd, vint32m2_t vs2, int32_t rs1,
                            size_t vl);
vint64m8_t __riscv_vwmul_tu(vint64m8_t vd, vint32m4_t vs2, vint32m4_t vs1,
                            size_t vl);
vint64m8_t __riscv_vwmul_tu(vint64m8_t vd, vint32m4_t vs2, int32_t rs1,
                            size_t vl);
// __riscv_vwmulsu_tu: overloads of the widening multiply with mixed
// signedness, `_tu` policy suffix (tail-undisturbed in the RVV intrinsics
// naming scheme), unmasked. `vs2` is a signed vector, the second operand
// (`vs1` vector or `rs1` scalar) is unsigned, and the result is a signed
// vector with twice the element width and twice the LMUL of `vs2`. `vd` has
// the result type and `vl` is the vector length.
// 8-bit sources -> 16-bit results
vint16mf4_t __riscv_vwmulsu_tu(vint16mf4_t vd, vint8mf8_t vs2, vuint8mf8_t vs1,
                               size_t vl);
vint16mf4_t __riscv_vwmulsu_tu(vint16mf4_t vd, vint8mf8_t vs2, uint8_t rs1,
                               size_t vl);
vint16mf2_t __riscv_vwmulsu_tu(vint16mf2_t vd, vint8mf4_t vs2, vuint8mf4_t vs1,
                               size_t vl);
vint16mf2_t __riscv_vwmulsu_tu(vint16mf2_t vd, vint8mf4_t vs2, uint8_t rs1,
                               size_t vl);
vint16m1_t __riscv_vwmulsu_tu(vint16m1_t vd, vint8mf2_t vs2, vuint8mf2_t vs1,
                              size_t vl);
vint16m1_t __riscv_vwmulsu_tu(vint16m1_t vd, vint8mf2_t vs2, uint8_t rs1,
                              size_t vl);
vint16m2_t __riscv_vwmulsu_tu(vint16m2_t vd, vint8m1_t vs2, vuint8m1_t vs1,
                              size_t vl);
vint16m2_t __riscv_vwmulsu_tu(vint16m2_t vd, vint8m1_t vs2, uint8_t rs1,
                              size_t vl);
vint16m4_t __riscv_vwmulsu_tu(vint16m4_t vd, vint8m2_t vs2, vuint8m2_t vs1,
                              size_t vl);
vint16m4_t __riscv_vwmulsu_tu(vint16m4_t vd, vint8m2_t vs2, uint8_t rs1,
                              size_t vl);
vint16m8_t __riscv_vwmulsu_tu(vint16m8_t vd, vint8m4_t vs2, vuint8m4_t vs1,
                              size_t vl);
vint16m8_t __riscv_vwmulsu_tu(vint16m8_t vd, vint8m4_t vs2, uint8_t rs1,
                              size_t vl);
// 16-bit sources -> 32-bit results
vint32mf2_t __riscv_vwmulsu_tu(vint32mf2_t vd, vint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwmulsu_tu(vint32mf2_t vd, vint16mf4_t vs2, uint16_t rs1,
                               size_t vl);
vint32m1_t __riscv_vwmulsu_tu(vint32m1_t vd, vint16mf2_t vs2, vuint16mf2_t vs1,
                              size_t vl);
vint32m1_t __riscv_vwmulsu_tu(vint32m1_t vd, vint16mf2_t vs2, uint16_t rs1,
                              size_t vl);
vint32m2_t __riscv_vwmulsu_tu(vint32m2_t vd, vint16m1_t vs2, vuint16m1_t vs1,
                              size_t vl);
vint32m2_t __riscv_vwmulsu_tu(vint32m2_t vd, vint16m1_t vs2, uint16_t rs1,
                              size_t vl);
vint32m4_t __riscv_vwmulsu_tu(vint32m4_t vd, vint16m2_t vs2, vuint16m2_t vs1,
                              size_t vl);
vint32m4_t __riscv_vwmulsu_tu(vint32m4_t vd, vint16m2_t vs2, uint16_t rs1,
                              size_t vl);
vint32m8_t __riscv_vwmulsu_tu(vint32m8_t vd, vint16m4_t vs2, vuint16m4_t vs1,
                              size_t vl);
vint32m8_t __riscv_vwmulsu_tu(vint32m8_t vd, vint16m4_t vs2, uint16_t rs1,
                              size_t vl);
// 32-bit sources -> 64-bit results
vint64m1_t __riscv_vwmulsu_tu(vint64m1_t vd, vint32mf2_t vs2, vuint32mf2_t vs1,
                              size_t vl);
vint64m1_t __riscv_vwmulsu_tu(vint64m1_t vd, vint32mf2_t vs2, uint32_t rs1,
                              size_t vl);
vint64m2_t __riscv_vwmulsu_tu(vint64m2_t vd, vint32m1_t vs2, vuint32m1_t vs1,
                              size_t vl);
vint64m2_t __riscv_vwmulsu_tu(vint64m2_t vd, vint32m1_t vs2, uint32_t rs1,
                              size_t vl);
vint64m4_t __riscv_vwmulsu_tu(vint64m4_t vd, vint32m2_t vs2, vuint32m2_t vs1,
                              size_t vl);
vint64m4_t __riscv_vwmulsu_tu(vint64m4_t vd, vint32m2_t vs2, uint32_t rs1,
                              size_t vl);
vint64m8_t __riscv_vwmulsu_tu(vint64m8_t vd, vint32m4_t vs2, vuint32m4_t vs1,
                              size_t vl);
vint64m8_t __riscv_vwmulsu_tu(vint64m8_t vd, vint32m4_t vs2, uint32_t rs1,
                              size_t vl);
// __riscv_vwmulu_tu: overloads of the widening multiply with unsigned
// operands, `_tu` policy suffix (tail-undisturbed in the RVV intrinsics naming
// scheme), unmasked. The result type has twice the element width and twice
// the LMUL of the `vs2` source; the second operand is either a vector (`vs1`)
// or a scalar (`rs1`). `vd` has the result type and `vl` is the vector length.
// 8-bit sources -> 16-bit results
vuint16mf4_t __riscv_vwmulu_tu(vuint16mf4_t vd, vuint8mf8_t vs2,
                               vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwmulu_tu(vuint16mf4_t vd, vuint8mf8_t vs2, uint8_t rs1,
                               size_t vl);
vuint16mf2_t __riscv_vwmulu_tu(vuint16mf2_t vd, vuint8mf4_t vs2,
                               vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwmulu_tu(vuint16mf2_t vd, vuint8mf4_t vs2, uint8_t rs1,
                               size_t vl);
vuint16m1_t __riscv_vwmulu_tu(vuint16m1_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                              size_t vl);
vuint16m1_t __riscv_vwmulu_tu(vuint16m1_t vd, vuint8mf2_t vs2, uint8_t rs1,
                              size_t vl);
vuint16m2_t __riscv_vwmulu_tu(vuint16m2_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                              size_t vl);
vuint16m2_t __riscv_vwmulu_tu(vuint16m2_t vd, vuint8m1_t vs2, uint8_t rs1,
                              size_t vl);
vuint16m4_t __riscv_vwmulu_tu(vuint16m4_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                              size_t vl);
vuint16m4_t __riscv_vwmulu_tu(vuint16m4_t vd, vuint8m2_t vs2, uint8_t rs1,
                              size_t vl);
vuint16m8_t __riscv_vwmulu_tu(vuint16m8_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                              size_t vl);
vuint16m8_t __riscv_vwmulu_tu(vuint16m8_t vd, vuint8m4_t vs2, uint8_t rs1,
                              size_t vl);
// 16-bit sources -> 32-bit results
vuint32mf2_t __riscv_vwmulu_tu(vuint32mf2_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint32mf2_t __riscv_vwmulu_tu(vuint32mf2_t vd, vuint16mf4_t vs2, uint16_t rs1,
                               size_t vl);
vuint32m1_t __riscv_vwmulu_tu(vuint32m1_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwmulu_tu(vuint32m1_t vd, vuint16mf2_t vs2, uint16_t rs1,
                              size_t vl);
vuint32m2_t __riscv_vwmulu_tu(vuint32m2_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                              size_t vl);
vuint32m2_t __riscv_vwmulu_tu(vuint32m2_t vd, vuint16m1_t vs2, uint16_t rs1,
                              size_t vl);
vuint32m4_t __riscv_vwmulu_tu(vuint32m4_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                              size_t vl);
vuint32m4_t __riscv_vwmulu_tu(vuint32m4_t vd, vuint16m2_t vs2, uint16_t rs1,
                              size_t vl);
vuint32m8_t __riscv_vwmulu_tu(vuint32m8_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                              size_t vl);
vuint32m8_t __riscv_vwmulu_tu(vuint32m8_t vd, vuint16m4_t vs2, uint16_t rs1,
                              size_t vl);
// 32-bit sources -> 64-bit results
vuint64m1_t __riscv_vwmulu_tu(vuint64m1_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwmulu_tu(vuint64m1_t vd, vuint32mf2_t vs2, uint32_t rs1,
                              size_t vl);
vuint64m2_t __riscv_vwmulu_tu(vuint64m2_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                              size_t vl);
vuint64m2_t __riscv_vwmulu_tu(vuint64m2_t vd, vuint32m1_t vs2, uint32_t rs1,
                              size_t vl);
vuint64m4_t __riscv_vwmulu_tu(vuint64m4_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                              size_t vl);
vuint64m4_t __riscv_vwmulu_tu(vuint64m4_t vd, vuint32m2_t vs2, uint32_t rs1,
                              size_t vl);
vuint64m8_t __riscv_vwmulu_tu(vuint64m8_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                              size_t vl);
vuint64m8_t __riscv_vwmulu_tu(vuint64m8_t vd, vuint32m4_t vs2, uint32_t rs1,
                              size_t vl);
// masked functions: `_tum` policy variants (mask operand `vm` comes first)
// __riscv_vwmul_tum: masked overloads of the widening multiply with signed
// operands, `_tum` policy suffix (tail-undisturbed, mask-agnostic in the RVV
// intrinsics naming scheme). `vm` is the mask; `vd` has the result type, which
// has twice the element width and twice the LMUL of the `vs2` source. The
// second operand is either a vector (`vs1`) or a scalar (`rs1`); `vl` is the
// vector length.
// 8-bit sources -> 16-bit results
vint16mf4_t __riscv_vwmul_tum(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                              vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwmul_tum(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                              int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwmul_tum(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                              vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwmul_tum(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                              int8_t rs1, size_t vl);
vint16m1_t __riscv_vwmul_tum(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                             vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwmul_tum(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                             int8_t rs1, size_t vl);
vint16m2_t __riscv_vwmul_tum(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                             vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwmul_tum(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                             int8_t rs1, size_t vl);
vint16m4_t __riscv_vwmul_tum(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                             vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwmul_tum(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                             int8_t rs1, size_t vl);
vint16m8_t __riscv_vwmul_tum(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                             vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwmul_tum(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                             int8_t rs1, size_t vl);
// 16-bit sources -> 32-bit results
vint32mf2_t __riscv_vwmul_tum(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                              vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwmul_tum(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                              int16_t rs1, size_t vl);
vint32m1_t __riscv_vwmul_tum(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                             vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwmul_tum(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                             int16_t rs1, size_t vl);
vint32m2_t __riscv_vwmul_tum(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                             vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwmul_tum(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                             int16_t rs1, size_t vl);
vint32m4_t __riscv_vwmul_tum(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                             vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwmul_tum(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                             int16_t rs1, size_t vl);
vint32m8_t __riscv_vwmul_tum(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                             vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwmul_tum(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                             int16_t rs1, size_t vl);
// 32-bit sources -> 64-bit results
vint64m1_t __riscv_vwmul_tum(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                             vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwmul_tum(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                             int32_t rs1, size_t vl);
vint64m2_t __riscv_vwmul_tum(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                             vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwmul_tum(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                             int32_t rs1, size_t vl);
vint64m4_t __riscv_vwmul_tum(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                             vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwmul_tum(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                             int32_t rs1, size_t vl);
vint64m8_t __riscv_vwmul_tum(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                             vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwmul_tum(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                             int32_t rs1, size_t vl);
// __riscv_vwmulsu_tum: masked overloads of the widening multiply with mixed
// signedness, `_tum` policy suffix (tail-undisturbed, mask-agnostic in the RVV
// intrinsics naming scheme). `vm` is the mask; `vs2` is a signed vector, the
// second operand (`vs1` vector or `rs1` scalar) is unsigned, and the result is
// a signed vector with twice the element width and twice the LMUL of `vs2`.
// `vd` has the result type and `vl` is the vector length.
// 8-bit sources -> 16-bit results
vint16mf4_t __riscv_vwmulsu_tum(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                vuint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwmulsu_tum(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                uint8_t rs1, size_t vl);
vint16mf2_t __riscv_vwmulsu_tum(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                vuint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwmulsu_tum(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                uint8_t rs1, size_t vl);
vint16m1_t __riscv_vwmulsu_tum(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                               vuint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwmulsu_tum(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                               uint8_t rs1, size_t vl);
vint16m2_t __riscv_vwmulsu_tum(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                               vuint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwmulsu_tum(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                               uint8_t rs1, size_t vl);
vint16m4_t __riscv_vwmulsu_tum(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                               vuint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwmulsu_tum(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                               uint8_t rs1, size_t vl);
vint16m8_t __riscv_vwmulsu_tum(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                               vuint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwmulsu_tum(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                               uint8_t rs1, size_t vl);
// 16-bit sources -> 32-bit results
vint32mf2_t __riscv_vwmulsu_tum(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                vuint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwmulsu_tum(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                uint16_t rs1, size_t vl);
vint32m1_t __riscv_vwmulsu_tum(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwmulsu_tum(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vint32m2_t __riscv_vwmulsu_tum(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                               vuint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwmulsu_tum(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                               uint16_t rs1, size_t vl);
vint32m4_t __riscv_vwmulsu_tum(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                               vuint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwmulsu_tum(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                               uint16_t rs1, size_t vl);
vint32m8_t __riscv_vwmulsu_tum(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                               vuint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwmulsu_tum(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                               uint16_t rs1, size_t vl);
// 32-bit sources -> 64-bit results
vint64m1_t __riscv_vwmulsu_tum(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwmulsu_tum(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vint64m2_t __riscv_vwmulsu_tum(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                               vuint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwmulsu_tum(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                               uint32_t rs1, size_t vl);
vint64m4_t __riscv_vwmulsu_tum(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                               vuint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwmulsu_tum(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                               uint32_t rs1, size_t vl);
vint64m8_t __riscv_vwmulsu_tum(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                               vuint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwmulsu_tum(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                               uint32_t rs1, size_t vl);
// __riscv_vwmulu_tum: masked overloads of the widening multiply with unsigned
// operands, `_tum` policy suffix (tail-undisturbed, mask-agnostic in the RVV
// intrinsics naming scheme). `vm` is the mask; `vd` has the result type, which
// has twice the element width and twice the LMUL of the `vs2` source. The
// second operand is either a vector (`vs1`) or a scalar (`rs1`); `vl` is the
// vector length.
// 8-bit sources -> 16-bit results
vuint16mf4_t __riscv_vwmulu_tum(vbool64_t vm, vuint16mf4_t vd, vuint8mf8_t vs2,
                                vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwmulu_tum(vbool64_t vm, vuint16mf4_t vd, vuint8mf8_t vs2,
                                uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwmulu_tum(vbool32_t vm, vuint16mf2_t vd, vuint8mf4_t vs2,
                                vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwmulu_tum(vbool32_t vm, vuint16mf2_t vd, vuint8mf4_t vs2,
                                uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwmulu_tum(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                               vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwmulu_tum(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                               uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwmulu_tum(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                               vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwmulu_tum(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                               uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwmulu_tum(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                               vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwmulu_tum(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                               uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwmulu_tum(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                               vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwmulu_tum(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                               uint8_t rs1, size_t vl);
// 16-bit sources -> 32-bit results
vuint32mf2_t __riscv_vwmulu_tum(vbool64_t vm, vuint32mf2_t vd, vuint16mf4_t vs2,
                                vuint16mf4_t vs1, size_t vl);
vuint32mf2_t __riscv_vwmulu_tum(vbool64_t vm, vuint32mf2_t vd, vuint16mf4_t vs2,
                                uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwmulu_tum(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwmulu_tum(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                               uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwmulu_tum(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                               vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwmulu_tum(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                               uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwmulu_tum(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                               vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwmulu_tum(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                               uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwmulu_tum(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                               vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwmulu_tum(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                               uint16_t rs1, size_t vl);
// 32-bit sources -> 64-bit results
vuint64m1_t __riscv_vwmulu_tum(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwmulu_tum(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                               uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwmulu_tum(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                               vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwmulu_tum(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                               uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwmulu_tum(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                               vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwmulu_tum(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                               uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwmulu_tum(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                               vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwmulu_tum(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                               uint32_t rs1, size_t vl);
// masked functions (_tumu: tail undisturbed, mask undisturbed)
vint16mf4_t __riscv_vwmul_tumu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                               vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwmul_tumu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                               int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwmul_tumu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                               vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwmul_tumu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                               int8_t rs1, size_t vl);
vint16m1_t __riscv_vwmul_tumu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                              vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwmul_tumu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                              int8_t rs1, size_t vl);
vint16m2_t __riscv_vwmul_tumu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                              vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwmul_tumu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                              int8_t rs1, size_t vl);
vint16m4_t __riscv_vwmul_tumu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                              vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwmul_tumu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                              int8_t rs1, size_t vl);
vint16m8_t __riscv_vwmul_tumu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                              vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwmul_tumu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                              int8_t rs1, size_t vl);
vint32mf2_t __riscv_vwmul_tumu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                               vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwmul_tumu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                               int16_t rs1, size_t vl);
vint32m1_t __riscv_vwmul_tumu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                              vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwmul_tumu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                              int16_t rs1, size_t vl);
vint32m2_t __riscv_vwmul_tumu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                              vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwmul_tumu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                              int16_t rs1, size_t vl);
vint32m4_t __riscv_vwmul_tumu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                              vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwmul_tumu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                              int16_t rs1, size_t vl);
vint32m8_t __riscv_vwmul_tumu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                              vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwmul_tumu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                              int16_t rs1, size_t vl);
vint64m1_t __riscv_vwmul_tumu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                              vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwmul_tumu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                              int32_t rs1, size_t vl);
vint64m2_t __riscv_vwmul_tumu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                              vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwmul_tumu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                              int32_t rs1, size_t vl);
vint64m4_t __riscv_vwmul_tumu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                              vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwmul_tumu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                              int32_t rs1, size_t vl);
vint64m8_t __riscv_vwmul_tumu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                              vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwmul_tumu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                              int32_t rs1, size_t vl);
vint16mf4_t __riscv_vwmulsu_tumu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                 vuint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwmulsu_tumu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                                 uint8_t rs1, size_t vl);
vint16mf2_t __riscv_vwmulsu_tumu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                 vuint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwmulsu_tumu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                                 uint8_t rs1, size_t vl);
vint16m1_t __riscv_vwmulsu_tumu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                vuint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwmulsu_tumu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                                uint8_t rs1, size_t vl);
vint16m2_t __riscv_vwmulsu_tumu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                vuint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwmulsu_tumu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                                uint8_t rs1, size_t vl);
vint16m4_t __riscv_vwmulsu_tumu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                vuint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwmulsu_tumu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                                uint8_t rs1, size_t vl);
vint16m8_t __riscv_vwmulsu_tumu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                vuint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwmulsu_tumu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                                uint8_t rs1, size_t vl);
vint32mf2_t __riscv_vwmulsu_tumu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                 vuint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwmulsu_tumu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                                 uint16_t rs1, size_t vl);
vint32m1_t __riscv_vwmulsu_tumu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                vuint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwmulsu_tumu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                                uint16_t rs1, size_t vl);
vint32m2_t __riscv_vwmulsu_tumu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                vuint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwmulsu_tumu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                                uint16_t rs1, size_t vl);
vint32m4_t __riscv_vwmulsu_tumu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                vuint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwmulsu_tumu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                                uint16_t rs1, size_t vl);
vint32m8_t __riscv_vwmulsu_tumu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                vuint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwmulsu_tumu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                                uint16_t rs1, size_t vl);
vint64m1_t __riscv_vwmulsu_tumu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                vuint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwmulsu_tumu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                                uint32_t rs1, size_t vl);
vint64m2_t __riscv_vwmulsu_tumu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                vuint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwmulsu_tumu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                                uint32_t rs1, size_t vl);
vint64m4_t __riscv_vwmulsu_tumu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                vuint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwmulsu_tumu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                                uint32_t rs1, size_t vl);
vint64m8_t __riscv_vwmulsu_tumu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                vuint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwmulsu_tumu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                                uint32_t rs1, size_t vl);
vuint16mf4_t __riscv_vwmulu_tumu(vbool64_t vm, vuint16mf4_t vd, vuint8mf8_t vs2,
                                 vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwmulu_tumu(vbool64_t vm, vuint16mf4_t vd, vuint8mf8_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwmulu_tumu(vbool32_t vm, vuint16mf2_t vd, vuint8mf4_t vs2,
                                 vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwmulu_tumu(vbool32_t vm, vuint16mf2_t vd, vuint8mf4_t vs2,
                                 uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwmulu_tumu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwmulu_tumu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                                uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwmulu_tumu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwmulu_tumu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                                uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwmulu_tumu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwmulu_tumu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                                uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwmulu_tumu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwmulu_tumu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                                uint8_t rs1, size_t vl);
vuint32mf2_t __riscv_vwmulu_tumu(vbool64_t vm, vuint32mf2_t vd,
                                 vuint16mf4_t vs2, vuint16mf4_t vs1, size_t vl);
vuint32mf2_t __riscv_vwmulu_tumu(vbool64_t vm, vuint32mf2_t vd,
                                 vuint16mf4_t vs2, uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwmulu_tumu(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                                vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwmulu_tumu(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                                uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwmulu_tumu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwmulu_tumu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                                uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwmulu_tumu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwmulu_tumu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                                uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwmulu_tumu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwmulu_tumu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                                uint16_t rs1, size_t vl);
vuint64m1_t __riscv_vwmulu_tumu(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                                vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwmulu_tumu(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                                uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwmulu_tumu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwmulu_tumu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                                uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwmulu_tumu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwmulu_tumu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                                uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwmulu_tumu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwmulu_tumu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                                uint32_t rs1, size_t vl);
// masked functions (_mu: tail agnostic, mask undisturbed)
vint16mf4_t __riscv_vwmul_mu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                             vint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwmul_mu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                             int8_t rs1, size_t vl);
vint16mf2_t __riscv_vwmul_mu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                             vint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwmul_mu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                             int8_t rs1, size_t vl);
vint16m1_t __riscv_vwmul_mu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                            vint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwmul_mu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                            int8_t rs1, size_t vl);
vint16m2_t __riscv_vwmul_mu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                            vint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwmul_mu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                            int8_t rs1, size_t vl);
vint16m4_t __riscv_vwmul_mu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                            vint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwmul_mu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                            int8_t rs1, size_t vl);
vint16m8_t __riscv_vwmul_mu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                            vint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwmul_mu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                            int8_t rs1, size_t vl);
vint32mf2_t __riscv_vwmul_mu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                             vint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwmul_mu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                             int16_t rs1, size_t vl);
vint32m1_t __riscv_vwmul_mu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                            vint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwmul_mu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                            int16_t rs1, size_t vl);
vint32m2_t __riscv_vwmul_mu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                            vint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwmul_mu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                            int16_t rs1, size_t vl);
vint32m4_t __riscv_vwmul_mu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                            vint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwmul_mu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                            int16_t rs1, size_t vl);
vint32m8_t __riscv_vwmul_mu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                            vint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwmul_mu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                            int16_t rs1, size_t vl);
vint64m1_t __riscv_vwmul_mu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                            vint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwmul_mu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                            int32_t rs1, size_t vl);
vint64m2_t __riscv_vwmul_mu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                            vint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwmul_mu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                            int32_t rs1, size_t vl);
vint64m4_t __riscv_vwmul_mu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                            vint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwmul_mu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                            int32_t rs1, size_t vl);
vint64m8_t __riscv_vwmul_mu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                            vint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwmul_mu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                            int32_t rs1, size_t vl);
vint16mf4_t __riscv_vwmulsu_mu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                               vuint8mf8_t vs1, size_t vl);
vint16mf4_t __riscv_vwmulsu_mu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs2,
                               uint8_t rs1, size_t vl);
vint16mf2_t __riscv_vwmulsu_mu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                               vuint8mf4_t vs1, size_t vl);
vint16mf2_t __riscv_vwmulsu_mu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs2,
                               uint8_t rs1, size_t vl);
vint16m1_t __riscv_vwmulsu_mu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vint16m1_t __riscv_vwmulsu_mu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vint16m2_t __riscv_vwmulsu_mu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                              vuint8m1_t vs1, size_t vl);
vint16m2_t __riscv_vwmulsu_mu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs2,
                              uint8_t rs1, size_t vl);
vint16m4_t __riscv_vwmulsu_mu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                              vuint8m2_t vs1, size_t vl);
vint16m4_t __riscv_vwmulsu_mu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs2,
                              uint8_t rs1, size_t vl);
vint16m8_t __riscv_vwmulsu_mu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                              vuint8m4_t vs1, size_t vl);
vint16m8_t __riscv_vwmulsu_mu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs2,
                              uint8_t rs1, size_t vl);
vint32mf2_t __riscv_vwmulsu_mu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vint32mf2_t __riscv_vwmulsu_mu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vint32m1_t __riscv_vwmulsu_mu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vint32m1_t __riscv_vwmulsu_mu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vint32m2_t __riscv_vwmulsu_mu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vint32m2_t __riscv_vwmulsu_mu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vint32m4_t __riscv_vwmulsu_mu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vint32m4_t __riscv_vwmulsu_mu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vint32m8_t __riscv_vwmulsu_mu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vint32m8_t __riscv_vwmulsu_mu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vint64m1_t __riscv_vwmulsu_mu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vint64m1_t __riscv_vwmulsu_mu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vint64m2_t __riscv_vwmulsu_mu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vint64m2_t __riscv_vwmulsu_mu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vint64m4_t __riscv_vwmulsu_mu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vint64m4_t __riscv_vwmulsu_mu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vint64m8_t __riscv_vwmulsu_mu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vint64m8_t __riscv_vwmulsu_mu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs2,
                              uint32_t rs1, size_t vl);
vuint16mf4_t __riscv_vwmulu_mu(vbool64_t vm, vuint16mf4_t vd, vuint8mf8_t vs2,
                               vuint8mf8_t vs1, size_t vl);
vuint16mf4_t __riscv_vwmulu_mu(vbool64_t vm, vuint16mf4_t vd, vuint8mf8_t vs2,
                               uint8_t rs1, size_t vl);
vuint16mf2_t __riscv_vwmulu_mu(vbool32_t vm, vuint16mf2_t vd, vuint8mf4_t vs2,
                               vuint8mf4_t vs1, size_t vl);
vuint16mf2_t __riscv_vwmulu_mu(vbool32_t vm, vuint16mf2_t vd, vuint8mf4_t vs2,
                               uint8_t rs1, size_t vl);
vuint16m1_t __riscv_vwmulu_mu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                              vuint8mf2_t vs1, size_t vl);
vuint16m1_t __riscv_vwmulu_mu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs2,
                              uint8_t rs1, size_t vl);
vuint16m2_t __riscv_vwmulu_mu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                              vuint8m1_t vs1, size_t vl);
vuint16m2_t __riscv_vwmulu_mu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs2,
                              uint8_t rs1, size_t vl);
vuint16m4_t __riscv_vwmulu_mu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                              vuint8m2_t vs1, size_t vl);
vuint16m4_t __riscv_vwmulu_mu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs2,
                              uint8_t rs1, size_t vl);
vuint16m8_t __riscv_vwmulu_mu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                              vuint8m4_t vs1, size_t vl);
vuint16m8_t __riscv_vwmulu_mu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs2,
                              uint8_t rs1, size_t vl);
vuint32mf2_t __riscv_vwmulu_mu(vbool64_t vm, vuint32mf2_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, size_t vl);
vuint32mf2_t __riscv_vwmulu_mu(vbool64_t vm, vuint32mf2_t vd, vuint16mf4_t vs2,
                               uint16_t rs1, size_t vl);
vuint32m1_t __riscv_vwmulu_mu(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                              vuint16mf2_t vs1, size_t vl);
vuint32m1_t __riscv_vwmulu_mu(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs2,
                              uint16_t rs1, size_t vl);
vuint32m2_t __riscv_vwmulu_mu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                              vuint16m1_t vs1, size_t vl);
vuint32m2_t __riscv_vwmulu_mu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs2,
                              uint16_t rs1, size_t vl);
vuint32m4_t __riscv_vwmulu_mu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                              vuint16m2_t vs1, size_t vl);
vuint32m4_t __riscv_vwmulu_mu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs2,
                              uint16_t rs1, size_t vl);
vuint32m8_t __riscv_vwmulu_mu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                              vuint16m4_t vs1, size_t vl);
vuint32m8_t __riscv_vwmulu_mu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs2,
                              uint16_t rs1, size_t vl);
vuint64m1_t __riscv_vwmulu_mu(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                              vuint32mf2_t vs1, size_t vl);
vuint64m1_t __riscv_vwmulu_mu(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m2_t __riscv_vwmulu_mu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                              vuint32m1_t vs1, size_t vl);
vuint64m2_t __riscv_vwmulu_mu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m4_t __riscv_vwmulu_mu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                              vuint32m2_t vs1, size_t vl);
vuint64m4_t __riscv_vwmulu_mu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs2,
                              uint32_t rs1, size_t vl);
vuint64m8_t __riscv_vwmulu_mu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                              vuint32m4_t vs1, size_t vl);
vuint64m8_t __riscv_vwmulu_mu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs2,
                              uint32_t rs1, size_t vl);

Vector Single-Width Integer Multiply-Add Intrinsics

vint8mf8_t __riscv_vmacc_tu(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2,
                            size_t vl);
vint8mf8_t __riscv_vmacc_tu(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2,
                            size_t vl);
vint8mf4_t __riscv_vmacc_tu(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2,
                            size_t vl);
vint8mf4_t __riscv_vmacc_tu(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2,
                            size_t vl);
vint8mf2_t __riscv_vmacc_tu(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2,
                            size_t vl);
vint8mf2_t __riscv_vmacc_tu(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2,
                            size_t vl);
vint8m1_t __riscv_vmacc_tu(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2,
                           size_t vl);
vint8m1_t __riscv_vmacc_tu(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vmacc_tu(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2,
                           size_t vl);
vint8m2_t __riscv_vmacc_tu(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vmacc_tu(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2,
                           size_t vl);
vint8m4_t __riscv_vmacc_tu(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vmacc_tu(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2,
                           size_t vl);
vint8m8_t __riscv_vmacc_tu(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vmacc_tu(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs2,
                             size_t vl);
vint16mf4_t __riscv_vmacc_tu(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2,
                             size_t vl);
vint16mf2_t __riscv_vmacc_tu(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs2,
                             size_t vl);
vint16mf2_t __riscv_vmacc_tu(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2,
                             size_t vl);
vint16m1_t __riscv_vmacc_tu(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2,
                            size_t vl);
vint16m1_t __riscv_vmacc_tu(vint16m1_t vd, int16_t rs1, vint16m1_t vs2,
                            size_t vl);
vint16m2_t __riscv_vmacc_tu(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2,
                            size_t vl);
vint16m2_t __riscv_vmacc_tu(vint16m2_t vd, int16_t rs1, vint16m2_t vs2,
                            size_t vl);
vint16m4_t __riscv_vmacc_tu(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2,
                            size_t vl);
vint16m4_t __riscv_vmacc_tu(vint16m4_t vd, int16_t rs1, vint16m4_t vs2,
                            size_t vl);
vint16m8_t __riscv_vmacc_tu(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2,
                            size_t vl);
vint16m8_t __riscv_vmacc_tu(vint16m8_t vd, int16_t rs1, vint16m8_t vs2,
                            size_t vl);
vint32mf2_t __riscv_vmacc_tu(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs2,
                             size_t vl);
vint32mf2_t __riscv_vmacc_tu(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2,
                             size_t vl);
vint32m1_t __riscv_vmacc_tu(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2,
                            size_t vl);
vint32m1_t __riscv_vmacc_tu(vint32m1_t vd, int32_t rs1, vint32m1_t vs2,
                            size_t vl);
vint32m2_t __riscv_vmacc_tu(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2,
                            size_t vl);
vint32m2_t __riscv_vmacc_tu(vint32m2_t vd, int32_t rs1, vint32m2_t vs2,
                            size_t vl);
vint32m4_t __riscv_vmacc_tu(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2,
                            size_t vl);
vint32m4_t __riscv_vmacc_tu(vint32m4_t vd, int32_t rs1, vint32m4_t vs2,
                            size_t vl);
vint32m8_t __riscv_vmacc_tu(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2,
                            size_t vl);
vint32m8_t __riscv_vmacc_tu(vint32m8_t vd, int32_t rs1, vint32m8_t vs2,
                            size_t vl);
vint64m1_t __riscv_vmacc_tu(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2,
                            size_t vl);
vint64m1_t __riscv_vmacc_tu(vint64m1_t vd, int64_t rs1, vint64m1_t vs2,
                            size_t vl);
vint64m2_t __riscv_vmacc_tu(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2,
                            size_t vl);
vint64m2_t __riscv_vmacc_tu(vint64m2_t vd, int64_t rs1, vint64m2_t vs2,
                            size_t vl);
vint64m4_t __riscv_vmacc_tu(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2,
                            size_t vl);
vint64m4_t __riscv_vmacc_tu(vint64m4_t vd, int64_t rs1, vint64m4_t vs2,
                            size_t vl);
vint64m8_t __riscv_vmacc_tu(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2,
                            size_t vl);
vint64m8_t __riscv_vmacc_tu(vint64m8_t vd, int64_t rs1, vint64m8_t vs2,
                            size_t vl);
vint8mf8_t __riscv_vnmsac_tu(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2,
                             size_t vl);
vint8mf8_t __riscv_vnmsac_tu(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2,
                             size_t vl);
vint8mf4_t __riscv_vnmsac_tu(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2,
                             size_t vl);
vint8mf4_t __riscv_vnmsac_tu(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2,
                             size_t vl);
vint8mf2_t __riscv_vnmsac_tu(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2,
                             size_t vl);
vint8mf2_t __riscv_vnmsac_tu(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2,
                             size_t vl);
vint8m1_t __riscv_vnmsac_tu(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2,
                            size_t vl);
vint8m1_t __riscv_vnmsac_tu(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vnmsac_tu(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2,
                            size_t vl);
vint8m2_t __riscv_vnmsac_tu(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vnmsac_tu(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2,
                            size_t vl);
vint8m4_t __riscv_vnmsac_tu(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vnmsac_tu(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2,
                            size_t vl);
vint8m8_t __riscv_vnmsac_tu(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsac_tu(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs2,
                              size_t vl);
vint16mf4_t __riscv_vnmsac_tu(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2,
                              size_t vl);
vint16mf2_t __riscv_vnmsac_tu(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs2,
                              size_t vl);
vint16mf2_t __riscv_vnmsac_tu(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2,
                              size_t vl);
vint16m1_t __riscv_vnmsac_tu(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2,
                             size_t vl);
vint16m1_t __riscv_vnmsac_tu(vint16m1_t vd, int16_t rs1, vint16m1_t vs2,
                             size_t vl);
vint16m2_t __riscv_vnmsac_tu(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2,
                             size_t vl);
vint16m2_t __riscv_vnmsac_tu(vint16m2_t vd, int16_t rs1, vint16m2_t vs2,
                             size_t vl);
vint16m4_t __riscv_vnmsac_tu(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2,
                             size_t vl);
vint16m4_t __riscv_vnmsac_tu(vint16m4_t vd, int16_t rs1, vint16m4_t vs2,
                             size_t vl);
vint16m8_t __riscv_vnmsac_tu(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2,
                             size_t vl);
vint16m8_t __riscv_vnmsac_tu(vint16m8_t vd, int16_t rs1, vint16m8_t vs2,
                             size_t vl);
vint32mf2_t __riscv_vnmsac_tu(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs2,
                              size_t vl);
vint32mf2_t __riscv_vnmsac_tu(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2,
                              size_t vl);
vint32m1_t __riscv_vnmsac_tu(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2,
                             size_t vl);
vint32m1_t __riscv_vnmsac_tu(vint32m1_t vd, int32_t rs1, vint32m1_t vs2,
                             size_t vl);
vint32m2_t __riscv_vnmsac_tu(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2,
                             size_t vl);
vint32m2_t __riscv_vnmsac_tu(vint32m2_t vd, int32_t rs1, vint32m2_t vs2,
                             size_t vl);
vint32m4_t __riscv_vnmsac_tu(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2,
                             size_t vl);
vint32m4_t __riscv_vnmsac_tu(vint32m4_t vd, int32_t rs1, vint32m4_t vs2,
                             size_t vl);
vint32m8_t __riscv_vnmsac_tu(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2,
                             size_t vl);
vint32m8_t __riscv_vnmsac_tu(vint32m8_t vd, int32_t rs1, vint32m8_t vs2,
                             size_t vl);
vint64m1_t __riscv_vnmsac_tu(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2,
                             size_t vl);
vint64m1_t __riscv_vnmsac_tu(vint64m1_t vd, int64_t rs1, vint64m1_t vs2,
                             size_t vl);
vint64m2_t __riscv_vnmsac_tu(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2,
                             size_t vl);
vint64m2_t __riscv_vnmsac_tu(vint64m2_t vd, int64_t rs1, vint64m2_t vs2,
                             size_t vl);
vint64m4_t __riscv_vnmsac_tu(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2,
                             size_t vl);
vint64m4_t __riscv_vnmsac_tu(vint64m4_t vd, int64_t rs1, vint64m4_t vs2,
                             size_t vl);
vint64m8_t __riscv_vnmsac_tu(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2,
                             size_t vl);
vint64m8_t __riscv_vnmsac_tu(vint64m8_t vd, int64_t rs1, vint64m8_t vs2,
                             size_t vl);
// __riscv_vmadd_tu: overloaded prototypes for signed integer vectors.
// Each of the 22 vector types from vint8mf8_t through vint64m8_t is listed
// twice: a vector-vector form (second operand vs1 is a vector) followed by a
// vector-scalar form (second operand rs1 is a scalar of the element type).
// Every overload returns the same vector type as its vd operand and takes
// size_t vl as its last parameter.
// NOTE(review): per the RVV spec, vmadd presumably computes
// vd[i] = (vs1[i] * vd[i]) + vs2[i], and the _tu suffix presumably selects
// the tail-undisturbed policy; confirm against the intrinsics specification.
vint8mf8_t __riscv_vmadd_tu(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2,
                            size_t vl);
vint8mf8_t __riscv_vmadd_tu(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2,
                            size_t vl);
vint8mf4_t __riscv_vmadd_tu(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2,
                            size_t vl);
vint8mf4_t __riscv_vmadd_tu(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2,
                            size_t vl);
vint8mf2_t __riscv_vmadd_tu(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2,
                            size_t vl);
vint8mf2_t __riscv_vmadd_tu(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2,
                            size_t vl);
vint8m1_t __riscv_vmadd_tu(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2,
                           size_t vl);
vint8m1_t __riscv_vmadd_tu(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vmadd_tu(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2,
                           size_t vl);
vint8m2_t __riscv_vmadd_tu(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vmadd_tu(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2,
                           size_t vl);
vint8m4_t __riscv_vmadd_tu(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vmadd_tu(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2,
                           size_t vl);
vint8m8_t __riscv_vmadd_tu(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vmadd_tu(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs2,
                             size_t vl);
vint16mf4_t __riscv_vmadd_tu(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2,
                             size_t vl);
vint16mf2_t __riscv_vmadd_tu(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs2,
                             size_t vl);
vint16mf2_t __riscv_vmadd_tu(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2,
                             size_t vl);
vint16m1_t __riscv_vmadd_tu(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2,
                            size_t vl);
vint16m1_t __riscv_vmadd_tu(vint16m1_t vd, int16_t rs1, vint16m1_t vs2,
                            size_t vl);
vint16m2_t __riscv_vmadd_tu(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2,
                            size_t vl);
vint16m2_t __riscv_vmadd_tu(vint16m2_t vd, int16_t rs1, vint16m2_t vs2,
                            size_t vl);
vint16m4_t __riscv_vmadd_tu(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2,
                            size_t vl);
vint16m4_t __riscv_vmadd_tu(vint16m4_t vd, int16_t rs1, vint16m4_t vs2,
                            size_t vl);
vint16m8_t __riscv_vmadd_tu(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2,
                            size_t vl);
vint16m8_t __riscv_vmadd_tu(vint16m8_t vd, int16_t rs1, vint16m8_t vs2,
                            size_t vl);
vint32mf2_t __riscv_vmadd_tu(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs2,
                             size_t vl);
vint32mf2_t __riscv_vmadd_tu(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2,
                             size_t vl);
vint32m1_t __riscv_vmadd_tu(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2,
                            size_t vl);
vint32m1_t __riscv_vmadd_tu(vint32m1_t vd, int32_t rs1, vint32m1_t vs2,
                            size_t vl);
vint32m2_t __riscv_vmadd_tu(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2,
                            size_t vl);
vint32m2_t __riscv_vmadd_tu(vint32m2_t vd, int32_t rs1, vint32m2_t vs2,
                            size_t vl);
vint32m4_t __riscv_vmadd_tu(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2,
                            size_t vl);
vint32m4_t __riscv_vmadd_tu(vint32m4_t vd, int32_t rs1, vint32m4_t vs2,
                            size_t vl);
vint32m8_t __riscv_vmadd_tu(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2,
                            size_t vl);
vint32m8_t __riscv_vmadd_tu(vint32m8_t vd, int32_t rs1, vint32m8_t vs2,
                            size_t vl);
vint64m1_t __riscv_vmadd_tu(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2,
                            size_t vl);
vint64m1_t __riscv_vmadd_tu(vint64m1_t vd, int64_t rs1, vint64m1_t vs2,
                            size_t vl);
vint64m2_t __riscv_vmadd_tu(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2,
                            size_t vl);
vint64m2_t __riscv_vmadd_tu(vint64m2_t vd, int64_t rs1, vint64m2_t vs2,
                            size_t vl);
vint64m4_t __riscv_vmadd_tu(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2,
                            size_t vl);
vint64m4_t __riscv_vmadd_tu(vint64m4_t vd, int64_t rs1, vint64m4_t vs2,
                            size_t vl);
vint64m8_t __riscv_vmadd_tu(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2,
                            size_t vl);
vint64m8_t __riscv_vmadd_tu(vint64m8_t vd, int64_t rs1, vint64m8_t vs2,
                            size_t vl);
// __riscv_vnmsub_tu: overloaded prototypes for signed integer vectors.
// Each of the 22 vector types from vint8mf8_t through vint64m8_t is listed
// twice: a vector-vector form (second operand vs1 is a vector) followed by a
// vector-scalar form (second operand rs1 is a scalar of the element type).
// Every overload returns the same vector type as its vd operand and takes
// size_t vl as its last parameter.
// NOTE(review): per the RVV spec, vnmsub presumably computes
// vd[i] = -(vs1[i] * vd[i]) + vs2[i], and the _tu suffix presumably selects
// the tail-undisturbed policy; confirm against the intrinsics specification.
vint8mf8_t __riscv_vnmsub_tu(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2,
                             size_t vl);
vint8mf8_t __riscv_vnmsub_tu(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2,
                             size_t vl);
vint8mf4_t __riscv_vnmsub_tu(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2,
                             size_t vl);
vint8mf4_t __riscv_vnmsub_tu(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2,
                             size_t vl);
vint8mf2_t __riscv_vnmsub_tu(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2,
                             size_t vl);
vint8mf2_t __riscv_vnmsub_tu(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2,
                             size_t vl);
vint8m1_t __riscv_vnmsub_tu(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2,
                            size_t vl);
vint8m1_t __riscv_vnmsub_tu(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vnmsub_tu(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2,
                            size_t vl);
vint8m2_t __riscv_vnmsub_tu(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vnmsub_tu(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2,
                            size_t vl);
vint8m4_t __riscv_vnmsub_tu(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vnmsub_tu(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2,
                            size_t vl);
vint8m8_t __riscv_vnmsub_tu(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsub_tu(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs2,
                              size_t vl);
vint16mf4_t __riscv_vnmsub_tu(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2,
                              size_t vl);
vint16mf2_t __riscv_vnmsub_tu(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs2,
                              size_t vl);
vint16mf2_t __riscv_vnmsub_tu(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2,
                              size_t vl);
vint16m1_t __riscv_vnmsub_tu(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2,
                             size_t vl);
vint16m1_t __riscv_vnmsub_tu(vint16m1_t vd, int16_t rs1, vint16m1_t vs2,
                             size_t vl);
vint16m2_t __riscv_vnmsub_tu(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2,
                             size_t vl);
vint16m2_t __riscv_vnmsub_tu(vint16m2_t vd, int16_t rs1, vint16m2_t vs2,
                             size_t vl);
vint16m4_t __riscv_vnmsub_tu(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2,
                             size_t vl);
vint16m4_t __riscv_vnmsub_tu(vint16m4_t vd, int16_t rs1, vint16m4_t vs2,
                             size_t vl);
vint16m8_t __riscv_vnmsub_tu(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2,
                             size_t vl);
vint16m8_t __riscv_vnmsub_tu(vint16m8_t vd, int16_t rs1, vint16m8_t vs2,
                             size_t vl);
vint32mf2_t __riscv_vnmsub_tu(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs2,
                              size_t vl);
vint32mf2_t __riscv_vnmsub_tu(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2,
                              size_t vl);
vint32m1_t __riscv_vnmsub_tu(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2,
                             size_t vl);
vint32m1_t __riscv_vnmsub_tu(vint32m1_t vd, int32_t rs1, vint32m1_t vs2,
                             size_t vl);
vint32m2_t __riscv_vnmsub_tu(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2,
                             size_t vl);
vint32m2_t __riscv_vnmsub_tu(vint32m2_t vd, int32_t rs1, vint32m2_t vs2,
                             size_t vl);
vint32m4_t __riscv_vnmsub_tu(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2,
                             size_t vl);
vint32m4_t __riscv_vnmsub_tu(vint32m4_t vd, int32_t rs1, vint32m4_t vs2,
                             size_t vl);
vint32m8_t __riscv_vnmsub_tu(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2,
                             size_t vl);
vint32m8_t __riscv_vnmsub_tu(vint32m8_t vd, int32_t rs1, vint32m8_t vs2,
                             size_t vl);
vint64m1_t __riscv_vnmsub_tu(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2,
                             size_t vl);
vint64m1_t __riscv_vnmsub_tu(vint64m1_t vd, int64_t rs1, vint64m1_t vs2,
                             size_t vl);
vint64m2_t __riscv_vnmsub_tu(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2,
                             size_t vl);
vint64m2_t __riscv_vnmsub_tu(vint64m2_t vd, int64_t rs1, vint64m2_t vs2,
                             size_t vl);
vint64m4_t __riscv_vnmsub_tu(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2,
                             size_t vl);
vint64m4_t __riscv_vnmsub_tu(vint64m4_t vd, int64_t rs1, vint64m4_t vs2,
                             size_t vl);
vint64m8_t __riscv_vnmsub_tu(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2,
                             size_t vl);
vint64m8_t __riscv_vnmsub_tu(vint64m8_t vd, int64_t rs1, vint64m8_t vs2,
                             size_t vl);
// __riscv_vmacc_tu: overloaded prototypes for unsigned integer vectors.
// Each of the 22 vector types from vuint8mf8_t through vuint64m8_t is listed
// twice: a vector-vector form (second operand vs1 is a vector) followed by a
// vector-scalar form (second operand rs1 is a scalar of the element type).
// Every overload returns the same vector type as its vd operand and takes
// size_t vl as its last parameter.
// NOTE(review): per the RVV spec, vmacc presumably computes
// vd[i] = +(vs1[i] * vs2[i]) + vd[i], and the _tu suffix presumably selects
// the tail-undisturbed policy; confirm against the intrinsics specification.
vuint8mf8_t __riscv_vmacc_tu(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2,
                             size_t vl);
vuint8mf8_t __riscv_vmacc_tu(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2,
                             size_t vl);
vuint8mf4_t __riscv_vmacc_tu(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2,
                             size_t vl);
vuint8mf4_t __riscv_vmacc_tu(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2,
                             size_t vl);
vuint8mf2_t __riscv_vmacc_tu(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2,
                             size_t vl);
vuint8mf2_t __riscv_vmacc_tu(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2,
                             size_t vl);
vuint8m1_t __riscv_vmacc_tu(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2,
                            size_t vl);
vuint8m1_t __riscv_vmacc_tu(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2,
                            size_t vl);
vuint8m2_t __riscv_vmacc_tu(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2,
                            size_t vl);
vuint8m2_t __riscv_vmacc_tu(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2,
                            size_t vl);
vuint8m4_t __riscv_vmacc_tu(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2,
                            size_t vl);
vuint8m4_t __riscv_vmacc_tu(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2,
                            size_t vl);
vuint8m8_t __riscv_vmacc_tu(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2,
                            size_t vl);
vuint8m8_t __riscv_vmacc_tu(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2,
                            size_t vl);
vuint16mf4_t __riscv_vmacc_tu(vuint16mf4_t vd, vuint16mf4_t vs1,
                              vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vmacc_tu(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs2,
                              size_t vl);
vuint16mf2_t __riscv_vmacc_tu(vuint16mf2_t vd, vuint16mf2_t vs1,
                              vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vmacc_tu(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs2,
                              size_t vl);
vuint16m1_t __riscv_vmacc_tu(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2,
                             size_t vl);
vuint16m1_t __riscv_vmacc_tu(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2,
                             size_t vl);
vuint16m2_t __riscv_vmacc_tu(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2,
                             size_t vl);
vuint16m2_t __riscv_vmacc_tu(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2,
                             size_t vl);
vuint16m4_t __riscv_vmacc_tu(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2,
                             size_t vl);
vuint16m4_t __riscv_vmacc_tu(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2,
                             size_t vl);
vuint16m8_t __riscv_vmacc_tu(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2,
                             size_t vl);
vuint16m8_t __riscv_vmacc_tu(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2,
                             size_t vl);
vuint32mf2_t __riscv_vmacc_tu(vuint32mf2_t vd, vuint32mf2_t vs1,
                              vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vmacc_tu(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs2,
                              size_t vl);
vuint32m1_t __riscv_vmacc_tu(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2,
                             size_t vl);
vuint32m1_t __riscv_vmacc_tu(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2,
                             size_t vl);
vuint32m2_t __riscv_vmacc_tu(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2,
                             size_t vl);
vuint32m2_t __riscv_vmacc_tu(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2,
                             size_t vl);
vuint32m4_t __riscv_vmacc_tu(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2,
                             size_t vl);
vuint32m4_t __riscv_vmacc_tu(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2,
                             size_t vl);
vuint32m8_t __riscv_vmacc_tu(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2,
                             size_t vl);
vuint32m8_t __riscv_vmacc_tu(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2,
                             size_t vl);
vuint64m1_t __riscv_vmacc_tu(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2,
                             size_t vl);
vuint64m1_t __riscv_vmacc_tu(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2,
                             size_t vl);
vuint64m2_t __riscv_vmacc_tu(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2,
                             size_t vl);
vuint64m2_t __riscv_vmacc_tu(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2,
                             size_t vl);
vuint64m4_t __riscv_vmacc_tu(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2,
                             size_t vl);
vuint64m4_t __riscv_vmacc_tu(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2,
                             size_t vl);
vuint64m8_t __riscv_vmacc_tu(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2,
                             size_t vl);
vuint64m8_t __riscv_vmacc_tu(vuint64m8_t vd, uint64_t rs1, vuint64m8_t vs2,
                             size_t vl);
// __riscv_vnmsac_tu: overloaded prototypes for unsigned integer vectors.
// Each of the 22 vector types from vuint8mf8_t through vuint64m8_t is listed
// twice: a vector-vector form (second operand vs1 is a vector) followed by a
// vector-scalar form (second operand rs1 is a scalar of the element type).
// Every overload returns the same vector type as its vd operand and takes
// size_t vl as its last parameter.
// NOTE(review): per the RVV spec, vnmsac presumably computes
// vd[i] = -(vs1[i] * vs2[i]) + vd[i], and the _tu suffix presumably selects
// the tail-undisturbed policy; confirm against the intrinsics specification.
vuint8mf8_t __riscv_vnmsac_tu(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2,
                              size_t vl);
vuint8mf8_t __riscv_vnmsac_tu(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2,
                              size_t vl);
vuint8mf4_t __riscv_vnmsac_tu(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2,
                              size_t vl);
vuint8mf4_t __riscv_vnmsac_tu(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2,
                              size_t vl);
vuint8mf2_t __riscv_vnmsac_tu(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2,
                              size_t vl);
vuint8mf2_t __riscv_vnmsac_tu(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2,
                              size_t vl);
vuint8m1_t __riscv_vnmsac_tu(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2,
                             size_t vl);
vuint8m1_t __riscv_vnmsac_tu(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2,
                             size_t vl);
vuint8m2_t __riscv_vnmsac_tu(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2,
                             size_t vl);
vuint8m2_t __riscv_vnmsac_tu(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2,
                             size_t vl);
vuint8m4_t __riscv_vnmsac_tu(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2,
                             size_t vl);
vuint8m4_t __riscv_vnmsac_tu(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2,
                             size_t vl);
vuint8m8_t __riscv_vnmsac_tu(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2,
                             size_t vl);
vuint8m8_t __riscv_vnmsac_tu(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2,
                             size_t vl);
vuint16mf4_t __riscv_vnmsac_tu(vuint16mf4_t vd, vuint16mf4_t vs1,
                               vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsac_tu(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs2,
                               size_t vl);
vuint16mf2_t __riscv_vnmsac_tu(vuint16mf2_t vd, vuint16mf2_t vs1,
                               vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsac_tu(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs2,
                               size_t vl);
vuint16m1_t __riscv_vnmsac_tu(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2,
                              size_t vl);
vuint16m1_t __riscv_vnmsac_tu(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2,
                              size_t vl);
vuint16m2_t __riscv_vnmsac_tu(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2,
                              size_t vl);
vuint16m2_t __riscv_vnmsac_tu(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2,
                              size_t vl);
vuint16m4_t __riscv_vnmsac_tu(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2,
                              size_t vl);
vuint16m4_t __riscv_vnmsac_tu(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2,
                              size_t vl);
vuint16m8_t __riscv_vnmsac_tu(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2,
                              size_t vl);
vuint16m8_t __riscv_vnmsac_tu(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2,
                              size_t vl);
vuint32mf2_t __riscv_vnmsac_tu(vuint32mf2_t vd, vuint32mf2_t vs1,
                               vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsac_tu(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs2,
                               size_t vl);
vuint32m1_t __riscv_vnmsac_tu(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2,
                              size_t vl);
vuint32m1_t __riscv_vnmsac_tu(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2,
                              size_t vl);
vuint32m2_t __riscv_vnmsac_tu(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2,
                              size_t vl);
vuint32m2_t __riscv_vnmsac_tu(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2,
                              size_t vl);
vuint32m4_t __riscv_vnmsac_tu(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2,
                              size_t vl);
vuint32m4_t __riscv_vnmsac_tu(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2,
                              size_t vl);
vuint32m8_t __riscv_vnmsac_tu(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2,
                              size_t vl);
vuint32m8_t __riscv_vnmsac_tu(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2,
                              size_t vl);
vuint64m1_t __riscv_vnmsac_tu(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2,
                              size_t vl);
vuint64m1_t __riscv_vnmsac_tu(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2,
                              size_t vl);
vuint64m2_t __riscv_vnmsac_tu(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2,
                              size_t vl);
vuint64m2_t __riscv_vnmsac_tu(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2,
                              size_t vl);
vuint64m4_t __riscv_vnmsac_tu(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2,
                              size_t vl);
vuint64m4_t __riscv_vnmsac_tu(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2,
                              size_t vl);
vuint64m8_t __riscv_vnmsac_tu(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2,
                              size_t vl);
vuint64m8_t __riscv_vnmsac_tu(vuint64m8_t vd, uint64_t rs1, vuint64m8_t vs2,
                              size_t vl);
// __riscv_vmadd_tu: overloaded prototypes for unsigned integer vectors.
// Each of the 22 vector types from vuint8mf8_t through vuint64m8_t is listed
// twice: a vector-vector form (second operand vs1 is a vector) followed by a
// vector-scalar form (second operand rs1 is a scalar of the element type).
// Every overload returns the same vector type as its vd operand and takes
// size_t vl as its last parameter.
// NOTE(review): per the RVV spec, vmadd presumably computes
// vd[i] = (vs1[i] * vd[i]) + vs2[i], and the _tu suffix presumably selects
// the tail-undisturbed policy; confirm against the intrinsics specification.
vuint8mf8_t __riscv_vmadd_tu(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2,
                             size_t vl);
vuint8mf8_t __riscv_vmadd_tu(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2,
                             size_t vl);
vuint8mf4_t __riscv_vmadd_tu(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2,
                             size_t vl);
vuint8mf4_t __riscv_vmadd_tu(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2,
                             size_t vl);
vuint8mf2_t __riscv_vmadd_tu(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2,
                             size_t vl);
vuint8mf2_t __riscv_vmadd_tu(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2,
                             size_t vl);
vuint8m1_t __riscv_vmadd_tu(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2,
                            size_t vl);
vuint8m1_t __riscv_vmadd_tu(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2,
                            size_t vl);
vuint8m2_t __riscv_vmadd_tu(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2,
                            size_t vl);
vuint8m2_t __riscv_vmadd_tu(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2,
                            size_t vl);
vuint8m4_t __riscv_vmadd_tu(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2,
                            size_t vl);
vuint8m4_t __riscv_vmadd_tu(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2,
                            size_t vl);
vuint8m8_t __riscv_vmadd_tu(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2,
                            size_t vl);
vuint8m8_t __riscv_vmadd_tu(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2,
                            size_t vl);
vuint16mf4_t __riscv_vmadd_tu(vuint16mf4_t vd, vuint16mf4_t vs1,
                              vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vmadd_tu(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs2,
                              size_t vl);
vuint16mf2_t __riscv_vmadd_tu(vuint16mf2_t vd, vuint16mf2_t vs1,
                              vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vmadd_tu(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs2,
                              size_t vl);
vuint16m1_t __riscv_vmadd_tu(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2,
                             size_t vl);
vuint16m1_t __riscv_vmadd_tu(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2,
                             size_t vl);
vuint16m2_t __riscv_vmadd_tu(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2,
                             size_t vl);
vuint16m2_t __riscv_vmadd_tu(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2,
                             size_t vl);
vuint16m4_t __riscv_vmadd_tu(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2,
                             size_t vl);
vuint16m4_t __riscv_vmadd_tu(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2,
                             size_t vl);
vuint16m8_t __riscv_vmadd_tu(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2,
                             size_t vl);
vuint16m8_t __riscv_vmadd_tu(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2,
                             size_t vl);
vuint32mf2_t __riscv_vmadd_tu(vuint32mf2_t vd, vuint32mf2_t vs1,
                              vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vmadd_tu(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs2,
                              size_t vl);
vuint32m1_t __riscv_vmadd_tu(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2,
                             size_t vl);
vuint32m1_t __riscv_vmadd_tu(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2,
                             size_t vl);
vuint32m2_t __riscv_vmadd_tu(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2,
                             size_t vl);
vuint32m2_t __riscv_vmadd_tu(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2,
                             size_t vl);
vuint32m4_t __riscv_vmadd_tu(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2,
                             size_t vl);
vuint32m4_t __riscv_vmadd_tu(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2,
                             size_t vl);
vuint32m8_t __riscv_vmadd_tu(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2,
                             size_t vl);
vuint32m8_t __riscv_vmadd_tu(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2,
                             size_t vl);
vuint64m1_t __riscv_vmadd_tu(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2,
                             size_t vl);
vuint64m1_t __riscv_vmadd_tu(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2,
                             size_t vl);
vuint64m2_t __riscv_vmadd_tu(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2,
                             size_t vl);
vuint64m2_t __riscv_vmadd_tu(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2,
                             size_t vl);
vuint64m4_t __riscv_vmadd_tu(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2,
                             size_t vl);
vuint64m4_t __riscv_vmadd_tu(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2,
                             size_t vl);
vuint64m8_t __riscv_vmadd_tu(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2,
                             size_t vl);
vuint64m8_t __riscv_vmadd_tu(vuint64m8_t vd, uint64_t rs1, vuint64m8_t vs2,
                             size_t vl);
// __riscv_vnmsub_tu: overloaded prototypes for unsigned integer vectors.
// Each of the 22 vector types from vuint8mf8_t through vuint64m8_t is listed
// twice: a vector-vector form (second operand vs1 is a vector) followed by a
// vector-scalar form (second operand rs1 is a scalar of the element type).
// Every overload returns the same vector type as its vd operand and takes
// size_t vl as its last parameter.
// NOTE(review): per the RVV spec, vnmsub presumably computes
// vd[i] = -(vs1[i] * vd[i]) + vs2[i], and the _tu suffix presumably selects
// the tail-undisturbed policy; confirm against the intrinsics specification.
vuint8mf8_t __riscv_vnmsub_tu(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2,
                              size_t vl);
vuint8mf8_t __riscv_vnmsub_tu(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2,
                              size_t vl);
vuint8mf4_t __riscv_vnmsub_tu(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2,
                              size_t vl);
vuint8mf4_t __riscv_vnmsub_tu(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2,
                              size_t vl);
vuint8mf2_t __riscv_vnmsub_tu(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2,
                              size_t vl);
vuint8mf2_t __riscv_vnmsub_tu(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2,
                              size_t vl);
vuint8m1_t __riscv_vnmsub_tu(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2,
                             size_t vl);
vuint8m1_t __riscv_vnmsub_tu(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2,
                             size_t vl);
vuint8m2_t __riscv_vnmsub_tu(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2,
                             size_t vl);
vuint8m2_t __riscv_vnmsub_tu(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2,
                             size_t vl);
vuint8m4_t __riscv_vnmsub_tu(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2,
                             size_t vl);
vuint8m4_t __riscv_vnmsub_tu(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2,
                             size_t vl);
vuint8m8_t __riscv_vnmsub_tu(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2,
                             size_t vl);
vuint8m8_t __riscv_vnmsub_tu(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2,
                             size_t vl);
vuint16mf4_t __riscv_vnmsub_tu(vuint16mf4_t vd, vuint16mf4_t vs1,
                               vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsub_tu(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs2,
                               size_t vl);
vuint16mf2_t __riscv_vnmsub_tu(vuint16mf2_t vd, vuint16mf2_t vs1,
                               vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsub_tu(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs2,
                               size_t vl);
vuint16m1_t __riscv_vnmsub_tu(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2,
                              size_t vl);
vuint16m1_t __riscv_vnmsub_tu(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2,
                              size_t vl);
vuint16m2_t __riscv_vnmsub_tu(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2,
                              size_t vl);
vuint16m2_t __riscv_vnmsub_tu(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2,
                              size_t vl);
vuint16m4_t __riscv_vnmsub_tu(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2,
                              size_t vl);
vuint16m4_t __riscv_vnmsub_tu(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2,
                              size_t vl);
vuint16m8_t __riscv_vnmsub_tu(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2,
                              size_t vl);
vuint16m8_t __riscv_vnmsub_tu(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2,
                              size_t vl);
vuint32mf2_t __riscv_vnmsub_tu(vuint32mf2_t vd, vuint32mf2_t vs1,
                               vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsub_tu(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs2,
                               size_t vl);
vuint32m1_t __riscv_vnmsub_tu(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2,
                              size_t vl);
vuint32m1_t __riscv_vnmsub_tu(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2,
                              size_t vl);
vuint32m2_t __riscv_vnmsub_tu(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2,
                              size_t vl);
vuint32m2_t __riscv_vnmsub_tu(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2,
                              size_t vl);
vuint32m4_t __riscv_vnmsub_tu(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2,
                              size_t vl);
vuint32m4_t __riscv_vnmsub_tu(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2,
                              size_t vl);
vuint32m8_t __riscv_vnmsub_tu(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2,
                              size_t vl);
vuint32m8_t __riscv_vnmsub_tu(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2,
                              size_t vl);
vuint64m1_t __riscv_vnmsub_tu(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2,
                              size_t vl);
vuint64m1_t __riscv_vnmsub_tu(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2,
                              size_t vl);
vuint64m2_t __riscv_vnmsub_tu(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2,
                              size_t vl);
vuint64m2_t __riscv_vnmsub_tu(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2,
                              size_t vl);
vuint64m4_t __riscv_vnmsub_tu(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2,
                              size_t vl);
vuint64m4_t __riscv_vnmsub_tu(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2,
                              size_t vl);
vuint64m8_t __riscv_vnmsub_tu(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2,
                              size_t vl);
vuint64m8_t __riscv_vnmsub_tu(vuint64m8_t vd, uint64_t rs1, vuint64m8_t vs2,
                              size_t vl);
// masked functions
// __riscv_vmacc_tum, signed integer overloads.
// One pair of prototypes per signed vector type listed below (int8, int16,
// int32 and int64 elements): the first takes a vector operand `vs1`, the
// second takes a scalar operand `rs1` of the matching intN_t element type.
//   vm  - mask operand; its vboolN_t type is paired with the vector type
//         exactly as listed (e.g. vint8mf8_t with vbool64_t, vint64m8_t
//         with vbool8_t).
//   vd  - vector operand of the same type as the return value.
//   vs2 - second vector operand, same type as `vd`.
//   vl  - length argument (size_t).
// NOTE(review): the arithmetic performed and the meaning of the `_tum`
// suffix are not visible in this listing; presumably `vd` is the accumulator
// of a multiply-accumulate -- confirm against the RVV specification.
vint8mf8_t __riscv_vmacc_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs1,
                             vint8mf8_t vs2, size_t vl);
vint8mf8_t __riscv_vmacc_tum(vbool64_t vm, vint8mf8_t vd, int8_t rs1,
                             vint8mf8_t vs2, size_t vl);
vint8mf4_t __riscv_vmacc_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs1,
                             vint8mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vmacc_tum(vbool32_t vm, vint8mf4_t vd, int8_t rs1,
                             vint8mf4_t vs2, size_t vl);
vint8mf2_t __riscv_vmacc_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs1,
                             vint8mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vmacc_tum(vbool16_t vm, vint8mf2_t vd, int8_t rs1,
                             vint8mf2_t vs2, size_t vl);
vint8m1_t __riscv_vmacc_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs1,
                            vint8m1_t vs2, size_t vl);
vint8m1_t __riscv_vmacc_tum(vbool8_t vm, vint8m1_t vd, int8_t rs1,
                            vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vmacc_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs1,
                            vint8m2_t vs2, size_t vl);
vint8m2_t __riscv_vmacc_tum(vbool4_t vm, vint8m2_t vd, int8_t rs1,
                            vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vmacc_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs1,
                            vint8m4_t vs2, size_t vl);
vint8m4_t __riscv_vmacc_tum(vbool2_t vm, vint8m4_t vd, int8_t rs1,
                            vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vmacc_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs1,
                            vint8m8_t vs2, size_t vl);
vint8m8_t __riscv_vmacc_tum(vbool1_t vm, vint8m8_t vd, int8_t rs1,
                            vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vmacc_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs1,
                              vint16mf4_t vs2, size_t vl);
vint16mf4_t __riscv_vmacc_tum(vbool64_t vm, vint16mf4_t vd, int16_t rs1,
                              vint16mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vmacc_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs1,
                              vint16mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vmacc_tum(vbool32_t vm, vint16mf2_t vd, int16_t rs1,
                              vint16mf2_t vs2, size_t vl);
vint16m1_t __riscv_vmacc_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs1,
                             vint16m1_t vs2, size_t vl);
vint16m1_t __riscv_vmacc_tum(vbool16_t vm, vint16m1_t vd, int16_t rs1,
                             vint16m1_t vs2, size_t vl);
vint16m2_t __riscv_vmacc_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs1,
                             vint16m2_t vs2, size_t vl);
vint16m2_t __riscv_vmacc_tum(vbool8_t vm, vint16m2_t vd, int16_t rs1,
                             vint16m2_t vs2, size_t vl);
vint16m4_t __riscv_vmacc_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs1,
                             vint16m4_t vs2, size_t vl);
vint16m4_t __riscv_vmacc_tum(vbool4_t vm, vint16m4_t vd, int16_t rs1,
                             vint16m4_t vs2, size_t vl);
vint16m8_t __riscv_vmacc_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs1,
                             vint16m8_t vs2, size_t vl);
vint16m8_t __riscv_vmacc_tum(vbool2_t vm, vint16m8_t vd, int16_t rs1,
                             vint16m8_t vs2, size_t vl);
vint32mf2_t __riscv_vmacc_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs1,
                              vint32mf2_t vs2, size_t vl);
vint32mf2_t __riscv_vmacc_tum(vbool64_t vm, vint32mf2_t vd, int32_t rs1,
                              vint32mf2_t vs2, size_t vl);
vint32m1_t __riscv_vmacc_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs1,
                             vint32m1_t vs2, size_t vl);
vint32m1_t __riscv_vmacc_tum(vbool32_t vm, vint32m1_t vd, int32_t rs1,
                             vint32m1_t vs2, size_t vl);
vint32m2_t __riscv_vmacc_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs1,
                             vint32m2_t vs2, size_t vl);
vint32m2_t __riscv_vmacc_tum(vbool16_t vm, vint32m2_t vd, int32_t rs1,
                             vint32m2_t vs2, size_t vl);
vint32m4_t __riscv_vmacc_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs1,
                             vint32m4_t vs2, size_t vl);
vint32m4_t __riscv_vmacc_tum(vbool8_t vm, vint32m4_t vd, int32_t rs1,
                             vint32m4_t vs2, size_t vl);
vint32m8_t __riscv_vmacc_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs1,
                             vint32m8_t vs2, size_t vl);
vint32m8_t __riscv_vmacc_tum(vbool4_t vm, vint32m8_t vd, int32_t rs1,
                             vint32m8_t vs2, size_t vl);
vint64m1_t __riscv_vmacc_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs1,
                             vint64m1_t vs2, size_t vl);
vint64m1_t __riscv_vmacc_tum(vbool64_t vm, vint64m1_t vd, int64_t rs1,
                             vint64m1_t vs2, size_t vl);
vint64m2_t __riscv_vmacc_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs1,
                             vint64m2_t vs2, size_t vl);
vint64m2_t __riscv_vmacc_tum(vbool32_t vm, vint64m2_t vd, int64_t rs1,
                             vint64m2_t vs2, size_t vl);
vint64m4_t __riscv_vmacc_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs1,
                             vint64m4_t vs2, size_t vl);
vint64m4_t __riscv_vmacc_tum(vbool16_t vm, vint64m4_t vd, int64_t rs1,
                             vint64m4_t vs2, size_t vl);
vint64m8_t __riscv_vmacc_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs1,
                             vint64m8_t vs2, size_t vl);
vint64m8_t __riscv_vmacc_tum(vbool8_t vm, vint64m8_t vd, int64_t rs1,
                             vint64m8_t vs2, size_t vl);
// __riscv_vnmsac_tum, signed integer overloads.
// One pair of prototypes per signed vector type listed below (int8, int16,
// int32 and int64 elements): the first takes a vector operand `vs1`, the
// second takes a scalar operand `rs1` of the matching intN_t element type.
//   vm  - mask operand; its vboolN_t type is paired with the vector type
//         exactly as listed (e.g. vint8mf8_t with vbool64_t, vint64m8_t
//         with vbool8_t).
//   vd  - vector operand of the same type as the return value.
//   vs2 - second vector operand, same type as `vd`.
//   vl  - length argument (size_t).
// NOTE(review): the arithmetic performed and the meaning of the `_tum`
// suffix are not visible in this listing; presumably `vd` is the accumulator
// of a negated multiply-subtract -- confirm against the RVV specification.
vint8mf8_t __riscv_vnmsac_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs1,
                              vint8mf8_t vs2, size_t vl);
vint8mf8_t __riscv_vnmsac_tum(vbool64_t vm, vint8mf8_t vd, int8_t rs1,
                              vint8mf8_t vs2, size_t vl);
vint8mf4_t __riscv_vnmsac_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs1,
                              vint8mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vnmsac_tum(vbool32_t vm, vint8mf4_t vd, int8_t rs1,
                              vint8mf4_t vs2, size_t vl);
vint8mf2_t __riscv_vnmsac_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs1,
                              vint8mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vnmsac_tum(vbool16_t vm, vint8mf2_t vd, int8_t rs1,
                              vint8mf2_t vs2, size_t vl);
vint8m1_t __riscv_vnmsac_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs1,
                             vint8m1_t vs2, size_t vl);
vint8m1_t __riscv_vnmsac_tum(vbool8_t vm, vint8m1_t vd, int8_t rs1,
                             vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vnmsac_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs1,
                             vint8m2_t vs2, size_t vl);
vint8m2_t __riscv_vnmsac_tum(vbool4_t vm, vint8m2_t vd, int8_t rs1,
                             vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vnmsac_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs1,
                             vint8m4_t vs2, size_t vl);
vint8m4_t __riscv_vnmsac_tum(vbool2_t vm, vint8m4_t vd, int8_t rs1,
                             vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vnmsac_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs1,
                             vint8m8_t vs2, size_t vl);
vint8m8_t __riscv_vnmsac_tum(vbool1_t vm, vint8m8_t vd, int8_t rs1,
                             vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsac_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs1,
                               vint16mf4_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsac_tum(vbool64_t vm, vint16mf4_t vd, int16_t rs1,
                               vint16mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vnmsac_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs1,
                               vint16mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vnmsac_tum(vbool32_t vm, vint16mf2_t vd, int16_t rs1,
                               vint16mf2_t vs2, size_t vl);
vint16m1_t __riscv_vnmsac_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs1,
                              vint16m1_t vs2, size_t vl);
vint16m1_t __riscv_vnmsac_tum(vbool16_t vm, vint16m1_t vd, int16_t rs1,
                              vint16m1_t vs2, size_t vl);
vint16m2_t __riscv_vnmsac_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs1,
                              vint16m2_t vs2, size_t vl);
vint16m2_t __riscv_vnmsac_tum(vbool8_t vm, vint16m2_t vd, int16_t rs1,
                              vint16m2_t vs2, size_t vl);
vint16m4_t __riscv_vnmsac_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs1,
                              vint16m4_t vs2, size_t vl);
vint16m4_t __riscv_vnmsac_tum(vbool4_t vm, vint16m4_t vd, int16_t rs1,
                              vint16m4_t vs2, size_t vl);
vint16m8_t __riscv_vnmsac_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs1,
                              vint16m8_t vs2, size_t vl);
vint16m8_t __riscv_vnmsac_tum(vbool2_t vm, vint16m8_t vd, int16_t rs1,
                              vint16m8_t vs2, size_t vl);
vint32mf2_t __riscv_vnmsac_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs1,
                               vint32mf2_t vs2, size_t vl);
vint32mf2_t __riscv_vnmsac_tum(vbool64_t vm, vint32mf2_t vd, int32_t rs1,
                               vint32mf2_t vs2, size_t vl);
vint32m1_t __riscv_vnmsac_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs1,
                              vint32m1_t vs2, size_t vl);
vint32m1_t __riscv_vnmsac_tum(vbool32_t vm, vint32m1_t vd, int32_t rs1,
                              vint32m1_t vs2, size_t vl);
vint32m2_t __riscv_vnmsac_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs1,
                              vint32m2_t vs2, size_t vl);
vint32m2_t __riscv_vnmsac_tum(vbool16_t vm, vint32m2_t vd, int32_t rs1,
                              vint32m2_t vs2, size_t vl);
vint32m4_t __riscv_vnmsac_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs1,
                              vint32m4_t vs2, size_t vl);
vint32m4_t __riscv_vnmsac_tum(vbool8_t vm, vint32m4_t vd, int32_t rs1,
                              vint32m4_t vs2, size_t vl);
vint32m8_t __riscv_vnmsac_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs1,
                              vint32m8_t vs2, size_t vl);
vint32m8_t __riscv_vnmsac_tum(vbool4_t vm, vint32m8_t vd, int32_t rs1,
                              vint32m8_t vs2, size_t vl);
vint64m1_t __riscv_vnmsac_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs1,
                              vint64m1_t vs2, size_t vl);
vint64m1_t __riscv_vnmsac_tum(vbool64_t vm, vint64m1_t vd, int64_t rs1,
                              vint64m1_t vs2, size_t vl);
vint64m2_t __riscv_vnmsac_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs1,
                              vint64m2_t vs2, size_t vl);
vint64m2_t __riscv_vnmsac_tum(vbool32_t vm, vint64m2_t vd, int64_t rs1,
                              vint64m2_t vs2, size_t vl);
vint64m4_t __riscv_vnmsac_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs1,
                              vint64m4_t vs2, size_t vl);
vint64m4_t __riscv_vnmsac_tum(vbool16_t vm, vint64m4_t vd, int64_t rs1,
                              vint64m4_t vs2, size_t vl);
vint64m8_t __riscv_vnmsac_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs1,
                              vint64m8_t vs2, size_t vl);
vint64m8_t __riscv_vnmsac_tum(vbool8_t vm, vint64m8_t vd, int64_t rs1,
                              vint64m8_t vs2, size_t vl);
// __riscv_vmadd_tum, signed integer overloads.
// One pair of prototypes per signed vector type listed below (int8, int16,
// int32 and int64 elements): the first takes a vector operand `vs1`, the
// second takes a scalar operand `rs1` of the matching intN_t element type.
//   vm  - mask operand; its vboolN_t type is paired with the vector type
//         exactly as listed (e.g. vint8mf8_t with vbool64_t, vint64m8_t
//         with vbool8_t).
//   vd  - vector operand of the same type as the return value.
//   vs2 - second vector operand, same type as `vd`.
//   vl  - length argument (size_t).
// NOTE(review): the arithmetic performed and the meaning of the `_tum`
// suffix are not visible in this listing; presumably a multiply-add in which
// `vd` is one of the multiplicands -- confirm against the RVV specification.
vint8mf8_t __riscv_vmadd_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs1,
                             vint8mf8_t vs2, size_t vl);
vint8mf8_t __riscv_vmadd_tum(vbool64_t vm, vint8mf8_t vd, int8_t rs1,
                             vint8mf8_t vs2, size_t vl);
vint8mf4_t __riscv_vmadd_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs1,
                             vint8mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vmadd_tum(vbool32_t vm, vint8mf4_t vd, int8_t rs1,
                             vint8mf4_t vs2, size_t vl);
vint8mf2_t __riscv_vmadd_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs1,
                             vint8mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vmadd_tum(vbool16_t vm, vint8mf2_t vd, int8_t rs1,
                             vint8mf2_t vs2, size_t vl);
vint8m1_t __riscv_vmadd_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs1,
                            vint8m1_t vs2, size_t vl);
vint8m1_t __riscv_vmadd_tum(vbool8_t vm, vint8m1_t vd, int8_t rs1,
                            vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vmadd_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs1,
                            vint8m2_t vs2, size_t vl);
vint8m2_t __riscv_vmadd_tum(vbool4_t vm, vint8m2_t vd, int8_t rs1,
                            vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vmadd_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs1,
                            vint8m4_t vs2, size_t vl);
vint8m4_t __riscv_vmadd_tum(vbool2_t vm, vint8m4_t vd, int8_t rs1,
                            vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vmadd_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs1,
                            vint8m8_t vs2, size_t vl);
vint8m8_t __riscv_vmadd_tum(vbool1_t vm, vint8m8_t vd, int8_t rs1,
                            vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vmadd_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs1,
                              vint16mf4_t vs2, size_t vl);
vint16mf4_t __riscv_vmadd_tum(vbool64_t vm, vint16mf4_t vd, int16_t rs1,
                              vint16mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vmadd_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs1,
                              vint16mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vmadd_tum(vbool32_t vm, vint16mf2_t vd, int16_t rs1,
                              vint16mf2_t vs2, size_t vl);
vint16m1_t __riscv_vmadd_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs1,
                             vint16m1_t vs2, size_t vl);
vint16m1_t __riscv_vmadd_tum(vbool16_t vm, vint16m1_t vd, int16_t rs1,
                             vint16m1_t vs2, size_t vl);
vint16m2_t __riscv_vmadd_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs1,
                             vint16m2_t vs2, size_t vl);
vint16m2_t __riscv_vmadd_tum(vbool8_t vm, vint16m2_t vd, int16_t rs1,
                             vint16m2_t vs2, size_t vl);
vint16m4_t __riscv_vmadd_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs1,
                             vint16m4_t vs2, size_t vl);
vint16m4_t __riscv_vmadd_tum(vbool4_t vm, vint16m4_t vd, int16_t rs1,
                             vint16m4_t vs2, size_t vl);
vint16m8_t __riscv_vmadd_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs1,
                             vint16m8_t vs2, size_t vl);
vint16m8_t __riscv_vmadd_tum(vbool2_t vm, vint16m8_t vd, int16_t rs1,
                             vint16m8_t vs2, size_t vl);
vint32mf2_t __riscv_vmadd_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs1,
                              vint32mf2_t vs2, size_t vl);
vint32mf2_t __riscv_vmadd_tum(vbool64_t vm, vint32mf2_t vd, int32_t rs1,
                              vint32mf2_t vs2, size_t vl);
vint32m1_t __riscv_vmadd_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs1,
                             vint32m1_t vs2, size_t vl);
vint32m1_t __riscv_vmadd_tum(vbool32_t vm, vint32m1_t vd, int32_t rs1,
                             vint32m1_t vs2, size_t vl);
vint32m2_t __riscv_vmadd_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs1,
                             vint32m2_t vs2, size_t vl);
vint32m2_t __riscv_vmadd_tum(vbool16_t vm, vint32m2_t vd, int32_t rs1,
                             vint32m2_t vs2, size_t vl);
vint32m4_t __riscv_vmadd_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs1,
                             vint32m4_t vs2, size_t vl);
vint32m4_t __riscv_vmadd_tum(vbool8_t vm, vint32m4_t vd, int32_t rs1,
                             vint32m4_t vs2, size_t vl);
vint32m8_t __riscv_vmadd_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs1,
                             vint32m8_t vs2, size_t vl);
vint32m8_t __riscv_vmadd_tum(vbool4_t vm, vint32m8_t vd, int32_t rs1,
                             vint32m8_t vs2, size_t vl);
vint64m1_t __riscv_vmadd_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs1,
                             vint64m1_t vs2, size_t vl);
vint64m1_t __riscv_vmadd_tum(vbool64_t vm, vint64m1_t vd, int64_t rs1,
                             vint64m1_t vs2, size_t vl);
vint64m2_t __riscv_vmadd_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs1,
                             vint64m2_t vs2, size_t vl);
vint64m2_t __riscv_vmadd_tum(vbool32_t vm, vint64m2_t vd, int64_t rs1,
                             vint64m2_t vs2, size_t vl);
vint64m4_t __riscv_vmadd_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs1,
                             vint64m4_t vs2, size_t vl);
vint64m4_t __riscv_vmadd_tum(vbool16_t vm, vint64m4_t vd, int64_t rs1,
                             vint64m4_t vs2, size_t vl);
vint64m8_t __riscv_vmadd_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs1,
                             vint64m8_t vs2, size_t vl);
vint64m8_t __riscv_vmadd_tum(vbool8_t vm, vint64m8_t vd, int64_t rs1,
                             vint64m8_t vs2, size_t vl);
// __riscv_vnmsub_tum, signed integer overloads.
// One pair of prototypes per signed vector type listed below (int8, int16,
// int32 and int64 elements): the first takes a vector operand `vs1`, the
// second takes a scalar operand `rs1` of the matching intN_t element type.
//   vm  - mask operand; its vboolN_t type is paired with the vector type
//         exactly as listed (e.g. vint8mf8_t with vbool64_t, vint64m8_t
//         with vbool8_t).
//   vd  - vector operand of the same type as the return value.
//   vs2 - second vector operand, same type as `vd`.
//   vl  - length argument (size_t).
// NOTE(review): the arithmetic performed and the meaning of the `_tum`
// suffix are not visible in this listing; presumably a negated
// multiply-subtract in which `vd` is one of the multiplicands -- confirm
// against the RVV specification.
vint8mf8_t __riscv_vnmsub_tum(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs1,
                              vint8mf8_t vs2, size_t vl);
vint8mf8_t __riscv_vnmsub_tum(vbool64_t vm, vint8mf8_t vd, int8_t rs1,
                              vint8mf8_t vs2, size_t vl);
vint8mf4_t __riscv_vnmsub_tum(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs1,
                              vint8mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vnmsub_tum(vbool32_t vm, vint8mf4_t vd, int8_t rs1,
                              vint8mf4_t vs2, size_t vl);
vint8mf2_t __riscv_vnmsub_tum(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs1,
                              vint8mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vnmsub_tum(vbool16_t vm, vint8mf2_t vd, int8_t rs1,
                              vint8mf2_t vs2, size_t vl);
vint8m1_t __riscv_vnmsub_tum(vbool8_t vm, vint8m1_t vd, vint8m1_t vs1,
                             vint8m1_t vs2, size_t vl);
vint8m1_t __riscv_vnmsub_tum(vbool8_t vm, vint8m1_t vd, int8_t rs1,
                             vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vnmsub_tum(vbool4_t vm, vint8m2_t vd, vint8m2_t vs1,
                             vint8m2_t vs2, size_t vl);
vint8m2_t __riscv_vnmsub_tum(vbool4_t vm, vint8m2_t vd, int8_t rs1,
                             vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vnmsub_tum(vbool2_t vm, vint8m4_t vd, vint8m4_t vs1,
                             vint8m4_t vs2, size_t vl);
vint8m4_t __riscv_vnmsub_tum(vbool2_t vm, vint8m4_t vd, int8_t rs1,
                             vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vnmsub_tum(vbool1_t vm, vint8m8_t vd, vint8m8_t vs1,
                             vint8m8_t vs2, size_t vl);
vint8m8_t __riscv_vnmsub_tum(vbool1_t vm, vint8m8_t vd, int8_t rs1,
                             vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsub_tum(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs1,
                               vint16mf4_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsub_tum(vbool64_t vm, vint16mf4_t vd, int16_t rs1,
                               vint16mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vnmsub_tum(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs1,
                               vint16mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vnmsub_tum(vbool32_t vm, vint16mf2_t vd, int16_t rs1,
                               vint16mf2_t vs2, size_t vl);
vint16m1_t __riscv_vnmsub_tum(vbool16_t vm, vint16m1_t vd, vint16m1_t vs1,
                              vint16m1_t vs2, size_t vl);
vint16m1_t __riscv_vnmsub_tum(vbool16_t vm, vint16m1_t vd, int16_t rs1,
                              vint16m1_t vs2, size_t vl);
vint16m2_t __riscv_vnmsub_tum(vbool8_t vm, vint16m2_t vd, vint16m2_t vs1,
                              vint16m2_t vs2, size_t vl);
vint16m2_t __riscv_vnmsub_tum(vbool8_t vm, vint16m2_t vd, int16_t rs1,
                              vint16m2_t vs2, size_t vl);
vint16m4_t __riscv_vnmsub_tum(vbool4_t vm, vint16m4_t vd, vint16m4_t vs1,
                              vint16m4_t vs2, size_t vl);
vint16m4_t __riscv_vnmsub_tum(vbool4_t vm, vint16m4_t vd, int16_t rs1,
                              vint16m4_t vs2, size_t vl);
vint16m8_t __riscv_vnmsub_tum(vbool2_t vm, vint16m8_t vd, vint16m8_t vs1,
                              vint16m8_t vs2, size_t vl);
vint16m8_t __riscv_vnmsub_tum(vbool2_t vm, vint16m8_t vd, int16_t rs1,
                              vint16m8_t vs2, size_t vl);
vint32mf2_t __riscv_vnmsub_tum(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs1,
                               vint32mf2_t vs2, size_t vl);
vint32mf2_t __riscv_vnmsub_tum(vbool64_t vm, vint32mf2_t vd, int32_t rs1,
                               vint32mf2_t vs2, size_t vl);
vint32m1_t __riscv_vnmsub_tum(vbool32_t vm, vint32m1_t vd, vint32m1_t vs1,
                              vint32m1_t vs2, size_t vl);
vint32m1_t __riscv_vnmsub_tum(vbool32_t vm, vint32m1_t vd, int32_t rs1,
                              vint32m1_t vs2, size_t vl);
vint32m2_t __riscv_vnmsub_tum(vbool16_t vm, vint32m2_t vd, vint32m2_t vs1,
                              vint32m2_t vs2, size_t vl);
vint32m2_t __riscv_vnmsub_tum(vbool16_t vm, vint32m2_t vd, int32_t rs1,
                              vint32m2_t vs2, size_t vl);
vint32m4_t __riscv_vnmsub_tum(vbool8_t vm, vint32m4_t vd, vint32m4_t vs1,
                              vint32m4_t vs2, size_t vl);
vint32m4_t __riscv_vnmsub_tum(vbool8_t vm, vint32m4_t vd, int32_t rs1,
                              vint32m4_t vs2, size_t vl);
vint32m8_t __riscv_vnmsub_tum(vbool4_t vm, vint32m8_t vd, vint32m8_t vs1,
                              vint32m8_t vs2, size_t vl);
vint32m8_t __riscv_vnmsub_tum(vbool4_t vm, vint32m8_t vd, int32_t rs1,
                              vint32m8_t vs2, size_t vl);
vint64m1_t __riscv_vnmsub_tum(vbool64_t vm, vint64m1_t vd, vint64m1_t vs1,
                              vint64m1_t vs2, size_t vl);
vint64m1_t __riscv_vnmsub_tum(vbool64_t vm, vint64m1_t vd, int64_t rs1,
                              vint64m1_t vs2, size_t vl);
vint64m2_t __riscv_vnmsub_tum(vbool32_t vm, vint64m2_t vd, vint64m2_t vs1,
                              vint64m2_t vs2, size_t vl);
vint64m2_t __riscv_vnmsub_tum(vbool32_t vm, vint64m2_t vd, int64_t rs1,
                              vint64m2_t vs2, size_t vl);
vint64m4_t __riscv_vnmsub_tum(vbool16_t vm, vint64m4_t vd, vint64m4_t vs1,
                              vint64m4_t vs2, size_t vl);
vint64m4_t __riscv_vnmsub_tum(vbool16_t vm, vint64m4_t vd, int64_t rs1,
                              vint64m4_t vs2, size_t vl);
vint64m8_t __riscv_vnmsub_tum(vbool8_t vm, vint64m8_t vd, vint64m8_t vs1,
                              vint64m8_t vs2, size_t vl);
vint64m8_t __riscv_vnmsub_tum(vbool8_t vm, vint64m8_t vd, int64_t rs1,
                              vint64m8_t vs2, size_t vl);
// __riscv_vmacc_tum, unsigned integer overloads.
// One pair of prototypes per unsigned vector type listed below (uint8,
// uint16, uint32 and uint64 elements): the first takes a vector operand
// `vs1`, the second takes a scalar operand `rs1` of the matching uintN_t
// element type.
//   vm  - mask operand; its vboolN_t type is paired with the vector type
//         exactly as listed (e.g. vuint8mf8_t with vbool64_t, vuint64m8_t
//         with vbool8_t).
//   vd  - vector operand of the same type as the return value.
//   vs2 - second vector operand, same type as `vd`.
//   vl  - length argument (size_t).
// NOTE(review): the arithmetic performed and the meaning of the `_tum`
// suffix are not visible in this listing; presumably `vd` is the accumulator
// of a multiply-accumulate -- confirm against the RVV specification.
vuint8mf8_t __riscv_vmacc_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs1,
                              vuint8mf8_t vs2, size_t vl);
vuint8mf8_t __riscv_vmacc_tum(vbool64_t vm, vuint8mf8_t vd, uint8_t rs1,
                              vuint8mf8_t vs2, size_t vl);
vuint8mf4_t __riscv_vmacc_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs1,
                              vuint8mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vmacc_tum(vbool32_t vm, vuint8mf4_t vd, uint8_t rs1,
                              vuint8mf4_t vs2, size_t vl);
vuint8mf2_t __riscv_vmacc_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs1,
                              vuint8mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vmacc_tum(vbool16_t vm, vuint8mf2_t vd, uint8_t rs1,
                              vuint8mf2_t vs2, size_t vl);
vuint8m1_t __riscv_vmacc_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs1,
                             vuint8m1_t vs2, size_t vl);
vuint8m1_t __riscv_vmacc_tum(vbool8_t vm, vuint8m1_t vd, uint8_t rs1,
                             vuint8m1_t vs2, size_t vl);
vuint8m2_t __riscv_vmacc_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs1,
                             vuint8m2_t vs2, size_t vl);
vuint8m2_t __riscv_vmacc_tum(vbool4_t vm, vuint8m2_t vd, uint8_t rs1,
                             vuint8m2_t vs2, size_t vl);
vuint8m4_t __riscv_vmacc_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs1,
                             vuint8m4_t vs2, size_t vl);
vuint8m4_t __riscv_vmacc_tum(vbool2_t vm, vuint8m4_t vd, uint8_t rs1,
                             vuint8m4_t vs2, size_t vl);
vuint8m8_t __riscv_vmacc_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs1,
                             vuint8m8_t vs2, size_t vl);
vuint8m8_t __riscv_vmacc_tum(vbool1_t vm, vuint8m8_t vd, uint8_t rs1,
                             vuint8m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vmacc_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs1,
                               vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vmacc_tum(vbool64_t vm, vuint16mf4_t vd, uint16_t rs1,
                               vuint16mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vmacc_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs1,
                               vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vmacc_tum(vbool32_t vm, vuint16mf2_t vd, uint16_t rs1,
                               vuint16mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vmacc_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs1,
                              vuint16m1_t vs2, size_t vl);
vuint16m1_t __riscv_vmacc_tum(vbool16_t vm, vuint16m1_t vd, uint16_t rs1,
                              vuint16m1_t vs2, size_t vl);
vuint16m2_t __riscv_vmacc_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs1,
                              vuint16m2_t vs2, size_t vl);
vuint16m2_t __riscv_vmacc_tum(vbool8_t vm, vuint16m2_t vd, uint16_t rs1,
                              vuint16m2_t vs2, size_t vl);
vuint16m4_t __riscv_vmacc_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs1,
                              vuint16m4_t vs2, size_t vl);
vuint16m4_t __riscv_vmacc_tum(vbool4_t vm, vuint16m4_t vd, uint16_t rs1,
                              vuint16m4_t vs2, size_t vl);
vuint16m8_t __riscv_vmacc_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs1,
                              vuint16m8_t vs2, size_t vl);
vuint16m8_t __riscv_vmacc_tum(vbool2_t vm, vuint16m8_t vd, uint16_t rs1,
                              vuint16m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vmacc_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs1,
                               vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vmacc_tum(vbool64_t vm, vuint32mf2_t vd, uint32_t rs1,
                               vuint32mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vmacc_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs1,
                              vuint32m1_t vs2, size_t vl);
vuint32m1_t __riscv_vmacc_tum(vbool32_t vm, vuint32m1_t vd, uint32_t rs1,
                              vuint32m1_t vs2, size_t vl);
vuint32m2_t __riscv_vmacc_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs1,
                              vuint32m2_t vs2, size_t vl);
vuint32m2_t __riscv_vmacc_tum(vbool16_t vm, vuint32m2_t vd, uint32_t rs1,
                              vuint32m2_t vs2, size_t vl);
vuint32m4_t __riscv_vmacc_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs1,
                              vuint32m4_t vs2, size_t vl);
vuint32m4_t __riscv_vmacc_tum(vbool8_t vm, vuint32m4_t vd, uint32_t rs1,
                              vuint32m4_t vs2, size_t vl);
vuint32m8_t __riscv_vmacc_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs1,
                              vuint32m8_t vs2, size_t vl);
vuint32m8_t __riscv_vmacc_tum(vbool4_t vm, vuint32m8_t vd, uint32_t rs1,
                              vuint32m8_t vs2, size_t vl);
vuint64m1_t __riscv_vmacc_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs1,
                              vuint64m1_t vs2, size_t vl);
vuint64m1_t __riscv_vmacc_tum(vbool64_t vm, vuint64m1_t vd, uint64_t rs1,
                              vuint64m1_t vs2, size_t vl);
vuint64m2_t __riscv_vmacc_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs1,
                              vuint64m2_t vs2, size_t vl);
vuint64m2_t __riscv_vmacc_tum(vbool32_t vm, vuint64m2_t vd, uint64_t rs1,
                              vuint64m2_t vs2, size_t vl);
vuint64m4_t __riscv_vmacc_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs1,
                              vuint64m4_t vs2, size_t vl);
vuint64m4_t __riscv_vmacc_tum(vbool16_t vm, vuint64m4_t vd, uint64_t rs1,
                              vuint64m4_t vs2, size_t vl);
vuint64m8_t __riscv_vmacc_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs1,
                              vuint64m8_t vs2, size_t vl);
vuint64m8_t __riscv_vmacc_tum(vbool8_t vm, vuint64m8_t vd, uint64_t rs1,
                              vuint64m8_t vs2, size_t vl);
vuint8mf8_t __riscv_vnmsac_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs1,
                               vuint8mf8_t vs2, size_t vl);
vuint8mf8_t __riscv_vnmsac_tum(vbool64_t vm, vuint8mf8_t vd, uint8_t rs1,
                               vuint8mf8_t vs2, size_t vl);
vuint8mf4_t __riscv_vnmsac_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs1,
                               vuint8mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vnmsac_tum(vbool32_t vm, vuint8mf4_t vd, uint8_t rs1,
                               vuint8mf4_t vs2, size_t vl);
vuint8mf2_t __riscv_vnmsac_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs1,
                               vuint8mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vnmsac_tum(vbool16_t vm, vuint8mf2_t vd, uint8_t rs1,
                               vuint8mf2_t vs2, size_t vl);
vuint8m1_t __riscv_vnmsac_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs1,
                              vuint8m1_t vs2, size_t vl);
vuint8m1_t __riscv_vnmsac_tum(vbool8_t vm, vuint8m1_t vd, uint8_t rs1,
                              vuint8m1_t vs2, size_t vl);
vuint8m2_t __riscv_vnmsac_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs1,
                              vuint8m2_t vs2, size_t vl);
vuint8m2_t __riscv_vnmsac_tum(vbool4_t vm, vuint8m2_t vd, uint8_t rs1,
                              vuint8m2_t vs2, size_t vl);
vuint8m4_t __riscv_vnmsac_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs1,
                              vuint8m4_t vs2, size_t vl);
vuint8m4_t __riscv_vnmsac_tum(vbool2_t vm, vuint8m4_t vd, uint8_t rs1,
                              vuint8m4_t vs2, size_t vl);
vuint8m8_t __riscv_vnmsac_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs1,
                              vuint8m8_t vs2, size_t vl);
vuint8m8_t __riscv_vnmsac_tum(vbool1_t vm, vuint8m8_t vd, uint8_t rs1,
                              vuint8m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsac_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs1,
                                vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsac_tum(vbool64_t vm, vuint16mf4_t vd, uint16_t rs1,
                                vuint16mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsac_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs1,
                                vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsac_tum(vbool32_t vm, vuint16mf2_t vd, uint16_t rs1,
                                vuint16mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vnmsac_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs1,
                               vuint16m1_t vs2, size_t vl);
vuint16m1_t __riscv_vnmsac_tum(vbool16_t vm, vuint16m1_t vd, uint16_t rs1,
                               vuint16m1_t vs2, size_t vl);
vuint16m2_t __riscv_vnmsac_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs1,
                               vuint16m2_t vs2, size_t vl);
vuint16m2_t __riscv_vnmsac_tum(vbool8_t vm, vuint16m2_t vd, uint16_t rs1,
                               vuint16m2_t vs2, size_t vl);
vuint16m4_t __riscv_vnmsac_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs1,
                               vuint16m4_t vs2, size_t vl);
vuint16m4_t __riscv_vnmsac_tum(vbool4_t vm, vuint16m4_t vd, uint16_t rs1,
                               vuint16m4_t vs2, size_t vl);
vuint16m8_t __riscv_vnmsac_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs1,
                               vuint16m8_t vs2, size_t vl);
vuint16m8_t __riscv_vnmsac_tum(vbool2_t vm, vuint16m8_t vd, uint16_t rs1,
                               vuint16m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsac_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs1,
                                vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsac_tum(vbool64_t vm, vuint32mf2_t vd, uint32_t rs1,
                                vuint32mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vnmsac_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs1,
                               vuint32m1_t vs2, size_t vl);
vuint32m1_t __riscv_vnmsac_tum(vbool32_t vm, vuint32m1_t vd, uint32_t rs1,
                               vuint32m1_t vs2, size_t vl);
vuint32m2_t __riscv_vnmsac_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs1,
                               vuint32m2_t vs2, size_t vl);
vuint32m2_t __riscv_vnmsac_tum(vbool16_t vm, vuint32m2_t vd, uint32_t rs1,
                               vuint32m2_t vs2, size_t vl);
vuint32m4_t __riscv_vnmsac_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs1,
                               vuint32m4_t vs2, size_t vl);
vuint32m4_t __riscv_vnmsac_tum(vbool8_t vm, vuint32m4_t vd, uint32_t rs1,
                               vuint32m4_t vs2, size_t vl);
vuint32m8_t __riscv_vnmsac_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs1,
                               vuint32m8_t vs2, size_t vl);
vuint32m8_t __riscv_vnmsac_tum(vbool4_t vm, vuint32m8_t vd, uint32_t rs1,
                               vuint32m8_t vs2, size_t vl);
vuint64m1_t __riscv_vnmsac_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs1,
                               vuint64m1_t vs2, size_t vl);
vuint64m1_t __riscv_vnmsac_tum(vbool64_t vm, vuint64m1_t vd, uint64_t rs1,
                               vuint64m1_t vs2, size_t vl);
vuint64m2_t __riscv_vnmsac_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs1,
                               vuint64m2_t vs2, size_t vl);
vuint64m2_t __riscv_vnmsac_tum(vbool32_t vm, vuint64m2_t vd, uint64_t rs1,
                               vuint64m2_t vs2, size_t vl);
vuint64m4_t __riscv_vnmsac_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs1,
                               vuint64m4_t vs2, size_t vl);
vuint64m4_t __riscv_vnmsac_tum(vbool16_t vm, vuint64m4_t vd, uint64_t rs1,
                               vuint64m4_t vs2, size_t vl);
vuint64m8_t __riscv_vnmsac_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs1,
                               vuint64m8_t vs2, size_t vl);
vuint64m8_t __riscv_vnmsac_tum(vbool8_t vm, vuint64m8_t vd, uint64_t rs1,
                               vuint64m8_t vs2, size_t vl);
// __riscv_vmadd_tum: overloads for unsigned integer vector types.
// Every vector type is listed twice: once with a vector operand vs1 and once
// with a scalar operand rs1 of the matching uint<SEW>_t element type.
// vm is the mask; its vbool<N>_t type follows N = SEW / LMUL (for example,
// vuint8mf8_t pairs with vbool64_t and vuint64m8_t pairs with vbool8_t).
// vd is passed in and the result has the same type as vd; vl is a size_t.
// NOTE(review): the _tum suffix presumably selects the tail-undisturbed,
// mask-agnostic policy variant -- confirm against the RVV policy naming rules.
vuint8mf8_t __riscv_vmadd_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs1,
                              vuint8mf8_t vs2, size_t vl);
vuint8mf8_t __riscv_vmadd_tum(vbool64_t vm, vuint8mf8_t vd, uint8_t rs1,
                              vuint8mf8_t vs2, size_t vl);
vuint8mf4_t __riscv_vmadd_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs1,
                              vuint8mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vmadd_tum(vbool32_t vm, vuint8mf4_t vd, uint8_t rs1,
                              vuint8mf4_t vs2, size_t vl);
vuint8mf2_t __riscv_vmadd_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs1,
                              vuint8mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vmadd_tum(vbool16_t vm, vuint8mf2_t vd, uint8_t rs1,
                              vuint8mf2_t vs2, size_t vl);
vuint8m1_t __riscv_vmadd_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs1,
                             vuint8m1_t vs2, size_t vl);
vuint8m1_t __riscv_vmadd_tum(vbool8_t vm, vuint8m1_t vd, uint8_t rs1,
                             vuint8m1_t vs2, size_t vl);
vuint8m2_t __riscv_vmadd_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs1,
                             vuint8m2_t vs2, size_t vl);
vuint8m2_t __riscv_vmadd_tum(vbool4_t vm, vuint8m2_t vd, uint8_t rs1,
                             vuint8m2_t vs2, size_t vl);
vuint8m4_t __riscv_vmadd_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs1,
                             vuint8m4_t vs2, size_t vl);
vuint8m4_t __riscv_vmadd_tum(vbool2_t vm, vuint8m4_t vd, uint8_t rs1,
                             vuint8m4_t vs2, size_t vl);
vuint8m8_t __riscv_vmadd_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs1,
                             vuint8m8_t vs2, size_t vl);
vuint8m8_t __riscv_vmadd_tum(vbool1_t vm, vuint8m8_t vd, uint8_t rs1,
                             vuint8m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vmadd_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs1,
                               vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vmadd_tum(vbool64_t vm, vuint16mf4_t vd, uint16_t rs1,
                               vuint16mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vmadd_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs1,
                               vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vmadd_tum(vbool32_t vm, vuint16mf2_t vd, uint16_t rs1,
                               vuint16mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vmadd_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs1,
                              vuint16m1_t vs2, size_t vl);
vuint16m1_t __riscv_vmadd_tum(vbool16_t vm, vuint16m1_t vd, uint16_t rs1,
                              vuint16m1_t vs2, size_t vl);
vuint16m2_t __riscv_vmadd_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs1,
                              vuint16m2_t vs2, size_t vl);
vuint16m2_t __riscv_vmadd_tum(vbool8_t vm, vuint16m2_t vd, uint16_t rs1,
                              vuint16m2_t vs2, size_t vl);
vuint16m4_t __riscv_vmadd_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs1,
                              vuint16m4_t vs2, size_t vl);
vuint16m4_t __riscv_vmadd_tum(vbool4_t vm, vuint16m4_t vd, uint16_t rs1,
                              vuint16m4_t vs2, size_t vl);
vuint16m8_t __riscv_vmadd_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs1,
                              vuint16m8_t vs2, size_t vl);
vuint16m8_t __riscv_vmadd_tum(vbool2_t vm, vuint16m8_t vd, uint16_t rs1,
                              vuint16m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vmadd_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs1,
                               vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vmadd_tum(vbool64_t vm, vuint32mf2_t vd, uint32_t rs1,
                               vuint32mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vmadd_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs1,
                              vuint32m1_t vs2, size_t vl);
vuint32m1_t __riscv_vmadd_tum(vbool32_t vm, vuint32m1_t vd, uint32_t rs1,
                              vuint32m1_t vs2, size_t vl);
vuint32m2_t __riscv_vmadd_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs1,
                              vuint32m2_t vs2, size_t vl);
vuint32m2_t __riscv_vmadd_tum(vbool16_t vm, vuint32m2_t vd, uint32_t rs1,
                              vuint32m2_t vs2, size_t vl);
vuint32m4_t __riscv_vmadd_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs1,
                              vuint32m4_t vs2, size_t vl);
vuint32m4_t __riscv_vmadd_tum(vbool8_t vm, vuint32m4_t vd, uint32_t rs1,
                              vuint32m4_t vs2, size_t vl);
vuint32m8_t __riscv_vmadd_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs1,
                              vuint32m8_t vs2, size_t vl);
vuint32m8_t __riscv_vmadd_tum(vbool4_t vm, vuint32m8_t vd, uint32_t rs1,
                              vuint32m8_t vs2, size_t vl);
vuint64m1_t __riscv_vmadd_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs1,
                              vuint64m1_t vs2, size_t vl);
vuint64m1_t __riscv_vmadd_tum(vbool64_t vm, vuint64m1_t vd, uint64_t rs1,
                              vuint64m1_t vs2, size_t vl);
vuint64m2_t __riscv_vmadd_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs1,
                              vuint64m2_t vs2, size_t vl);
vuint64m2_t __riscv_vmadd_tum(vbool32_t vm, vuint64m2_t vd, uint64_t rs1,
                              vuint64m2_t vs2, size_t vl);
vuint64m4_t __riscv_vmadd_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs1,
                              vuint64m4_t vs2, size_t vl);
vuint64m4_t __riscv_vmadd_tum(vbool16_t vm, vuint64m4_t vd, uint64_t rs1,
                              vuint64m4_t vs2, size_t vl);
vuint64m8_t __riscv_vmadd_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs1,
                              vuint64m8_t vs2, size_t vl);
vuint64m8_t __riscv_vmadd_tum(vbool8_t vm, vuint64m8_t vd, uint64_t rs1,
                              vuint64m8_t vs2, size_t vl);
// __riscv_vnmsub_tum: overloads for unsigned integer vector types.
// Every vector type is listed twice: once with a vector operand vs1 and once
// with a scalar operand rs1 of the matching uint<SEW>_t element type.
// vm is the mask; its vbool<N>_t type follows N = SEW / LMUL (for example,
// vuint8mf8_t pairs with vbool64_t and vuint64m8_t pairs with vbool8_t).
// vd is passed in and the result has the same type as vd; vl is a size_t.
// NOTE(review): the _tum suffix presumably selects the tail-undisturbed,
// mask-agnostic policy variant -- confirm against the RVV policy naming rules.
vuint8mf8_t __riscv_vnmsub_tum(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs1,
                               vuint8mf8_t vs2, size_t vl);
vuint8mf8_t __riscv_vnmsub_tum(vbool64_t vm, vuint8mf8_t vd, uint8_t rs1,
                               vuint8mf8_t vs2, size_t vl);
vuint8mf4_t __riscv_vnmsub_tum(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs1,
                               vuint8mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vnmsub_tum(vbool32_t vm, vuint8mf4_t vd, uint8_t rs1,
                               vuint8mf4_t vs2, size_t vl);
vuint8mf2_t __riscv_vnmsub_tum(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs1,
                               vuint8mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vnmsub_tum(vbool16_t vm, vuint8mf2_t vd, uint8_t rs1,
                               vuint8mf2_t vs2, size_t vl);
vuint8m1_t __riscv_vnmsub_tum(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs1,
                              vuint8m1_t vs2, size_t vl);
vuint8m1_t __riscv_vnmsub_tum(vbool8_t vm, vuint8m1_t vd, uint8_t rs1,
                              vuint8m1_t vs2, size_t vl);
vuint8m2_t __riscv_vnmsub_tum(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs1,
                              vuint8m2_t vs2, size_t vl);
vuint8m2_t __riscv_vnmsub_tum(vbool4_t vm, vuint8m2_t vd, uint8_t rs1,
                              vuint8m2_t vs2, size_t vl);
vuint8m4_t __riscv_vnmsub_tum(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs1,
                              vuint8m4_t vs2, size_t vl);
vuint8m4_t __riscv_vnmsub_tum(vbool2_t vm, vuint8m4_t vd, uint8_t rs1,
                              vuint8m4_t vs2, size_t vl);
vuint8m8_t __riscv_vnmsub_tum(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs1,
                              vuint8m8_t vs2, size_t vl);
vuint8m8_t __riscv_vnmsub_tum(vbool1_t vm, vuint8m8_t vd, uint8_t rs1,
                              vuint8m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsub_tum(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs1,
                                vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsub_tum(vbool64_t vm, vuint16mf4_t vd, uint16_t rs1,
                                vuint16mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsub_tum(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs1,
                                vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsub_tum(vbool32_t vm, vuint16mf2_t vd, uint16_t rs1,
                                vuint16mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vnmsub_tum(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs1,
                               vuint16m1_t vs2, size_t vl);
vuint16m1_t __riscv_vnmsub_tum(vbool16_t vm, vuint16m1_t vd, uint16_t rs1,
                               vuint16m1_t vs2, size_t vl);
vuint16m2_t __riscv_vnmsub_tum(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs1,
                               vuint16m2_t vs2, size_t vl);
vuint16m2_t __riscv_vnmsub_tum(vbool8_t vm, vuint16m2_t vd, uint16_t rs1,
                               vuint16m2_t vs2, size_t vl);
vuint16m4_t __riscv_vnmsub_tum(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs1,
                               vuint16m4_t vs2, size_t vl);
vuint16m4_t __riscv_vnmsub_tum(vbool4_t vm, vuint16m4_t vd, uint16_t rs1,
                               vuint16m4_t vs2, size_t vl);
vuint16m8_t __riscv_vnmsub_tum(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs1,
                               vuint16m8_t vs2, size_t vl);
vuint16m8_t __riscv_vnmsub_tum(vbool2_t vm, vuint16m8_t vd, uint16_t rs1,
                               vuint16m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsub_tum(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs1,
                                vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsub_tum(vbool64_t vm, vuint32mf2_t vd, uint32_t rs1,
                                vuint32mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vnmsub_tum(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs1,
                               vuint32m1_t vs2, size_t vl);
vuint32m1_t __riscv_vnmsub_tum(vbool32_t vm, vuint32m1_t vd, uint32_t rs1,
                               vuint32m1_t vs2, size_t vl);
vuint32m2_t __riscv_vnmsub_tum(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs1,
                               vuint32m2_t vs2, size_t vl);
vuint32m2_t __riscv_vnmsub_tum(vbool16_t vm, vuint32m2_t vd, uint32_t rs1,
                               vuint32m2_t vs2, size_t vl);
vuint32m4_t __riscv_vnmsub_tum(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs1,
                               vuint32m4_t vs2, size_t vl);
vuint32m4_t __riscv_vnmsub_tum(vbool8_t vm, vuint32m4_t vd, uint32_t rs1,
                               vuint32m4_t vs2, size_t vl);
vuint32m8_t __riscv_vnmsub_tum(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs1,
                               vuint32m8_t vs2, size_t vl);
vuint32m8_t __riscv_vnmsub_tum(vbool4_t vm, vuint32m8_t vd, uint32_t rs1,
                               vuint32m8_t vs2, size_t vl);
vuint64m1_t __riscv_vnmsub_tum(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs1,
                               vuint64m1_t vs2, size_t vl);
vuint64m1_t __riscv_vnmsub_tum(vbool64_t vm, vuint64m1_t vd, uint64_t rs1,
                               vuint64m1_t vs2, size_t vl);
vuint64m2_t __riscv_vnmsub_tum(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs1,
                               vuint64m2_t vs2, size_t vl);
vuint64m2_t __riscv_vnmsub_tum(vbool32_t vm, vuint64m2_t vd, uint64_t rs1,
                               vuint64m2_t vs2, size_t vl);
vuint64m4_t __riscv_vnmsub_tum(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs1,
                               vuint64m4_t vs2, size_t vl);
vuint64m4_t __riscv_vnmsub_tum(vbool16_t vm, vuint64m4_t vd, uint64_t rs1,
                               vuint64m4_t vs2, size_t vl);
vuint64m8_t __riscv_vnmsub_tum(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs1,
                               vuint64m8_t vs2, size_t vl);
vuint64m8_t __riscv_vnmsub_tum(vbool8_t vm, vuint64m8_t vd, uint64_t rs1,
                               vuint64m8_t vs2, size_t vl);
// masked functions (_tumu variants: tail-undisturbed, mask-undisturbed policy)
// __riscv_vmacc_tumu: overloads for signed integer vector types.
// Every vector type is listed twice: once with a vector operand vs1 and once
// with a scalar operand rs1 of the matching int<SEW>_t element type.
// vm is the mask; its vbool<N>_t type follows N = SEW / LMUL (for example,
// vint8mf8_t pairs with vbool64_t and vint64m8_t pairs with vbool8_t).
// vd is passed in and the result has the same type as vd; vl is a size_t.
// NOTE(review): the _tumu suffix presumably selects the tail-undisturbed,
// mask-undisturbed policy variant -- confirm against the RVV policy naming
// rules.
vint8mf8_t __riscv_vmacc_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs1,
                              vint8mf8_t vs2, size_t vl);
vint8mf8_t __riscv_vmacc_tumu(vbool64_t vm, vint8mf8_t vd, int8_t rs1,
                              vint8mf8_t vs2, size_t vl);
vint8mf4_t __riscv_vmacc_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs1,
                              vint8mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vmacc_tumu(vbool32_t vm, vint8mf4_t vd, int8_t rs1,
                              vint8mf4_t vs2, size_t vl);
vint8mf2_t __riscv_vmacc_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs1,
                              vint8mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vmacc_tumu(vbool16_t vm, vint8mf2_t vd, int8_t rs1,
                              vint8mf2_t vs2, size_t vl);
vint8m1_t __riscv_vmacc_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs1,
                             vint8m1_t vs2, size_t vl);
vint8m1_t __riscv_vmacc_tumu(vbool8_t vm, vint8m1_t vd, int8_t rs1,
                             vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vmacc_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs1,
                             vint8m2_t vs2, size_t vl);
vint8m2_t __riscv_vmacc_tumu(vbool4_t vm, vint8m2_t vd, int8_t rs1,
                             vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vmacc_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs1,
                             vint8m4_t vs2, size_t vl);
vint8m4_t __riscv_vmacc_tumu(vbool2_t vm, vint8m4_t vd, int8_t rs1,
                             vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vmacc_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs1,
                             vint8m8_t vs2, size_t vl);
vint8m8_t __riscv_vmacc_tumu(vbool1_t vm, vint8m8_t vd, int8_t rs1,
                             vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vmacc_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs1,
                               vint16mf4_t vs2, size_t vl);
vint16mf4_t __riscv_vmacc_tumu(vbool64_t vm, vint16mf4_t vd, int16_t rs1,
                               vint16mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vmacc_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs1,
                               vint16mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vmacc_tumu(vbool32_t vm, vint16mf2_t vd, int16_t rs1,
                               vint16mf2_t vs2, size_t vl);
vint16m1_t __riscv_vmacc_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs1,
                              vint16m1_t vs2, size_t vl);
vint16m1_t __riscv_vmacc_tumu(vbool16_t vm, vint16m1_t vd, int16_t rs1,
                              vint16m1_t vs2, size_t vl);
vint16m2_t __riscv_vmacc_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs1,
                              vint16m2_t vs2, size_t vl);
vint16m2_t __riscv_vmacc_tumu(vbool8_t vm, vint16m2_t vd, int16_t rs1,
                              vint16m2_t vs2, size_t vl);
vint16m4_t __riscv_vmacc_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs1,
                              vint16m4_t vs2, size_t vl);
vint16m4_t __riscv_vmacc_tumu(vbool4_t vm, vint16m4_t vd, int16_t rs1,
                              vint16m4_t vs2, size_t vl);
vint16m8_t __riscv_vmacc_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs1,
                              vint16m8_t vs2, size_t vl);
vint16m8_t __riscv_vmacc_tumu(vbool2_t vm, vint16m8_t vd, int16_t rs1,
                              vint16m8_t vs2, size_t vl);
vint32mf2_t __riscv_vmacc_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs1,
                               vint32mf2_t vs2, size_t vl);
vint32mf2_t __riscv_vmacc_tumu(vbool64_t vm, vint32mf2_t vd, int32_t rs1,
                               vint32mf2_t vs2, size_t vl);
vint32m1_t __riscv_vmacc_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs1,
                              vint32m1_t vs2, size_t vl);
vint32m1_t __riscv_vmacc_tumu(vbool32_t vm, vint32m1_t vd, int32_t rs1,
                              vint32m1_t vs2, size_t vl);
vint32m2_t __riscv_vmacc_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs1,
                              vint32m2_t vs2, size_t vl);
vint32m2_t __riscv_vmacc_tumu(vbool16_t vm, vint32m2_t vd, int32_t rs1,
                              vint32m2_t vs2, size_t vl);
vint32m4_t __riscv_vmacc_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs1,
                              vint32m4_t vs2, size_t vl);
vint32m4_t __riscv_vmacc_tumu(vbool8_t vm, vint32m4_t vd, int32_t rs1,
                              vint32m4_t vs2, size_t vl);
vint32m8_t __riscv_vmacc_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs1,
                              vint32m8_t vs2, size_t vl);
vint32m8_t __riscv_vmacc_tumu(vbool4_t vm, vint32m8_t vd, int32_t rs1,
                              vint32m8_t vs2, size_t vl);
vint64m1_t __riscv_vmacc_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs1,
                              vint64m1_t vs2, size_t vl);
vint64m1_t __riscv_vmacc_tumu(vbool64_t vm, vint64m1_t vd, int64_t rs1,
                              vint64m1_t vs2, size_t vl);
vint64m2_t __riscv_vmacc_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs1,
                              vint64m2_t vs2, size_t vl);
vint64m2_t __riscv_vmacc_tumu(vbool32_t vm, vint64m2_t vd, int64_t rs1,
                              vint64m2_t vs2, size_t vl);
vint64m4_t __riscv_vmacc_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs1,
                              vint64m4_t vs2, size_t vl);
vint64m4_t __riscv_vmacc_tumu(vbool16_t vm, vint64m4_t vd, int64_t rs1,
                              vint64m4_t vs2, size_t vl);
vint64m8_t __riscv_vmacc_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs1,
                              vint64m8_t vs2, size_t vl);
vint64m8_t __riscv_vmacc_tumu(vbool8_t vm, vint64m8_t vd, int64_t rs1,
                              vint64m8_t vs2, size_t vl);
// __riscv_vnmsac_tumu: overloads for signed integer vector types.
// Every vector type is listed twice: once with a vector operand vs1 and once
// with a scalar operand rs1 of the matching int<SEW>_t element type.
// vm is the mask; its vbool<N>_t type follows N = SEW / LMUL (for example,
// vint8mf8_t pairs with vbool64_t and vint64m8_t pairs with vbool8_t).
// vd is passed in and the result has the same type as vd; vl is a size_t.
// NOTE(review): the _tumu suffix presumably selects the tail-undisturbed,
// mask-undisturbed policy variant -- confirm against the RVV policy naming
// rules.
vint8mf8_t __riscv_vnmsac_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs1,
                               vint8mf8_t vs2, size_t vl);
vint8mf8_t __riscv_vnmsac_tumu(vbool64_t vm, vint8mf8_t vd, int8_t rs1,
                               vint8mf8_t vs2, size_t vl);
vint8mf4_t __riscv_vnmsac_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs1,
                               vint8mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vnmsac_tumu(vbool32_t vm, vint8mf4_t vd, int8_t rs1,
                               vint8mf4_t vs2, size_t vl);
vint8mf2_t __riscv_vnmsac_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs1,
                               vint8mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vnmsac_tumu(vbool16_t vm, vint8mf2_t vd, int8_t rs1,
                               vint8mf2_t vs2, size_t vl);
vint8m1_t __riscv_vnmsac_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs1,
                              vint8m1_t vs2, size_t vl);
vint8m1_t __riscv_vnmsac_tumu(vbool8_t vm, vint8m1_t vd, int8_t rs1,
                              vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vnmsac_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs1,
                              vint8m2_t vs2, size_t vl);
vint8m2_t __riscv_vnmsac_tumu(vbool4_t vm, vint8m2_t vd, int8_t rs1,
                              vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vnmsac_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs1,
                              vint8m4_t vs2, size_t vl);
vint8m4_t __riscv_vnmsac_tumu(vbool2_t vm, vint8m4_t vd, int8_t rs1,
                              vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vnmsac_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs1,
                              vint8m8_t vs2, size_t vl);
vint8m8_t __riscv_vnmsac_tumu(vbool1_t vm, vint8m8_t vd, int8_t rs1,
                              vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsac_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs1,
                                vint16mf4_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsac_tumu(vbool64_t vm, vint16mf4_t vd, int16_t rs1,
                                vint16mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vnmsac_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs1,
                                vint16mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vnmsac_tumu(vbool32_t vm, vint16mf2_t vd, int16_t rs1,
                                vint16mf2_t vs2, size_t vl);
vint16m1_t __riscv_vnmsac_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs1,
                               vint16m1_t vs2, size_t vl);
vint16m1_t __riscv_vnmsac_tumu(vbool16_t vm, vint16m1_t vd, int16_t rs1,
                               vint16m1_t vs2, size_t vl);
vint16m2_t __riscv_vnmsac_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs1,
                               vint16m2_t vs2, size_t vl);
vint16m2_t __riscv_vnmsac_tumu(vbool8_t vm, vint16m2_t vd, int16_t rs1,
                               vint16m2_t vs2, size_t vl);
vint16m4_t __riscv_vnmsac_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs1,
                               vint16m4_t vs2, size_t vl);
vint16m4_t __riscv_vnmsac_tumu(vbool4_t vm, vint16m4_t vd, int16_t rs1,
                               vint16m4_t vs2, size_t vl);
vint16m8_t __riscv_vnmsac_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs1,
                               vint16m8_t vs2, size_t vl);
vint16m8_t __riscv_vnmsac_tumu(vbool2_t vm, vint16m8_t vd, int16_t rs1,
                               vint16m8_t vs2, size_t vl);
vint32mf2_t __riscv_vnmsac_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs1,
                                vint32mf2_t vs2, size_t vl);
vint32mf2_t __riscv_vnmsac_tumu(vbool64_t vm, vint32mf2_t vd, int32_t rs1,
                                vint32mf2_t vs2, size_t vl);
vint32m1_t __riscv_vnmsac_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs1,
                               vint32m1_t vs2, size_t vl);
vint32m1_t __riscv_vnmsac_tumu(vbool32_t vm, vint32m1_t vd, int32_t rs1,
                               vint32m1_t vs2, size_t vl);
vint32m2_t __riscv_vnmsac_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs1,
                               vint32m2_t vs2, size_t vl);
vint32m2_t __riscv_vnmsac_tumu(vbool16_t vm, vint32m2_t vd, int32_t rs1,
                               vint32m2_t vs2, size_t vl);
vint32m4_t __riscv_vnmsac_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs1,
                               vint32m4_t vs2, size_t vl);
vint32m4_t __riscv_vnmsac_tumu(vbool8_t vm, vint32m4_t vd, int32_t rs1,
                               vint32m4_t vs2, size_t vl);
vint32m8_t __riscv_vnmsac_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs1,
                               vint32m8_t vs2, size_t vl);
vint32m8_t __riscv_vnmsac_tumu(vbool4_t vm, vint32m8_t vd, int32_t rs1,
                               vint32m8_t vs2, size_t vl);
vint64m1_t __riscv_vnmsac_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs1,
                               vint64m1_t vs2, size_t vl);
vint64m1_t __riscv_vnmsac_tumu(vbool64_t vm, vint64m1_t vd, int64_t rs1,
                               vint64m1_t vs2, size_t vl);
vint64m2_t __riscv_vnmsac_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs1,
                               vint64m2_t vs2, size_t vl);
vint64m2_t __riscv_vnmsac_tumu(vbool32_t vm, vint64m2_t vd, int64_t rs1,
                               vint64m2_t vs2, size_t vl);
vint64m4_t __riscv_vnmsac_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs1,
                               vint64m4_t vs2, size_t vl);
vint64m4_t __riscv_vnmsac_tumu(vbool16_t vm, vint64m4_t vd, int64_t rs1,
                               vint64m4_t vs2, size_t vl);
vint64m8_t __riscv_vnmsac_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs1,
                               vint64m8_t vs2, size_t vl);
vint64m8_t __riscv_vnmsac_tumu(vbool8_t vm, vint64m8_t vd, int64_t rs1,
                               vint64m8_t vs2, size_t vl);
// __riscv_vmadd_tumu: overloads for signed integer vector types.
// Every vector type is listed twice: once with a vector operand vs1 and once
// with a scalar operand rs1 of the matching int<SEW>_t element type.
// vm is the mask; its vbool<N>_t type follows N = SEW / LMUL (for example,
// vint8mf8_t pairs with vbool64_t and vint64m8_t pairs with vbool8_t).
// vd is passed in and the result has the same type as vd; vl is a size_t.
// NOTE(review): the _tumu suffix presumably selects the tail-undisturbed,
// mask-undisturbed policy variant -- confirm against the RVV policy naming
// rules.
vint8mf8_t __riscv_vmadd_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs1,
                              vint8mf8_t vs2, size_t vl);
vint8mf8_t __riscv_vmadd_tumu(vbool64_t vm, vint8mf8_t vd, int8_t rs1,
                              vint8mf8_t vs2, size_t vl);
vint8mf4_t __riscv_vmadd_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs1,
                              vint8mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vmadd_tumu(vbool32_t vm, vint8mf4_t vd, int8_t rs1,
                              vint8mf4_t vs2, size_t vl);
vint8mf2_t __riscv_vmadd_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs1,
                              vint8mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vmadd_tumu(vbool16_t vm, vint8mf2_t vd, int8_t rs1,
                              vint8mf2_t vs2, size_t vl);
vint8m1_t __riscv_vmadd_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs1,
                             vint8m1_t vs2, size_t vl);
vint8m1_t __riscv_vmadd_tumu(vbool8_t vm, vint8m1_t vd, int8_t rs1,
                             vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vmadd_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs1,
                             vint8m2_t vs2, size_t vl);
vint8m2_t __riscv_vmadd_tumu(vbool4_t vm, vint8m2_t vd, int8_t rs1,
                             vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vmadd_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs1,
                             vint8m4_t vs2, size_t vl);
vint8m4_t __riscv_vmadd_tumu(vbool2_t vm, vint8m4_t vd, int8_t rs1,
                             vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vmadd_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs1,
                             vint8m8_t vs2, size_t vl);
vint8m8_t __riscv_vmadd_tumu(vbool1_t vm, vint8m8_t vd, int8_t rs1,
                             vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vmadd_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs1,
                               vint16mf4_t vs2, size_t vl);
vint16mf4_t __riscv_vmadd_tumu(vbool64_t vm, vint16mf4_t vd, int16_t rs1,
                               vint16mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vmadd_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs1,
                               vint16mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vmadd_tumu(vbool32_t vm, vint16mf2_t vd, int16_t rs1,
                               vint16mf2_t vs2, size_t vl);
vint16m1_t __riscv_vmadd_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs1,
                              vint16m1_t vs2, size_t vl);
vint16m1_t __riscv_vmadd_tumu(vbool16_t vm, vint16m1_t vd, int16_t rs1,
                              vint16m1_t vs2, size_t vl);
vint16m2_t __riscv_vmadd_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs1,
                              vint16m2_t vs2, size_t vl);
vint16m2_t __riscv_vmadd_tumu(vbool8_t vm, vint16m2_t vd, int16_t rs1,
                              vint16m2_t vs2, size_t vl);
vint16m4_t __riscv_vmadd_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs1,
                              vint16m4_t vs2, size_t vl);
vint16m4_t __riscv_vmadd_tumu(vbool4_t vm, vint16m4_t vd, int16_t rs1,
                              vint16m4_t vs2, size_t vl);
vint16m8_t __riscv_vmadd_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs1,
                              vint16m8_t vs2, size_t vl);
vint16m8_t __riscv_vmadd_tumu(vbool2_t vm, vint16m8_t vd, int16_t rs1,
                              vint16m8_t vs2, size_t vl);
vint32mf2_t __riscv_vmadd_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs1,
                               vint32mf2_t vs2, size_t vl);
vint32mf2_t __riscv_vmadd_tumu(vbool64_t vm, vint32mf2_t vd, int32_t rs1,
                               vint32mf2_t vs2, size_t vl);
vint32m1_t __riscv_vmadd_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs1,
                              vint32m1_t vs2, size_t vl);
vint32m1_t __riscv_vmadd_tumu(vbool32_t vm, vint32m1_t vd, int32_t rs1,
                              vint32m1_t vs2, size_t vl);
vint32m2_t __riscv_vmadd_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs1,
                              vint32m2_t vs2, size_t vl);
vint32m2_t __riscv_vmadd_tumu(vbool16_t vm, vint32m2_t vd, int32_t rs1,
                              vint32m2_t vs2, size_t vl);
vint32m4_t __riscv_vmadd_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs1,
                              vint32m4_t vs2, size_t vl);
vint32m4_t __riscv_vmadd_tumu(vbool8_t vm, vint32m4_t vd, int32_t rs1,
                              vint32m4_t vs2, size_t vl);
vint32m8_t __riscv_vmadd_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs1,
                              vint32m8_t vs2, size_t vl);
vint32m8_t __riscv_vmadd_tumu(vbool4_t vm, vint32m8_t vd, int32_t rs1,
                              vint32m8_t vs2, size_t vl);
vint64m1_t __riscv_vmadd_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs1,
                              vint64m1_t vs2, size_t vl);
vint64m1_t __riscv_vmadd_tumu(vbool64_t vm, vint64m1_t vd, int64_t rs1,
                              vint64m1_t vs2, size_t vl);
vint64m2_t __riscv_vmadd_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs1,
                              vint64m2_t vs2, size_t vl);
vint64m2_t __riscv_vmadd_tumu(vbool32_t vm, vint64m2_t vd, int64_t rs1,
                              vint64m2_t vs2, size_t vl);
vint64m4_t __riscv_vmadd_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs1,
                              vint64m4_t vs2, size_t vl);
vint64m4_t __riscv_vmadd_tumu(vbool16_t vm, vint64m4_t vd, int64_t rs1,
                              vint64m4_t vs2, size_t vl);
vint64m8_t __riscv_vmadd_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs1,
                              vint64m8_t vs2, size_t vl);
vint64m8_t __riscv_vmadd_tumu(vbool8_t vm, vint64m8_t vd, int64_t rs1,
                              vint64m8_t vs2, size_t vl);
// __riscv_vnmsub_tumu, signed-integer overloads (vint8mf8_t through
// vint64m8_t). Each data type is declared twice: once with a vector operand
// `vs1` and once with a scalar operand `rs1` of the matching intN_t width.
//   vm  - mask, always first; its vboolN_t type is fixed by the data type
//         (e.g. vbool64_t for vint8mf8_t, vbool1_t for vint8m8_t)
//   vd  - vector operand with the same type as the return value
//   vs2 - vector operand with the same type as vd
//   vl  - size_t argument (vector length, going by the name)
// NOTE(review): this listing shows signatures only. Per the RVV spec vnmsub
// should compute vd = -(vs1 * vd) + vs2, and _tumu should select the
// tail-undisturbed/mask-undisturbed policy -- confirm against the spec.
vint8mf8_t __riscv_vnmsub_tumu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs1,
                               vint8mf8_t vs2, size_t vl);
vint8mf8_t __riscv_vnmsub_tumu(vbool64_t vm, vint8mf8_t vd, int8_t rs1,
                               vint8mf8_t vs2, size_t vl);
vint8mf4_t __riscv_vnmsub_tumu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs1,
                               vint8mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vnmsub_tumu(vbool32_t vm, vint8mf4_t vd, int8_t rs1,
                               vint8mf4_t vs2, size_t vl);
vint8mf2_t __riscv_vnmsub_tumu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs1,
                               vint8mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vnmsub_tumu(vbool16_t vm, vint8mf2_t vd, int8_t rs1,
                               vint8mf2_t vs2, size_t vl);
vint8m1_t __riscv_vnmsub_tumu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs1,
                              vint8m1_t vs2, size_t vl);
vint8m1_t __riscv_vnmsub_tumu(vbool8_t vm, vint8m1_t vd, int8_t rs1,
                              vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vnmsub_tumu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs1,
                              vint8m2_t vs2, size_t vl);
vint8m2_t __riscv_vnmsub_tumu(vbool4_t vm, vint8m2_t vd, int8_t rs1,
                              vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vnmsub_tumu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs1,
                              vint8m4_t vs2, size_t vl);
vint8m4_t __riscv_vnmsub_tumu(vbool2_t vm, vint8m4_t vd, int8_t rs1,
                              vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vnmsub_tumu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs1,
                              vint8m8_t vs2, size_t vl);
vint8m8_t __riscv_vnmsub_tumu(vbool1_t vm, vint8m8_t vd, int8_t rs1,
                              vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsub_tumu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs1,
                                vint16mf4_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsub_tumu(vbool64_t vm, vint16mf4_t vd, int16_t rs1,
                                vint16mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vnmsub_tumu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs1,
                                vint16mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vnmsub_tumu(vbool32_t vm, vint16mf2_t vd, int16_t rs1,
                                vint16mf2_t vs2, size_t vl);
vint16m1_t __riscv_vnmsub_tumu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs1,
                               vint16m1_t vs2, size_t vl);
vint16m1_t __riscv_vnmsub_tumu(vbool16_t vm, vint16m1_t vd, int16_t rs1,
                               vint16m1_t vs2, size_t vl);
vint16m2_t __riscv_vnmsub_tumu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs1,
                               vint16m2_t vs2, size_t vl);
vint16m2_t __riscv_vnmsub_tumu(vbool8_t vm, vint16m2_t vd, int16_t rs1,
                               vint16m2_t vs2, size_t vl);
vint16m4_t __riscv_vnmsub_tumu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs1,
                               vint16m4_t vs2, size_t vl);
vint16m4_t __riscv_vnmsub_tumu(vbool4_t vm, vint16m4_t vd, int16_t rs1,
                               vint16m4_t vs2, size_t vl);
vint16m8_t __riscv_vnmsub_tumu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs1,
                               vint16m8_t vs2, size_t vl);
vint16m8_t __riscv_vnmsub_tumu(vbool2_t vm, vint16m8_t vd, int16_t rs1,
                               vint16m8_t vs2, size_t vl);
vint32mf2_t __riscv_vnmsub_tumu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs1,
                                vint32mf2_t vs2, size_t vl);
vint32mf2_t __riscv_vnmsub_tumu(vbool64_t vm, vint32mf2_t vd, int32_t rs1,
                                vint32mf2_t vs2, size_t vl);
vint32m1_t __riscv_vnmsub_tumu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs1,
                               vint32m1_t vs2, size_t vl);
vint32m1_t __riscv_vnmsub_tumu(vbool32_t vm, vint32m1_t vd, int32_t rs1,
                               vint32m1_t vs2, size_t vl);
vint32m2_t __riscv_vnmsub_tumu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs1,
                               vint32m2_t vs2, size_t vl);
vint32m2_t __riscv_vnmsub_tumu(vbool16_t vm, vint32m2_t vd, int32_t rs1,
                               vint32m2_t vs2, size_t vl);
vint32m4_t __riscv_vnmsub_tumu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs1,
                               vint32m4_t vs2, size_t vl);
vint32m4_t __riscv_vnmsub_tumu(vbool8_t vm, vint32m4_t vd, int32_t rs1,
                               vint32m4_t vs2, size_t vl);
vint32m8_t __riscv_vnmsub_tumu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs1,
                               vint32m8_t vs2, size_t vl);
vint32m8_t __riscv_vnmsub_tumu(vbool4_t vm, vint32m8_t vd, int32_t rs1,
                               vint32m8_t vs2, size_t vl);
vint64m1_t __riscv_vnmsub_tumu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs1,
                               vint64m1_t vs2, size_t vl);
vint64m1_t __riscv_vnmsub_tumu(vbool64_t vm, vint64m1_t vd, int64_t rs1,
                               vint64m1_t vs2, size_t vl);
vint64m2_t __riscv_vnmsub_tumu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs1,
                               vint64m2_t vs2, size_t vl);
vint64m2_t __riscv_vnmsub_tumu(vbool32_t vm, vint64m2_t vd, int64_t rs1,
                               vint64m2_t vs2, size_t vl);
vint64m4_t __riscv_vnmsub_tumu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs1,
                               vint64m4_t vs2, size_t vl);
vint64m4_t __riscv_vnmsub_tumu(vbool16_t vm, vint64m4_t vd, int64_t rs1,
                               vint64m4_t vs2, size_t vl);
vint64m8_t __riscv_vnmsub_tumu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs1,
                               vint64m8_t vs2, size_t vl);
vint64m8_t __riscv_vnmsub_tumu(vbool8_t vm, vint64m8_t vd, int64_t rs1,
                               vint64m8_t vs2, size_t vl);
// __riscv_vmacc_tumu, unsigned-integer overloads (vuint8mf8_t through
// vuint64m8_t). Each data type is declared twice: once with a vector operand
// `vs1` and once with a scalar operand `rs1` of the matching uintN_t width.
//   vm  - mask, always first; its vboolN_t type is fixed by the data type
//         (e.g. vbool64_t for vuint8mf8_t, vbool1_t for vuint8m8_t)
//   vd  - vector operand with the same type as the return value
//   vs2 - vector operand with the same type as vd
//   vl  - size_t argument (vector length, going by the name)
// NOTE(review): this listing shows signatures only. Per the RVV spec vmacc
// should compute vd = (vs1 * vs2) + vd, and _tumu should select the
// tail-undisturbed/mask-undisturbed policy -- confirm against the spec.
vuint8mf8_t __riscv_vmacc_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs1,
                               vuint8mf8_t vs2, size_t vl);
vuint8mf8_t __riscv_vmacc_tumu(vbool64_t vm, vuint8mf8_t vd, uint8_t rs1,
                               vuint8mf8_t vs2, size_t vl);
vuint8mf4_t __riscv_vmacc_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs1,
                               vuint8mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vmacc_tumu(vbool32_t vm, vuint8mf4_t vd, uint8_t rs1,
                               vuint8mf4_t vs2, size_t vl);
vuint8mf2_t __riscv_vmacc_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs1,
                               vuint8mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vmacc_tumu(vbool16_t vm, vuint8mf2_t vd, uint8_t rs1,
                               vuint8mf2_t vs2, size_t vl);
vuint8m1_t __riscv_vmacc_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs1,
                              vuint8m1_t vs2, size_t vl);
vuint8m1_t __riscv_vmacc_tumu(vbool8_t vm, vuint8m1_t vd, uint8_t rs1,
                              vuint8m1_t vs2, size_t vl);
vuint8m2_t __riscv_vmacc_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs1,
                              vuint8m2_t vs2, size_t vl);
vuint8m2_t __riscv_vmacc_tumu(vbool4_t vm, vuint8m2_t vd, uint8_t rs1,
                              vuint8m2_t vs2, size_t vl);
vuint8m4_t __riscv_vmacc_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs1,
                              vuint8m4_t vs2, size_t vl);
vuint8m4_t __riscv_vmacc_tumu(vbool2_t vm, vuint8m4_t vd, uint8_t rs1,
                              vuint8m4_t vs2, size_t vl);
vuint8m8_t __riscv_vmacc_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs1,
                              vuint8m8_t vs2, size_t vl);
vuint8m8_t __riscv_vmacc_tumu(vbool1_t vm, vuint8m8_t vd, uint8_t rs1,
                              vuint8m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vmacc_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs1,
                                vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vmacc_tumu(vbool64_t vm, vuint16mf4_t vd, uint16_t rs1,
                                vuint16mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vmacc_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs1,
                                vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vmacc_tumu(vbool32_t vm, vuint16mf2_t vd, uint16_t rs1,
                                vuint16mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vmacc_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs1,
                               vuint16m1_t vs2, size_t vl);
vuint16m1_t __riscv_vmacc_tumu(vbool16_t vm, vuint16m1_t vd, uint16_t rs1,
                               vuint16m1_t vs2, size_t vl);
vuint16m2_t __riscv_vmacc_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs1,
                               vuint16m2_t vs2, size_t vl);
vuint16m2_t __riscv_vmacc_tumu(vbool8_t vm, vuint16m2_t vd, uint16_t rs1,
                               vuint16m2_t vs2, size_t vl);
vuint16m4_t __riscv_vmacc_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs1,
                               vuint16m4_t vs2, size_t vl);
vuint16m4_t __riscv_vmacc_tumu(vbool4_t vm, vuint16m4_t vd, uint16_t rs1,
                               vuint16m4_t vs2, size_t vl);
vuint16m8_t __riscv_vmacc_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs1,
                               vuint16m8_t vs2, size_t vl);
vuint16m8_t __riscv_vmacc_tumu(vbool2_t vm, vuint16m8_t vd, uint16_t rs1,
                               vuint16m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vmacc_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs1,
                                vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vmacc_tumu(vbool64_t vm, vuint32mf2_t vd, uint32_t rs1,
                                vuint32mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vmacc_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs1,
                               vuint32m1_t vs2, size_t vl);
vuint32m1_t __riscv_vmacc_tumu(vbool32_t vm, vuint32m1_t vd, uint32_t rs1,
                               vuint32m1_t vs2, size_t vl);
vuint32m2_t __riscv_vmacc_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs1,
                               vuint32m2_t vs2, size_t vl);
vuint32m2_t __riscv_vmacc_tumu(vbool16_t vm, vuint32m2_t vd, uint32_t rs1,
                               vuint32m2_t vs2, size_t vl);
vuint32m4_t __riscv_vmacc_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs1,
                               vuint32m4_t vs2, size_t vl);
vuint32m4_t __riscv_vmacc_tumu(vbool8_t vm, vuint32m4_t vd, uint32_t rs1,
                               vuint32m4_t vs2, size_t vl);
vuint32m8_t __riscv_vmacc_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs1,
                               vuint32m8_t vs2, size_t vl);
vuint32m8_t __riscv_vmacc_tumu(vbool4_t vm, vuint32m8_t vd, uint32_t rs1,
                               vuint32m8_t vs2, size_t vl);
vuint64m1_t __riscv_vmacc_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs1,
                               vuint64m1_t vs2, size_t vl);
vuint64m1_t __riscv_vmacc_tumu(vbool64_t vm, vuint64m1_t vd, uint64_t rs1,
                               vuint64m1_t vs2, size_t vl);
vuint64m2_t __riscv_vmacc_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs1,
                               vuint64m2_t vs2, size_t vl);
vuint64m2_t __riscv_vmacc_tumu(vbool32_t vm, vuint64m2_t vd, uint64_t rs1,
                               vuint64m2_t vs2, size_t vl);
vuint64m4_t __riscv_vmacc_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs1,
                               vuint64m4_t vs2, size_t vl);
vuint64m4_t __riscv_vmacc_tumu(vbool16_t vm, vuint64m4_t vd, uint64_t rs1,
                               vuint64m4_t vs2, size_t vl);
vuint64m8_t __riscv_vmacc_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs1,
                               vuint64m8_t vs2, size_t vl);
vuint64m8_t __riscv_vmacc_tumu(vbool8_t vm, vuint64m8_t vd, uint64_t rs1,
                               vuint64m8_t vs2, size_t vl);
// __riscv_vnmsac_tumu, unsigned-integer overloads (vuint8mf8_t through
// vuint64m8_t). Each data type is declared twice: once with a vector operand
// `vs1` and once with a scalar operand `rs1` of the matching uintN_t width.
//   vm  - mask, always first; its vboolN_t type is fixed by the data type
//         (e.g. vbool64_t for vuint8mf8_t, vbool1_t for vuint8m8_t)
//   vd  - vector operand with the same type as the return value
//   vs2 - vector operand with the same type as vd
//   vl  - size_t argument (vector length, going by the name)
// NOTE(review): this listing shows signatures only. Per the RVV spec vnmsac
// should compute vd = -(vs1 * vs2) + vd, and _tumu should select the
// tail-undisturbed/mask-undisturbed policy -- confirm against the spec.
vuint8mf8_t __riscv_vnmsac_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs1,
                                vuint8mf8_t vs2, size_t vl);
vuint8mf8_t __riscv_vnmsac_tumu(vbool64_t vm, vuint8mf8_t vd, uint8_t rs1,
                                vuint8mf8_t vs2, size_t vl);
vuint8mf4_t __riscv_vnmsac_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs1,
                                vuint8mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vnmsac_tumu(vbool32_t vm, vuint8mf4_t vd, uint8_t rs1,
                                vuint8mf4_t vs2, size_t vl);
vuint8mf2_t __riscv_vnmsac_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs1,
                                vuint8mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vnmsac_tumu(vbool16_t vm, vuint8mf2_t vd, uint8_t rs1,
                                vuint8mf2_t vs2, size_t vl);
vuint8m1_t __riscv_vnmsac_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs1,
                               vuint8m1_t vs2, size_t vl);
vuint8m1_t __riscv_vnmsac_tumu(vbool8_t vm, vuint8m1_t vd, uint8_t rs1,
                               vuint8m1_t vs2, size_t vl);
vuint8m2_t __riscv_vnmsac_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs1,
                               vuint8m2_t vs2, size_t vl);
vuint8m2_t __riscv_vnmsac_tumu(vbool4_t vm, vuint8m2_t vd, uint8_t rs1,
                               vuint8m2_t vs2, size_t vl);
vuint8m4_t __riscv_vnmsac_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs1,
                               vuint8m4_t vs2, size_t vl);
vuint8m4_t __riscv_vnmsac_tumu(vbool2_t vm, vuint8m4_t vd, uint8_t rs1,
                               vuint8m4_t vs2, size_t vl);
vuint8m8_t __riscv_vnmsac_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs1,
                               vuint8m8_t vs2, size_t vl);
vuint8m8_t __riscv_vnmsac_tumu(vbool1_t vm, vuint8m8_t vd, uint8_t rs1,
                               vuint8m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsac_tumu(vbool64_t vm, vuint16mf4_t vd,
                                 vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsac_tumu(vbool64_t vm, vuint16mf4_t vd, uint16_t rs1,
                                 vuint16mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsac_tumu(vbool32_t vm, vuint16mf2_t vd,
                                 vuint16mf2_t vs1, vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsac_tumu(vbool32_t vm, vuint16mf2_t vd, uint16_t rs1,
                                 vuint16mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vnmsac_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs1,
                                vuint16m1_t vs2, size_t vl);
vuint16m1_t __riscv_vnmsac_tumu(vbool16_t vm, vuint16m1_t vd, uint16_t rs1,
                                vuint16m1_t vs2, size_t vl);
vuint16m2_t __riscv_vnmsac_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs1,
                                vuint16m2_t vs2, size_t vl);
vuint16m2_t __riscv_vnmsac_tumu(vbool8_t vm, vuint16m2_t vd, uint16_t rs1,
                                vuint16m2_t vs2, size_t vl);
vuint16m4_t __riscv_vnmsac_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs1,
                                vuint16m4_t vs2, size_t vl);
vuint16m4_t __riscv_vnmsac_tumu(vbool4_t vm, vuint16m4_t vd, uint16_t rs1,
                                vuint16m4_t vs2, size_t vl);
vuint16m8_t __riscv_vnmsac_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs1,
                                vuint16m8_t vs2, size_t vl);
vuint16m8_t __riscv_vnmsac_tumu(vbool2_t vm, vuint16m8_t vd, uint16_t rs1,
                                vuint16m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsac_tumu(vbool64_t vm, vuint32mf2_t vd,
                                 vuint32mf2_t vs1, vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsac_tumu(vbool64_t vm, vuint32mf2_t vd, uint32_t rs1,
                                 vuint32mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vnmsac_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs1,
                                vuint32m1_t vs2, size_t vl);
vuint32m1_t __riscv_vnmsac_tumu(vbool32_t vm, vuint32m1_t vd, uint32_t rs1,
                                vuint32m1_t vs2, size_t vl);
vuint32m2_t __riscv_vnmsac_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs1,
                                vuint32m2_t vs2, size_t vl);
vuint32m2_t __riscv_vnmsac_tumu(vbool16_t vm, vuint32m2_t vd, uint32_t rs1,
                                vuint32m2_t vs2, size_t vl);
vuint32m4_t __riscv_vnmsac_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs1,
                                vuint32m4_t vs2, size_t vl);
vuint32m4_t __riscv_vnmsac_tumu(vbool8_t vm, vuint32m4_t vd, uint32_t rs1,
                                vuint32m4_t vs2, size_t vl);
vuint32m8_t __riscv_vnmsac_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs1,
                                vuint32m8_t vs2, size_t vl);
vuint32m8_t __riscv_vnmsac_tumu(vbool4_t vm, vuint32m8_t vd, uint32_t rs1,
                                vuint32m8_t vs2, size_t vl);
vuint64m1_t __riscv_vnmsac_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs1,
                                vuint64m1_t vs2, size_t vl);
vuint64m1_t __riscv_vnmsac_tumu(vbool64_t vm, vuint64m1_t vd, uint64_t rs1,
                                vuint64m1_t vs2, size_t vl);
vuint64m2_t __riscv_vnmsac_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs1,
                                vuint64m2_t vs2, size_t vl);
vuint64m2_t __riscv_vnmsac_tumu(vbool32_t vm, vuint64m2_t vd, uint64_t rs1,
                                vuint64m2_t vs2, size_t vl);
vuint64m4_t __riscv_vnmsac_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs1,
                                vuint64m4_t vs2, size_t vl);
vuint64m4_t __riscv_vnmsac_tumu(vbool16_t vm, vuint64m4_t vd, uint64_t rs1,
                                vuint64m4_t vs2, size_t vl);
vuint64m8_t __riscv_vnmsac_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs1,
                                vuint64m8_t vs2, size_t vl);
vuint64m8_t __riscv_vnmsac_tumu(vbool8_t vm, vuint64m8_t vd, uint64_t rs1,
                                vuint64m8_t vs2, size_t vl);
// __riscv_vmadd_tumu, unsigned-integer overloads (vuint8mf8_t through
// vuint64m8_t). Each data type is declared twice: once with a vector operand
// `vs1` and once with a scalar operand `rs1` of the matching uintN_t width.
//   vm  - mask, always first; its vboolN_t type is fixed by the data type
//         (e.g. vbool64_t for vuint8mf8_t, vbool1_t for vuint8m8_t)
//   vd  - vector operand with the same type as the return value
//   vs2 - vector operand with the same type as vd
//   vl  - size_t argument (vector length, going by the name)
// NOTE(review): this listing shows signatures only. Per the RVV spec vmadd
// should compute vd = (vs1 * vd) + vs2, and _tumu should select the
// tail-undisturbed/mask-undisturbed policy -- confirm against the spec.
vuint8mf8_t __riscv_vmadd_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs1,
                               vuint8mf8_t vs2, size_t vl);
vuint8mf8_t __riscv_vmadd_tumu(vbool64_t vm, vuint8mf8_t vd, uint8_t rs1,
                               vuint8mf8_t vs2, size_t vl);
vuint8mf4_t __riscv_vmadd_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs1,
                               vuint8mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vmadd_tumu(vbool32_t vm, vuint8mf4_t vd, uint8_t rs1,
                               vuint8mf4_t vs2, size_t vl);
vuint8mf2_t __riscv_vmadd_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs1,
                               vuint8mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vmadd_tumu(vbool16_t vm, vuint8mf2_t vd, uint8_t rs1,
                               vuint8mf2_t vs2, size_t vl);
vuint8m1_t __riscv_vmadd_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs1,
                              vuint8m1_t vs2, size_t vl);
vuint8m1_t __riscv_vmadd_tumu(vbool8_t vm, vuint8m1_t vd, uint8_t rs1,
                              vuint8m1_t vs2, size_t vl);
vuint8m2_t __riscv_vmadd_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs1,
                              vuint8m2_t vs2, size_t vl);
vuint8m2_t __riscv_vmadd_tumu(vbool4_t vm, vuint8m2_t vd, uint8_t rs1,
                              vuint8m2_t vs2, size_t vl);
vuint8m4_t __riscv_vmadd_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs1,
                              vuint8m4_t vs2, size_t vl);
vuint8m4_t __riscv_vmadd_tumu(vbool2_t vm, vuint8m4_t vd, uint8_t rs1,
                              vuint8m4_t vs2, size_t vl);
vuint8m8_t __riscv_vmadd_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs1,
                              vuint8m8_t vs2, size_t vl);
vuint8m8_t __riscv_vmadd_tumu(vbool1_t vm, vuint8m8_t vd, uint8_t rs1,
                              vuint8m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vmadd_tumu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs1,
                                vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vmadd_tumu(vbool64_t vm, vuint16mf4_t vd, uint16_t rs1,
                                vuint16mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vmadd_tumu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs1,
                                vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vmadd_tumu(vbool32_t vm, vuint16mf2_t vd, uint16_t rs1,
                                vuint16mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vmadd_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs1,
                               vuint16m1_t vs2, size_t vl);
vuint16m1_t __riscv_vmadd_tumu(vbool16_t vm, vuint16m1_t vd, uint16_t rs1,
                               vuint16m1_t vs2, size_t vl);
vuint16m2_t __riscv_vmadd_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs1,
                               vuint16m2_t vs2, size_t vl);
vuint16m2_t __riscv_vmadd_tumu(vbool8_t vm, vuint16m2_t vd, uint16_t rs1,
                               vuint16m2_t vs2, size_t vl);
vuint16m4_t __riscv_vmadd_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs1,
                               vuint16m4_t vs2, size_t vl);
vuint16m4_t __riscv_vmadd_tumu(vbool4_t vm, vuint16m4_t vd, uint16_t rs1,
                               vuint16m4_t vs2, size_t vl);
vuint16m8_t __riscv_vmadd_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs1,
                               vuint16m8_t vs2, size_t vl);
vuint16m8_t __riscv_vmadd_tumu(vbool2_t vm, vuint16m8_t vd, uint16_t rs1,
                               vuint16m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vmadd_tumu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs1,
                                vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vmadd_tumu(vbool64_t vm, vuint32mf2_t vd, uint32_t rs1,
                                vuint32mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vmadd_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs1,
                               vuint32m1_t vs2, size_t vl);
vuint32m1_t __riscv_vmadd_tumu(vbool32_t vm, vuint32m1_t vd, uint32_t rs1,
                               vuint32m1_t vs2, size_t vl);
vuint32m2_t __riscv_vmadd_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs1,
                               vuint32m2_t vs2, size_t vl);
vuint32m2_t __riscv_vmadd_tumu(vbool16_t vm, vuint32m2_t vd, uint32_t rs1,
                               vuint32m2_t vs2, size_t vl);
vuint32m4_t __riscv_vmadd_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs1,
                               vuint32m4_t vs2, size_t vl);
vuint32m4_t __riscv_vmadd_tumu(vbool8_t vm, vuint32m4_t vd, uint32_t rs1,
                               vuint32m4_t vs2, size_t vl);
vuint32m8_t __riscv_vmadd_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs1,
                               vuint32m8_t vs2, size_t vl);
vuint32m8_t __riscv_vmadd_tumu(vbool4_t vm, vuint32m8_t vd, uint32_t rs1,
                               vuint32m8_t vs2, size_t vl);
vuint64m1_t __riscv_vmadd_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs1,
                               vuint64m1_t vs2, size_t vl);
vuint64m1_t __riscv_vmadd_tumu(vbool64_t vm, vuint64m1_t vd, uint64_t rs1,
                               vuint64m1_t vs2, size_t vl);
vuint64m2_t __riscv_vmadd_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs1,
                               vuint64m2_t vs2, size_t vl);
vuint64m2_t __riscv_vmadd_tumu(vbool32_t vm, vuint64m2_t vd, uint64_t rs1,
                               vuint64m2_t vs2, size_t vl);
vuint64m4_t __riscv_vmadd_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs1,
                               vuint64m4_t vs2, size_t vl);
vuint64m4_t __riscv_vmadd_tumu(vbool16_t vm, vuint64m4_t vd, uint64_t rs1,
                               vuint64m4_t vs2, size_t vl);
vuint64m8_t __riscv_vmadd_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs1,
                               vuint64m8_t vs2, size_t vl);
vuint64m8_t __riscv_vmadd_tumu(vbool8_t vm, vuint64m8_t vd, uint64_t rs1,
                               vuint64m8_t vs2, size_t vl);
// __riscv_vnmsub_tumu, unsigned-integer overloads (vuint8mf8_t through
// vuint64m8_t). Each data type is declared twice: once with a vector operand
// `vs1` and once with a scalar operand `rs1` of the matching uintN_t width.
//   vm  - mask, always first; its vboolN_t type is fixed by the data type
//         (e.g. vbool64_t for vuint8mf8_t, vbool1_t for vuint8m8_t)
//   vd  - vector operand with the same type as the return value
//   vs2 - vector operand with the same type as vd
//   vl  - size_t argument (vector length, going by the name)
// NOTE(review): this listing shows signatures only. Per the RVV spec vnmsub
// should compute vd = -(vs1 * vd) + vs2, and _tumu should select the
// tail-undisturbed/mask-undisturbed policy -- confirm against the spec.
vuint8mf8_t __riscv_vnmsub_tumu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs1,
                                vuint8mf8_t vs2, size_t vl);
vuint8mf8_t __riscv_vnmsub_tumu(vbool64_t vm, vuint8mf8_t vd, uint8_t rs1,
                                vuint8mf8_t vs2, size_t vl);
vuint8mf4_t __riscv_vnmsub_tumu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs1,
                                vuint8mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vnmsub_tumu(vbool32_t vm, vuint8mf4_t vd, uint8_t rs1,
                                vuint8mf4_t vs2, size_t vl);
vuint8mf2_t __riscv_vnmsub_tumu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs1,
                                vuint8mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vnmsub_tumu(vbool16_t vm, vuint8mf2_t vd, uint8_t rs1,
                                vuint8mf2_t vs2, size_t vl);
vuint8m1_t __riscv_vnmsub_tumu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs1,
                               vuint8m1_t vs2, size_t vl);
vuint8m1_t __riscv_vnmsub_tumu(vbool8_t vm, vuint8m1_t vd, uint8_t rs1,
                               vuint8m1_t vs2, size_t vl);
vuint8m2_t __riscv_vnmsub_tumu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs1,
                               vuint8m2_t vs2, size_t vl);
vuint8m2_t __riscv_vnmsub_tumu(vbool4_t vm, vuint8m2_t vd, uint8_t rs1,
                               vuint8m2_t vs2, size_t vl);
vuint8m4_t __riscv_vnmsub_tumu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs1,
                               vuint8m4_t vs2, size_t vl);
vuint8m4_t __riscv_vnmsub_tumu(vbool2_t vm, vuint8m4_t vd, uint8_t rs1,
                               vuint8m4_t vs2, size_t vl);
vuint8m8_t __riscv_vnmsub_tumu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs1,
                               vuint8m8_t vs2, size_t vl);
vuint8m8_t __riscv_vnmsub_tumu(vbool1_t vm, vuint8m8_t vd, uint8_t rs1,
                               vuint8m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsub_tumu(vbool64_t vm, vuint16mf4_t vd,
                                 vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsub_tumu(vbool64_t vm, vuint16mf4_t vd, uint16_t rs1,
                                 vuint16mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsub_tumu(vbool32_t vm, vuint16mf2_t vd,
                                 vuint16mf2_t vs1, vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsub_tumu(vbool32_t vm, vuint16mf2_t vd, uint16_t rs1,
                                 vuint16mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vnmsub_tumu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs1,
                                vuint16m1_t vs2, size_t vl);
vuint16m1_t __riscv_vnmsub_tumu(vbool16_t vm, vuint16m1_t vd, uint16_t rs1,
                                vuint16m1_t vs2, size_t vl);
vuint16m2_t __riscv_vnmsub_tumu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs1,
                                vuint16m2_t vs2, size_t vl);
vuint16m2_t __riscv_vnmsub_tumu(vbool8_t vm, vuint16m2_t vd, uint16_t rs1,
                                vuint16m2_t vs2, size_t vl);
vuint16m4_t __riscv_vnmsub_tumu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs1,
                                vuint16m4_t vs2, size_t vl);
vuint16m4_t __riscv_vnmsub_tumu(vbool4_t vm, vuint16m4_t vd, uint16_t rs1,
                                vuint16m4_t vs2, size_t vl);
vuint16m8_t __riscv_vnmsub_tumu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs1,
                                vuint16m8_t vs2, size_t vl);
vuint16m8_t __riscv_vnmsub_tumu(vbool2_t vm, vuint16m8_t vd, uint16_t rs1,
                                vuint16m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsub_tumu(vbool64_t vm, vuint32mf2_t vd,
                                 vuint32mf2_t vs1, vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsub_tumu(vbool64_t vm, vuint32mf2_t vd, uint32_t rs1,
                                 vuint32mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vnmsub_tumu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs1,
                                vuint32m1_t vs2, size_t vl);
vuint32m1_t __riscv_vnmsub_tumu(vbool32_t vm, vuint32m1_t vd, uint32_t rs1,
                                vuint32m1_t vs2, size_t vl);
vuint32m2_t __riscv_vnmsub_tumu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs1,
                                vuint32m2_t vs2, size_t vl);
vuint32m2_t __riscv_vnmsub_tumu(vbool16_t vm, vuint32m2_t vd, uint32_t rs1,
                                vuint32m2_t vs2, size_t vl);
vuint32m4_t __riscv_vnmsub_tumu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs1,
                                vuint32m4_t vs2, size_t vl);
vuint32m4_t __riscv_vnmsub_tumu(vbool8_t vm, vuint32m4_t vd, uint32_t rs1,
                                vuint32m4_t vs2, size_t vl);
vuint32m8_t __riscv_vnmsub_tumu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs1,
                                vuint32m8_t vs2, size_t vl);
vuint32m8_t __riscv_vnmsub_tumu(vbool4_t vm, vuint32m8_t vd, uint32_t rs1,
                                vuint32m8_t vs2, size_t vl);
vuint64m1_t __riscv_vnmsub_tumu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs1,
                                vuint64m1_t vs2, size_t vl);
vuint64m1_t __riscv_vnmsub_tumu(vbool64_t vm, vuint64m1_t vd, uint64_t rs1,
                                vuint64m1_t vs2, size_t vl);
vuint64m2_t __riscv_vnmsub_tumu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs1,
                                vuint64m2_t vs2, size_t vl);
vuint64m2_t __riscv_vnmsub_tumu(vbool32_t vm, vuint64m2_t vd, uint64_t rs1,
                                vuint64m2_t vs2, size_t vl);
vuint64m4_t __riscv_vnmsub_tumu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs1,
                                vuint64m4_t vs2, size_t vl);
vuint64m4_t __riscv_vnmsub_tumu(vbool16_t vm, vuint64m4_t vd, uint64_t rs1,
                                vuint64m4_t vs2, size_t vl);
vuint64m8_t __riscv_vnmsub_tumu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs1,
                                vuint64m8_t vs2, size_t vl);
vuint64m8_t __riscv_vnmsub_tumu(vbool8_t vm, vuint64m8_t vd, uint64_t rs1,
                                vuint64m8_t vs2, size_t vl);
// masked functions (_mu variants)
// __riscv_vmacc_mu -- overloads for signed integer vector types.
// The overloads come in pairs, one pair per vector type from vint8mf8_t
// through vint64m8_t: the first takes a vector operand vs1, the second a
// scalar operand rs1 of the matching intN_t. Each overload returns the same
// type as vd, takes the mask vm as its first argument and a size_t vl as its
// last.
// In every signature the mask type is vboolN_t with N = element width / LMUL
// (vint8mf8_t pairs with vbool64_t, vint64m8_t with vbool8_t).
// NOTE(review): the arithmetic is not visible in these prototypes. Presumably
// this maps to the RVV vmacc instruction, vd[i] = (vs1[i] * vs2[i]) + vd[i],
// with masked-off elements keeping their vd value -- confirm against the spec.
vint8mf8_t __riscv_vmacc_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs1,
                            vint8mf8_t vs2, size_t vl);
vint8mf8_t __riscv_vmacc_mu(vbool64_t vm, vint8mf8_t vd, int8_t rs1,
                            vint8mf8_t vs2, size_t vl);
vint8mf4_t __riscv_vmacc_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs1,
                            vint8mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vmacc_mu(vbool32_t vm, vint8mf4_t vd, int8_t rs1,
                            vint8mf4_t vs2, size_t vl);
vint8mf2_t __riscv_vmacc_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs1,
                            vint8mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vmacc_mu(vbool16_t vm, vint8mf2_t vd, int8_t rs1,
                            vint8mf2_t vs2, size_t vl);
vint8m1_t __riscv_vmacc_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs1,
                           vint8m1_t vs2, size_t vl);
vint8m1_t __riscv_vmacc_mu(vbool8_t vm, vint8m1_t vd, int8_t rs1, vint8m1_t vs2,
                           size_t vl);
vint8m2_t __riscv_vmacc_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs1,
                           vint8m2_t vs2, size_t vl);
vint8m2_t __riscv_vmacc_mu(vbool4_t vm, vint8m2_t vd, int8_t rs1, vint8m2_t vs2,
                           size_t vl);
vint8m4_t __riscv_vmacc_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs1,
                           vint8m4_t vs2, size_t vl);
vint8m4_t __riscv_vmacc_mu(vbool2_t vm, vint8m4_t vd, int8_t rs1, vint8m4_t vs2,
                           size_t vl);
vint8m8_t __riscv_vmacc_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs1,
                           vint8m8_t vs2, size_t vl);
vint8m8_t __riscv_vmacc_mu(vbool1_t vm, vint8m8_t vd, int8_t rs1, vint8m8_t vs2,
                           size_t vl);
vint16mf4_t __riscv_vmacc_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs1,
                             vint16mf4_t vs2, size_t vl);
vint16mf4_t __riscv_vmacc_mu(vbool64_t vm, vint16mf4_t vd, int16_t rs1,
                             vint16mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vmacc_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs1,
                             vint16mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vmacc_mu(vbool32_t vm, vint16mf2_t vd, int16_t rs1,
                             vint16mf2_t vs2, size_t vl);
vint16m1_t __riscv_vmacc_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs1,
                            vint16m1_t vs2, size_t vl);
vint16m1_t __riscv_vmacc_mu(vbool16_t vm, vint16m1_t vd, int16_t rs1,
                            vint16m1_t vs2, size_t vl);
vint16m2_t __riscv_vmacc_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs1,
                            vint16m2_t vs2, size_t vl);
vint16m2_t __riscv_vmacc_mu(vbool8_t vm, vint16m2_t vd, int16_t rs1,
                            vint16m2_t vs2, size_t vl);
vint16m4_t __riscv_vmacc_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs1,
                            vint16m4_t vs2, size_t vl);
vint16m4_t __riscv_vmacc_mu(vbool4_t vm, vint16m4_t vd, int16_t rs1,
                            vint16m4_t vs2, size_t vl);
vint16m8_t __riscv_vmacc_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs1,
                            vint16m8_t vs2, size_t vl);
vint16m8_t __riscv_vmacc_mu(vbool2_t vm, vint16m8_t vd, int16_t rs1,
                            vint16m8_t vs2, size_t vl);
vint32mf2_t __riscv_vmacc_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs1,
                             vint32mf2_t vs2, size_t vl);
vint32mf2_t __riscv_vmacc_mu(vbool64_t vm, vint32mf2_t vd, int32_t rs1,
                             vint32mf2_t vs2, size_t vl);
vint32m1_t __riscv_vmacc_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs1,
                            vint32m1_t vs2, size_t vl);
vint32m1_t __riscv_vmacc_mu(vbool32_t vm, vint32m1_t vd, int32_t rs1,
                            vint32m1_t vs2, size_t vl);
vint32m2_t __riscv_vmacc_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs1,
                            vint32m2_t vs2, size_t vl);
vint32m2_t __riscv_vmacc_mu(vbool16_t vm, vint32m2_t vd, int32_t rs1,
                            vint32m2_t vs2, size_t vl);
vint32m4_t __riscv_vmacc_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs1,
                            vint32m4_t vs2, size_t vl);
vint32m4_t __riscv_vmacc_mu(vbool8_t vm, vint32m4_t vd, int32_t rs1,
                            vint32m4_t vs2, size_t vl);
vint32m8_t __riscv_vmacc_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs1,
                            vint32m8_t vs2, size_t vl);
vint32m8_t __riscv_vmacc_mu(vbool4_t vm, vint32m8_t vd, int32_t rs1,
                            vint32m8_t vs2, size_t vl);
vint64m1_t __riscv_vmacc_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs1,
                            vint64m1_t vs2, size_t vl);
vint64m1_t __riscv_vmacc_mu(vbool64_t vm, vint64m1_t vd, int64_t rs1,
                            vint64m1_t vs2, size_t vl);
vint64m2_t __riscv_vmacc_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs1,
                            vint64m2_t vs2, size_t vl);
vint64m2_t __riscv_vmacc_mu(vbool32_t vm, vint64m2_t vd, int64_t rs1,
                            vint64m2_t vs2, size_t vl);
vint64m4_t __riscv_vmacc_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs1,
                            vint64m4_t vs2, size_t vl);
vint64m4_t __riscv_vmacc_mu(vbool16_t vm, vint64m4_t vd, int64_t rs1,
                            vint64m4_t vs2, size_t vl);
vint64m8_t __riscv_vmacc_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs1,
                            vint64m8_t vs2, size_t vl);
vint64m8_t __riscv_vmacc_mu(vbool8_t vm, vint64m8_t vd, int64_t rs1,
                            vint64m8_t vs2, size_t vl);
// __riscv_vnmsac_mu -- overloads for signed integer vector types.
// The overloads come in pairs, one pair per vector type from vint8mf8_t
// through vint64m8_t: the first takes a vector operand vs1, the second a
// scalar operand rs1 of the matching intN_t. Each overload returns the same
// type as vd, takes the mask vm as its first argument and a size_t vl as its
// last.
// In every signature the mask type is vboolN_t with N = element width / LMUL
// (vint8mf8_t pairs with vbool64_t, vint64m8_t with vbool8_t).
// NOTE(review): the arithmetic is not visible in these prototypes. Presumably
// this maps to the RVV vnmsac instruction, vd[i] = -(vs1[i] * vs2[i]) + vd[i],
// with masked-off elements keeping their vd value -- confirm against the spec.
vint8mf8_t __riscv_vnmsac_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs1,
                             vint8mf8_t vs2, size_t vl);
vint8mf8_t __riscv_vnmsac_mu(vbool64_t vm, vint8mf8_t vd, int8_t rs1,
                             vint8mf8_t vs2, size_t vl);
vint8mf4_t __riscv_vnmsac_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs1,
                             vint8mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vnmsac_mu(vbool32_t vm, vint8mf4_t vd, int8_t rs1,
                             vint8mf4_t vs2, size_t vl);
vint8mf2_t __riscv_vnmsac_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs1,
                             vint8mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vnmsac_mu(vbool16_t vm, vint8mf2_t vd, int8_t rs1,
                             vint8mf2_t vs2, size_t vl);
vint8m1_t __riscv_vnmsac_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs1,
                            vint8m1_t vs2, size_t vl);
vint8m1_t __riscv_vnmsac_mu(vbool8_t vm, vint8m1_t vd, int8_t rs1,
                            vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vnmsac_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs1,
                            vint8m2_t vs2, size_t vl);
vint8m2_t __riscv_vnmsac_mu(vbool4_t vm, vint8m2_t vd, int8_t rs1,
                            vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vnmsac_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs1,
                            vint8m4_t vs2, size_t vl);
vint8m4_t __riscv_vnmsac_mu(vbool2_t vm, vint8m4_t vd, int8_t rs1,
                            vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vnmsac_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs1,
                            vint8m8_t vs2, size_t vl);
vint8m8_t __riscv_vnmsac_mu(vbool1_t vm, vint8m8_t vd, int8_t rs1,
                            vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsac_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs1,
                              vint16mf4_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsac_mu(vbool64_t vm, vint16mf4_t vd, int16_t rs1,
                              vint16mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vnmsac_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs1,
                              vint16mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vnmsac_mu(vbool32_t vm, vint16mf2_t vd, int16_t rs1,
                              vint16mf2_t vs2, size_t vl);
vint16m1_t __riscv_vnmsac_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs1,
                             vint16m1_t vs2, size_t vl);
vint16m1_t __riscv_vnmsac_mu(vbool16_t vm, vint16m1_t vd, int16_t rs1,
                             vint16m1_t vs2, size_t vl);
vint16m2_t __riscv_vnmsac_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs1,
                             vint16m2_t vs2, size_t vl);
vint16m2_t __riscv_vnmsac_mu(vbool8_t vm, vint16m2_t vd, int16_t rs1,
                             vint16m2_t vs2, size_t vl);
vint16m4_t __riscv_vnmsac_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs1,
                             vint16m4_t vs2, size_t vl);
vint16m4_t __riscv_vnmsac_mu(vbool4_t vm, vint16m4_t vd, int16_t rs1,
                             vint16m4_t vs2, size_t vl);
vint16m8_t __riscv_vnmsac_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs1,
                             vint16m8_t vs2, size_t vl);
vint16m8_t __riscv_vnmsac_mu(vbool2_t vm, vint16m8_t vd, int16_t rs1,
                             vint16m8_t vs2, size_t vl);
vint32mf2_t __riscv_vnmsac_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs1,
                              vint32mf2_t vs2, size_t vl);
vint32mf2_t __riscv_vnmsac_mu(vbool64_t vm, vint32mf2_t vd, int32_t rs1,
                              vint32mf2_t vs2, size_t vl);
vint32m1_t __riscv_vnmsac_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs1,
                             vint32m1_t vs2, size_t vl);
vint32m1_t __riscv_vnmsac_mu(vbool32_t vm, vint32m1_t vd, int32_t rs1,
                             vint32m1_t vs2, size_t vl);
vint32m2_t __riscv_vnmsac_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs1,
                             vint32m2_t vs2, size_t vl);
vint32m2_t __riscv_vnmsac_mu(vbool16_t vm, vint32m2_t vd, int32_t rs1,
                             vint32m2_t vs2, size_t vl);
vint32m4_t __riscv_vnmsac_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs1,
                             vint32m4_t vs2, size_t vl);
vint32m4_t __riscv_vnmsac_mu(vbool8_t vm, vint32m4_t vd, int32_t rs1,
                             vint32m4_t vs2, size_t vl);
vint32m8_t __riscv_vnmsac_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs1,
                             vint32m8_t vs2, size_t vl);
vint32m8_t __riscv_vnmsac_mu(vbool4_t vm, vint32m8_t vd, int32_t rs1,
                             vint32m8_t vs2, size_t vl);
vint64m1_t __riscv_vnmsac_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs1,
                             vint64m1_t vs2, size_t vl);
vint64m1_t __riscv_vnmsac_mu(vbool64_t vm, vint64m1_t vd, int64_t rs1,
                             vint64m1_t vs2, size_t vl);
vint64m2_t __riscv_vnmsac_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs1,
                             vint64m2_t vs2, size_t vl);
vint64m2_t __riscv_vnmsac_mu(vbool32_t vm, vint64m2_t vd, int64_t rs1,
                             vint64m2_t vs2, size_t vl);
vint64m4_t __riscv_vnmsac_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs1,
                             vint64m4_t vs2, size_t vl);
vint64m4_t __riscv_vnmsac_mu(vbool16_t vm, vint64m4_t vd, int64_t rs1,
                             vint64m4_t vs2, size_t vl);
vint64m8_t __riscv_vnmsac_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs1,
                             vint64m8_t vs2, size_t vl);
vint64m8_t __riscv_vnmsac_mu(vbool8_t vm, vint64m8_t vd, int64_t rs1,
                             vint64m8_t vs2, size_t vl);
// __riscv_vmadd_mu -- overloads for signed integer vector types.
// The overloads come in pairs, one pair per vector type from vint8mf8_t
// through vint64m8_t: the first takes a vector operand vs1, the second a
// scalar operand rs1 of the matching intN_t. Each overload returns the same
// type as vd, takes the mask vm as its first argument and a size_t vl as its
// last.
// In every signature the mask type is vboolN_t with N = element width / LMUL
// (vint8mf8_t pairs with vbool64_t, vint64m8_t with vbool8_t).
// NOTE(review): the arithmetic is not visible in these prototypes. Presumably
// this maps to the RVV vmadd instruction, vd[i] = (vs1[i] * vd[i]) + vs2[i],
// with masked-off elements keeping their vd value -- confirm against the spec.
vint8mf8_t __riscv_vmadd_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs1,
                            vint8mf8_t vs2, size_t vl);
vint8mf8_t __riscv_vmadd_mu(vbool64_t vm, vint8mf8_t vd, int8_t rs1,
                            vint8mf8_t vs2, size_t vl);
vint8mf4_t __riscv_vmadd_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs1,
                            vint8mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vmadd_mu(vbool32_t vm, vint8mf4_t vd, int8_t rs1,
                            vint8mf4_t vs2, size_t vl);
vint8mf2_t __riscv_vmadd_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs1,
                            vint8mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vmadd_mu(vbool16_t vm, vint8mf2_t vd, int8_t rs1,
                            vint8mf2_t vs2, size_t vl);
vint8m1_t __riscv_vmadd_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs1,
                           vint8m1_t vs2, size_t vl);
vint8m1_t __riscv_vmadd_mu(vbool8_t vm, vint8m1_t vd, int8_t rs1, vint8m1_t vs2,
                           size_t vl);
vint8m2_t __riscv_vmadd_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs1,
                           vint8m2_t vs2, size_t vl);
vint8m2_t __riscv_vmadd_mu(vbool4_t vm, vint8m2_t vd, int8_t rs1, vint8m2_t vs2,
                           size_t vl);
vint8m4_t __riscv_vmadd_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs1,
                           vint8m4_t vs2, size_t vl);
vint8m4_t __riscv_vmadd_mu(vbool2_t vm, vint8m4_t vd, int8_t rs1, vint8m4_t vs2,
                           size_t vl);
vint8m8_t __riscv_vmadd_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs1,
                           vint8m8_t vs2, size_t vl);
vint8m8_t __riscv_vmadd_mu(vbool1_t vm, vint8m8_t vd, int8_t rs1, vint8m8_t vs2,
                           size_t vl);
vint16mf4_t __riscv_vmadd_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs1,
                             vint16mf4_t vs2, size_t vl);
vint16mf4_t __riscv_vmadd_mu(vbool64_t vm, vint16mf4_t vd, int16_t rs1,
                             vint16mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vmadd_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs1,
                             vint16mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vmadd_mu(vbool32_t vm, vint16mf2_t vd, int16_t rs1,
                             vint16mf2_t vs2, size_t vl);
vint16m1_t __riscv_vmadd_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs1,
                            vint16m1_t vs2, size_t vl);
vint16m1_t __riscv_vmadd_mu(vbool16_t vm, vint16m1_t vd, int16_t rs1,
                            vint16m1_t vs2, size_t vl);
vint16m2_t __riscv_vmadd_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs1,
                            vint16m2_t vs2, size_t vl);
vint16m2_t __riscv_vmadd_mu(vbool8_t vm, vint16m2_t vd, int16_t rs1,
                            vint16m2_t vs2, size_t vl);
vint16m4_t __riscv_vmadd_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs1,
                            vint16m4_t vs2, size_t vl);
vint16m4_t __riscv_vmadd_mu(vbool4_t vm, vint16m4_t vd, int16_t rs1,
                            vint16m4_t vs2, size_t vl);
vint16m8_t __riscv_vmadd_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs1,
                            vint16m8_t vs2, size_t vl);
vint16m8_t __riscv_vmadd_mu(vbool2_t vm, vint16m8_t vd, int16_t rs1,
                            vint16m8_t vs2, size_t vl);
vint32mf2_t __riscv_vmadd_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs1,
                             vint32mf2_t vs2, size_t vl);
vint32mf2_t __riscv_vmadd_mu(vbool64_t vm, vint32mf2_t vd, int32_t rs1,
                             vint32mf2_t vs2, size_t vl);
vint32m1_t __riscv_vmadd_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs1,
                            vint32m1_t vs2, size_t vl);
vint32m1_t __riscv_vmadd_mu(vbool32_t vm, vint32m1_t vd, int32_t rs1,
                            vint32m1_t vs2, size_t vl);
vint32m2_t __riscv_vmadd_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs1,
                            vint32m2_t vs2, size_t vl);
vint32m2_t __riscv_vmadd_mu(vbool16_t vm, vint32m2_t vd, int32_t rs1,
                            vint32m2_t vs2, size_t vl);
vint32m4_t __riscv_vmadd_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs1,
                            vint32m4_t vs2, size_t vl);
vint32m4_t __riscv_vmadd_mu(vbool8_t vm, vint32m4_t vd, int32_t rs1,
                            vint32m4_t vs2, size_t vl);
vint32m8_t __riscv_vmadd_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs1,
                            vint32m8_t vs2, size_t vl);
vint32m8_t __riscv_vmadd_mu(vbool4_t vm, vint32m8_t vd, int32_t rs1,
                            vint32m8_t vs2, size_t vl);
vint64m1_t __riscv_vmadd_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs1,
                            vint64m1_t vs2, size_t vl);
vint64m1_t __riscv_vmadd_mu(vbool64_t vm, vint64m1_t vd, int64_t rs1,
                            vint64m1_t vs2, size_t vl);
vint64m2_t __riscv_vmadd_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs1,
                            vint64m2_t vs2, size_t vl);
vint64m2_t __riscv_vmadd_mu(vbool32_t vm, vint64m2_t vd, int64_t rs1,
                            vint64m2_t vs2, size_t vl);
vint64m4_t __riscv_vmadd_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs1,
                            vint64m4_t vs2, size_t vl);
vint64m4_t __riscv_vmadd_mu(vbool16_t vm, vint64m4_t vd, int64_t rs1,
                            vint64m4_t vs2, size_t vl);
vint64m8_t __riscv_vmadd_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs1,
                            vint64m8_t vs2, size_t vl);
vint64m8_t __riscv_vmadd_mu(vbool8_t vm, vint64m8_t vd, int64_t rs1,
                            vint64m8_t vs2, size_t vl);
// __riscv_vnmsub_mu -- overloads for signed integer vector types.
// The overloads come in pairs, one pair per vector type from vint8mf8_t
// through vint64m8_t: the first takes a vector operand vs1, the second a
// scalar operand rs1 of the matching intN_t. Each overload returns the same
// type as vd, takes the mask vm as its first argument and a size_t vl as its
// last.
// In every signature the mask type is vboolN_t with N = element width / LMUL
// (vint8mf8_t pairs with vbool64_t, vint64m8_t with vbool8_t).
// NOTE(review): the arithmetic is not visible in these prototypes. Presumably
// this maps to the RVV vnmsub instruction, vd[i] = -(vs1[i] * vd[i]) + vs2[i],
// with masked-off elements keeping their vd value -- confirm against the spec.
vint8mf8_t __riscv_vnmsub_mu(vbool64_t vm, vint8mf8_t vd, vint8mf8_t vs1,
                             vint8mf8_t vs2, size_t vl);
vint8mf8_t __riscv_vnmsub_mu(vbool64_t vm, vint8mf8_t vd, int8_t rs1,
                             vint8mf8_t vs2, size_t vl);
vint8mf4_t __riscv_vnmsub_mu(vbool32_t vm, vint8mf4_t vd, vint8mf4_t vs1,
                             vint8mf4_t vs2, size_t vl);
vint8mf4_t __riscv_vnmsub_mu(vbool32_t vm, vint8mf4_t vd, int8_t rs1,
                             vint8mf4_t vs2, size_t vl);
vint8mf2_t __riscv_vnmsub_mu(vbool16_t vm, vint8mf2_t vd, vint8mf2_t vs1,
                             vint8mf2_t vs2, size_t vl);
vint8mf2_t __riscv_vnmsub_mu(vbool16_t vm, vint8mf2_t vd, int8_t rs1,
                             vint8mf2_t vs2, size_t vl);
vint8m1_t __riscv_vnmsub_mu(vbool8_t vm, vint8m1_t vd, vint8m1_t vs1,
                            vint8m1_t vs2, size_t vl);
vint8m1_t __riscv_vnmsub_mu(vbool8_t vm, vint8m1_t vd, int8_t rs1,
                            vint8m1_t vs2, size_t vl);
vint8m2_t __riscv_vnmsub_mu(vbool4_t vm, vint8m2_t vd, vint8m2_t vs1,
                            vint8m2_t vs2, size_t vl);
vint8m2_t __riscv_vnmsub_mu(vbool4_t vm, vint8m2_t vd, int8_t rs1,
                            vint8m2_t vs2, size_t vl);
vint8m4_t __riscv_vnmsub_mu(vbool2_t vm, vint8m4_t vd, vint8m4_t vs1,
                            vint8m4_t vs2, size_t vl);
vint8m4_t __riscv_vnmsub_mu(vbool2_t vm, vint8m4_t vd, int8_t rs1,
                            vint8m4_t vs2, size_t vl);
vint8m8_t __riscv_vnmsub_mu(vbool1_t vm, vint8m8_t vd, vint8m8_t vs1,
                            vint8m8_t vs2, size_t vl);
vint8m8_t __riscv_vnmsub_mu(vbool1_t vm, vint8m8_t vd, int8_t rs1,
                            vint8m8_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsub_mu(vbool64_t vm, vint16mf4_t vd, vint16mf4_t vs1,
                              vint16mf4_t vs2, size_t vl);
vint16mf4_t __riscv_vnmsub_mu(vbool64_t vm, vint16mf4_t vd, int16_t rs1,
                              vint16mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vnmsub_mu(vbool32_t vm, vint16mf2_t vd, vint16mf2_t vs1,
                              vint16mf2_t vs2, size_t vl);
vint16mf2_t __riscv_vnmsub_mu(vbool32_t vm, vint16mf2_t vd, int16_t rs1,
                              vint16mf2_t vs2, size_t vl);
vint16m1_t __riscv_vnmsub_mu(vbool16_t vm, vint16m1_t vd, vint16m1_t vs1,
                             vint16m1_t vs2, size_t vl);
vint16m1_t __riscv_vnmsub_mu(vbool16_t vm, vint16m1_t vd, int16_t rs1,
                             vint16m1_t vs2, size_t vl);
vint16m2_t __riscv_vnmsub_mu(vbool8_t vm, vint16m2_t vd, vint16m2_t vs1,
                             vint16m2_t vs2, size_t vl);
vint16m2_t __riscv_vnmsub_mu(vbool8_t vm, vint16m2_t vd, int16_t rs1,
                             vint16m2_t vs2, size_t vl);
vint16m4_t __riscv_vnmsub_mu(vbool4_t vm, vint16m4_t vd, vint16m4_t vs1,
                             vint16m4_t vs2, size_t vl);
vint16m4_t __riscv_vnmsub_mu(vbool4_t vm, vint16m4_t vd, int16_t rs1,
                             vint16m4_t vs2, size_t vl);
vint16m8_t __riscv_vnmsub_mu(vbool2_t vm, vint16m8_t vd, vint16m8_t vs1,
                             vint16m8_t vs2, size_t vl);
vint16m8_t __riscv_vnmsub_mu(vbool2_t vm, vint16m8_t vd, int16_t rs1,
                             vint16m8_t vs2, size_t vl);
vint32mf2_t __riscv_vnmsub_mu(vbool64_t vm, vint32mf2_t vd, vint32mf2_t vs1,
                              vint32mf2_t vs2, size_t vl);
vint32mf2_t __riscv_vnmsub_mu(vbool64_t vm, vint32mf2_t vd, int32_t rs1,
                              vint32mf2_t vs2, size_t vl);
vint32m1_t __riscv_vnmsub_mu(vbool32_t vm, vint32m1_t vd, vint32m1_t vs1,
                             vint32m1_t vs2, size_t vl);
vint32m1_t __riscv_vnmsub_mu(vbool32_t vm, vint32m1_t vd, int32_t rs1,
                             vint32m1_t vs2, size_t vl);
vint32m2_t __riscv_vnmsub_mu(vbool16_t vm, vint32m2_t vd, vint32m2_t vs1,
                             vint32m2_t vs2, size_t vl);
vint32m2_t __riscv_vnmsub_mu(vbool16_t vm, vint32m2_t vd, int32_t rs1,
                             vint32m2_t vs2, size_t vl);
vint32m4_t __riscv_vnmsub_mu(vbool8_t vm, vint32m4_t vd, vint32m4_t vs1,
                             vint32m4_t vs2, size_t vl);
vint32m4_t __riscv_vnmsub_mu(vbool8_t vm, vint32m4_t vd, int32_t rs1,
                             vint32m4_t vs2, size_t vl);
vint32m8_t __riscv_vnmsub_mu(vbool4_t vm, vint32m8_t vd, vint32m8_t vs1,
                             vint32m8_t vs2, size_t vl);
vint32m8_t __riscv_vnmsub_mu(vbool4_t vm, vint32m8_t vd, int32_t rs1,
                             vint32m8_t vs2, size_t vl);
vint64m1_t __riscv_vnmsub_mu(vbool64_t vm, vint64m1_t vd, vint64m1_t vs1,
                             vint64m1_t vs2, size_t vl);
vint64m1_t __riscv_vnmsub_mu(vbool64_t vm, vint64m1_t vd, int64_t rs1,
                             vint64m1_t vs2, size_t vl);
vint64m2_t __riscv_vnmsub_mu(vbool32_t vm, vint64m2_t vd, vint64m2_t vs1,
                             vint64m2_t vs2, size_t vl);
vint64m2_t __riscv_vnmsub_mu(vbool32_t vm, vint64m2_t vd, int64_t rs1,
                             vint64m2_t vs2, size_t vl);
vint64m4_t __riscv_vnmsub_mu(vbool16_t vm, vint64m4_t vd, vint64m4_t vs1,
                             vint64m4_t vs2, size_t vl);
vint64m4_t __riscv_vnmsub_mu(vbool16_t vm, vint64m4_t vd, int64_t rs1,
                             vint64m4_t vs2, size_t vl);
vint64m8_t __riscv_vnmsub_mu(vbool8_t vm, vint64m8_t vd, vint64m8_t vs1,
                             vint64m8_t vs2, size_t vl);
vint64m8_t __riscv_vnmsub_mu(vbool8_t vm, vint64m8_t vd, int64_t rs1,
                             vint64m8_t vs2, size_t vl);
// __riscv_vmacc_mu -- overloads for unsigned integer vector types.
// The overloads come in pairs, one pair per vector type from vuint8mf8_t
// through vuint64m8_t: the first takes a vector operand vs1, the second a
// scalar operand rs1 of the matching uintN_t. Each overload returns the same
// type as vd, takes the mask vm as its first argument and a size_t vl as its
// last.
// In every signature the mask type is vboolN_t with N = element width / LMUL
// (vuint8mf8_t pairs with vbool64_t, vuint64m8_t with vbool8_t).
// NOTE(review): the arithmetic is not visible in these prototypes. Presumably
// this maps to the RVV vmacc instruction, vd[i] = (vs1[i] * vs2[i]) + vd[i],
// with masked-off elements keeping their vd value -- confirm against the spec.
vuint8mf8_t __riscv_vmacc_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs1,
                             vuint8mf8_t vs2, size_t vl);
vuint8mf8_t __riscv_vmacc_mu(vbool64_t vm, vuint8mf8_t vd, uint8_t rs1,
                             vuint8mf8_t vs2, size_t vl);
vuint8mf4_t __riscv_vmacc_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs1,
                             vuint8mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vmacc_mu(vbool32_t vm, vuint8mf4_t vd, uint8_t rs1,
                             vuint8mf4_t vs2, size_t vl);
vuint8mf2_t __riscv_vmacc_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs1,
                             vuint8mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vmacc_mu(vbool16_t vm, vuint8mf2_t vd, uint8_t rs1,
                             vuint8mf2_t vs2, size_t vl);
vuint8m1_t __riscv_vmacc_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs1,
                            vuint8m1_t vs2, size_t vl);
vuint8m1_t __riscv_vmacc_mu(vbool8_t vm, vuint8m1_t vd, uint8_t rs1,
                            vuint8m1_t vs2, size_t vl);
vuint8m2_t __riscv_vmacc_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs1,
                            vuint8m2_t vs2, size_t vl);
vuint8m2_t __riscv_vmacc_mu(vbool4_t vm, vuint8m2_t vd, uint8_t rs1,
                            vuint8m2_t vs2, size_t vl);
vuint8m4_t __riscv_vmacc_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs1,
                            vuint8m4_t vs2, size_t vl);
vuint8m4_t __riscv_vmacc_mu(vbool2_t vm, vuint8m4_t vd, uint8_t rs1,
                            vuint8m4_t vs2, size_t vl);
vuint8m8_t __riscv_vmacc_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs1,
                            vuint8m8_t vs2, size_t vl);
vuint8m8_t __riscv_vmacc_mu(vbool1_t vm, vuint8m8_t vd, uint8_t rs1,
                            vuint8m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vmacc_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs1,
                              vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vmacc_mu(vbool64_t vm, vuint16mf4_t vd, uint16_t rs1,
                              vuint16mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vmacc_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs1,
                              vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vmacc_mu(vbool32_t vm, vuint16mf2_t vd, uint16_t rs1,
                              vuint16mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vmacc_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs1,
                             vuint16m1_t vs2, size_t vl);
vuint16m1_t __riscv_vmacc_mu(vbool16_t vm, vuint16m1_t vd, uint16_t rs1,
                             vuint16m1_t vs2, size_t vl);
vuint16m2_t __riscv_vmacc_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs1,
                             vuint16m2_t vs2, size_t vl);
vuint16m2_t __riscv_vmacc_mu(vbool8_t vm, vuint16m2_t vd, uint16_t rs1,
                             vuint16m2_t vs2, size_t vl);
vuint16m4_t __riscv_vmacc_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs1,
                             vuint16m4_t vs2, size_t vl);
vuint16m4_t __riscv_vmacc_mu(vbool4_t vm, vuint16m4_t vd, uint16_t rs1,
                             vuint16m4_t vs2, size_t vl);
vuint16m8_t __riscv_vmacc_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs1,
                             vuint16m8_t vs2, size_t vl);
vuint16m8_t __riscv_vmacc_mu(vbool2_t vm, vuint16m8_t vd, uint16_t rs1,
                             vuint16m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vmacc_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs1,
                              vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vmacc_mu(vbool64_t vm, vuint32mf2_t vd, uint32_t rs1,
                              vuint32mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vmacc_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs1,
                             vuint32m1_t vs2, size_t vl);
vuint32m1_t __riscv_vmacc_mu(vbool32_t vm, vuint32m1_t vd, uint32_t rs1,
                             vuint32m1_t vs2, size_t vl);
vuint32m2_t __riscv_vmacc_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs1,
                             vuint32m2_t vs2, size_t vl);
vuint32m2_t __riscv_vmacc_mu(vbool16_t vm, vuint32m2_t vd, uint32_t rs1,
                             vuint32m2_t vs2, size_t vl);
vuint32m4_t __riscv_vmacc_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs1,
                             vuint32m4_t vs2, size_t vl);
vuint32m4_t __riscv_vmacc_mu(vbool8_t vm, vuint32m4_t vd, uint32_t rs1,
                             vuint32m4_t vs2, size_t vl);
vuint32m8_t __riscv_vmacc_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs1,
                             vuint32m8_t vs2, size_t vl);
vuint32m8_t __riscv_vmacc_mu(vbool4_t vm, vuint32m8_t vd, uint32_t rs1,
                             vuint32m8_t vs2, size_t vl);
vuint64m1_t __riscv_vmacc_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs1,
                             vuint64m1_t vs2, size_t vl);
vuint64m1_t __riscv_vmacc_mu(vbool64_t vm, vuint64m1_t vd, uint64_t rs1,
                             vuint64m1_t vs2, size_t vl);
vuint64m2_t __riscv_vmacc_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs1,
                             vuint64m2_t vs2, size_t vl);
vuint64m2_t __riscv_vmacc_mu(vbool32_t vm, vuint64m2_t vd, uint64_t rs1,
                             vuint64m2_t vs2, size_t vl);
vuint64m4_t __riscv_vmacc_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs1,
                             vuint64m4_t vs2, size_t vl);
vuint64m4_t __riscv_vmacc_mu(vbool16_t vm, vuint64m4_t vd, uint64_t rs1,
                             vuint64m4_t vs2, size_t vl);
vuint64m8_t __riscv_vmacc_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs1,
                             vuint64m8_t vs2, size_t vl);
vuint64m8_t __riscv_vmacc_mu(vbool8_t vm, vuint64m8_t vd, uint64_t rs1,
                             vuint64m8_t vs2, size_t vl);
// __riscv_vnmsac_mu -- overloads for unsigned integer vector types, starting
// at vuint8mf8_t. Overloads are listed per vector type: one taking a vector
// operand vs1 and one taking a scalar operand rs1 of the matching uintN_t.
// Each overload returns the same type as vd, takes the mask vm as its first
// argument and a size_t vl as its last.
// In every signature the mask type is vboolN_t with N = element width / LMUL
// (vuint8mf8_t pairs with vbool64_t, vuint8m8_t with vbool1_t).
// NOTE(review): the arithmetic is not visible in these prototypes. Presumably
// this maps to the RVV vnmsac instruction, vd[i] = -(vs1[i] * vs2[i]) + vd[i],
// with masked-off elements keeping their vd value -- confirm against the spec.
vuint8mf8_t __riscv_vnmsac_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs1,
                              vuint8mf8_t vs2, size_t vl);
vuint8mf8_t __riscv_vnmsac_mu(vbool64_t vm, vuint8mf8_t vd, uint8_t rs1,
                              vuint8mf8_t vs2, size_t vl);
vuint8mf4_t __riscv_vnmsac_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs1,
                              vuint8mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vnmsac_mu(vbool32_t vm, vuint8mf4_t vd, uint8_t rs1,
                              vuint8mf4_t vs2, size_t vl);
vuint8mf2_t __riscv_vnmsac_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs1,
                              vuint8mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vnmsac_mu(vbool16_t vm, vuint8mf2_t vd, uint8_t rs1,
                              vuint8mf2_t vs2, size_t vl);
vuint8m1_t __riscv_vnmsac_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs1,
                             vuint8m1_t vs2, size_t vl);
vuint8m1_t __riscv_vnmsac_mu(vbool8_t vm, vuint8m1_t vd, uint8_t rs1,
                             vuint8m1_t vs2, size_t vl);
vuint8m2_t __riscv_vnmsac_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs1,
                             vuint8m2_t vs2, size_t vl);
vuint8m2_t __riscv_vnmsac_mu(vbool4_t vm, vuint8m2_t vd, uint8_t rs1,
                             vuint8m2_t vs2, size_t vl);
vuint8m4_t __riscv_vnmsac_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs1,
                             vuint8m4_t vs2, size_t vl);
vuint8m4_t __riscv_vnmsac_mu(vbool2_t vm, vuint8m4_t vd, uint8_t rs1,
                             vuint8m4_t vs2, size_t vl);
vuint8m8_t __riscv_vnmsac_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs1,
                             vuint8m8_t vs2, size_t vl);
vuint8m8_t __riscv_vnmsac_mu(vbool1_t vm, vuint8m8_t vd, uint8_t rs1,
                             vuint8m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsac_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs1,
                               vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsac_mu(vbool64_t vm, vuint16mf4_t vd, uint16_t rs1,
                               vuint16mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsac_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs1,
                               vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsac_mu(vbool32_t vm, vuint16mf2_t vd, uint16_t rs1,
                               vuint16mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vnmsac_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs1,
                              vuint16m1_t vs2, size_t vl);
vuint16m1_t __riscv_vnmsac_mu(vbool16_t vm, vuint16m1_t vd, uint16_t rs1,
                              vuint16m1_t vs2, size_t vl);
vuint16m2_t __riscv_vnmsac_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs1,
                              vuint16m2_t vs2, size_t vl);
vuint16m2_t __riscv_vnmsac_mu(vbool8_t vm, vuint16m2_t vd, uint16_t rs1,
                              vuint16m2_t vs2, size_t vl);
vuint16m4_t __riscv_vnmsac_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs1,
                              vuint16m4_t vs2, size_t vl);
vuint16m4_t __riscv_vnmsac_mu(vbool4_t vm, vuint16m4_t vd, uint16_t rs1,
                              vuint16m4_t vs2, size_t vl);
vuint16m8_t __riscv_vnmsac_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs1,
                              vuint16m8_t vs2, size_t vl);
vuint16m8_t __riscv_vnmsac_mu(vbool2_t vm, vuint16m8_t vd, uint16_t rs1,
                              vuint16m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsac_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs1,
                               vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsac_mu(vbool64_t vm, vuint32mf2_t vd, uint32_t rs1,
                               vuint32mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vnmsac_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs1,
                              vuint32m1_t vs2, size_t vl);
vuint32m1_t __riscv_vnmsac_mu(vbool32_t vm, vuint32m1_t vd, uint32_t rs1,
                              vuint32m1_t vs2, size_t vl);
vuint32m2_t __riscv_vnmsac_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs1,
                              vuint32m2_t vs2, size_t vl);
vuint32m2_t __riscv_vnmsac_mu(vbool16_t vm, vuint32m2_t vd, uint32_t rs1,
                              vuint32m2_t vs2, size_t vl);
vuint32m4_t __riscv_vnmsac_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs1,
                              vuint32m4_t vs2, size_t vl);
vuint32m4_t __riscv_vnmsac_mu(vbool8_t vm, vuint32m4_t vd, uint32_t rs1,
                              vuint32m4_t vs2, size_t vl);
vuint32m8_t __riscv_vnmsac_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs1,
                              vuint32m8_t vs2, size_t vl);
vuint32m8_t __riscv_vnmsac_mu(vbool4_t vm, vuint32m8_t vd, uint32_t rs1,
                              vuint32m8_t vs2, size_t vl);
vuint64m1_t __riscv_vnmsac_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs1,
                              vuint64m1_t vs2, size_t vl);
vuint64m1_t __riscv_vnmsac_mu(vbool64_t vm, vuint64m1_t vd, uint64_t rs1,
                              vuint64m1_t vs2, size_t vl);
vuint64m2_t __riscv_vnmsac_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs1,
                              vuint64m2_t vs2, size_t vl);
vuint64m2_t __riscv_vnmsac_mu(vbool32_t vm, vuint64m2_t vd, uint64_t rs1,
                              vuint64m2_t vs2, size_t vl);
vuint64m4_t __riscv_vnmsac_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs1,
                              vuint64m4_t vs2, size_t vl);
vuint64m4_t __riscv_vnmsac_mu(vbool16_t vm, vuint64m4_t vd, uint64_t rs1,
                              vuint64m4_t vs2, size_t vl);
vuint64m8_t __riscv_vnmsac_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs1,
                              vuint64m8_t vs2, size_t vl);
vuint64m8_t __riscv_vnmsac_mu(vbool8_t vm, vuint64m8_t vd, uint64_t rs1,
                              vuint64m8_t vs2, size_t vl);
// __riscv_vmadd_mu, unsigned overloads: for every element width / LMUL
// combination there is a vector-vector form (vs1) followed by a vector-scalar
// form (rs1) whose scalar width matches the element width. vm is the mask
// operand; its vboolN type pairs with the data type so that
// N = element width / LMUL (e.g. vuint8mf8_t <-> vbool64_t,
// vuint8m8_t <-> vbool1_t). The return type always equals the type of vd.
// NOTE(review): per the RVV spec vmadd is presumably
// vd[i] = (vs1[i] * vd[i]) + vs2[i] (vd is a multiplicand, vs2 the addend),
// and `_mu` the mask-undisturbed policy variant -- confirm against the
// specification.
vuint8mf8_t __riscv_vmadd_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs1,
                             vuint8mf8_t vs2, size_t vl);
vuint8mf8_t __riscv_vmadd_mu(vbool64_t vm, vuint8mf8_t vd, uint8_t rs1,
                             vuint8mf8_t vs2, size_t vl);
vuint8mf4_t __riscv_vmadd_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs1,
                             vuint8mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vmadd_mu(vbool32_t vm, vuint8mf4_t vd, uint8_t rs1,
                             vuint8mf4_t vs2, size_t vl);
vuint8mf2_t __riscv_vmadd_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs1,
                             vuint8mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vmadd_mu(vbool16_t vm, vuint8mf2_t vd, uint8_t rs1,
                             vuint8mf2_t vs2, size_t vl);
vuint8m1_t __riscv_vmadd_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs1,
                            vuint8m1_t vs2, size_t vl);
vuint8m1_t __riscv_vmadd_mu(vbool8_t vm, vuint8m1_t vd, uint8_t rs1,
                            vuint8m1_t vs2, size_t vl);
vuint8m2_t __riscv_vmadd_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs1,
                            vuint8m2_t vs2, size_t vl);
vuint8m2_t __riscv_vmadd_mu(vbool4_t vm, vuint8m2_t vd, uint8_t rs1,
                            vuint8m2_t vs2, size_t vl);
vuint8m4_t __riscv_vmadd_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs1,
                            vuint8m4_t vs2, size_t vl);
vuint8m4_t __riscv_vmadd_mu(vbool2_t vm, vuint8m4_t vd, uint8_t rs1,
                            vuint8m4_t vs2, size_t vl);
vuint8m8_t __riscv_vmadd_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs1,
                            vuint8m8_t vs2, size_t vl);
vuint8m8_t __riscv_vmadd_mu(vbool1_t vm, vuint8m8_t vd, uint8_t rs1,
                            vuint8m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vmadd_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs1,
                              vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vmadd_mu(vbool64_t vm, vuint16mf4_t vd, uint16_t rs1,
                              vuint16mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vmadd_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs1,
                              vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vmadd_mu(vbool32_t vm, vuint16mf2_t vd, uint16_t rs1,
                              vuint16mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vmadd_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs1,
                             vuint16m1_t vs2, size_t vl);
vuint16m1_t __riscv_vmadd_mu(vbool16_t vm, vuint16m1_t vd, uint16_t rs1,
                             vuint16m1_t vs2, size_t vl);
vuint16m2_t __riscv_vmadd_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs1,
                             vuint16m2_t vs2, size_t vl);
vuint16m2_t __riscv_vmadd_mu(vbool8_t vm, vuint16m2_t vd, uint16_t rs1,
                             vuint16m2_t vs2, size_t vl);
vuint16m4_t __riscv_vmadd_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs1,
                             vuint16m4_t vs2, size_t vl);
vuint16m4_t __riscv_vmadd_mu(vbool4_t vm, vuint16m4_t vd, uint16_t rs1,
                             vuint16m4_t vs2, size_t vl);
vuint16m8_t __riscv_vmadd_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs1,
                             vuint16m8_t vs2, size_t vl);
vuint16m8_t __riscv_vmadd_mu(vbool2_t vm, vuint16m8_t vd, uint16_t rs1,
                             vuint16m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vmadd_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs1,
                              vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vmadd_mu(vbool64_t vm, vuint32mf2_t vd, uint32_t rs1,
                              vuint32mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vmadd_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs1,
                             vuint32m1_t vs2, size_t vl);
vuint32m1_t __riscv_vmadd_mu(vbool32_t vm, vuint32m1_t vd, uint32_t rs1,
                             vuint32m1_t vs2, size_t vl);
vuint32m2_t __riscv_vmadd_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs1,
                             vuint32m2_t vs2, size_t vl);
vuint32m2_t __riscv_vmadd_mu(vbool16_t vm, vuint32m2_t vd, uint32_t rs1,
                             vuint32m2_t vs2, size_t vl);
vuint32m4_t __riscv_vmadd_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs1,
                             vuint32m4_t vs2, size_t vl);
vuint32m4_t __riscv_vmadd_mu(vbool8_t vm, vuint32m4_t vd, uint32_t rs1,
                             vuint32m4_t vs2, size_t vl);
vuint32m8_t __riscv_vmadd_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs1,
                             vuint32m8_t vs2, size_t vl);
vuint32m8_t __riscv_vmadd_mu(vbool4_t vm, vuint32m8_t vd, uint32_t rs1,
                             vuint32m8_t vs2, size_t vl);
vuint64m1_t __riscv_vmadd_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs1,
                             vuint64m1_t vs2, size_t vl);
vuint64m1_t __riscv_vmadd_mu(vbool64_t vm, vuint64m1_t vd, uint64_t rs1,
                             vuint64m1_t vs2, size_t vl);
vuint64m2_t __riscv_vmadd_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs1,
                             vuint64m2_t vs2, size_t vl);
vuint64m2_t __riscv_vmadd_mu(vbool32_t vm, vuint64m2_t vd, uint64_t rs1,
                             vuint64m2_t vs2, size_t vl);
vuint64m4_t __riscv_vmadd_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs1,
                             vuint64m4_t vs2, size_t vl);
vuint64m4_t __riscv_vmadd_mu(vbool16_t vm, vuint64m4_t vd, uint64_t rs1,
                             vuint64m4_t vs2, size_t vl);
vuint64m8_t __riscv_vmadd_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs1,
                             vuint64m8_t vs2, size_t vl);
vuint64m8_t __riscv_vmadd_mu(vbool8_t vm, vuint64m8_t vd, uint64_t rs1,
                             vuint64m8_t vs2, size_t vl);
// __riscv_vnmsub_mu, unsigned overloads: for every element width / LMUL
// combination there is a vector-vector form (vs1) followed by a vector-scalar
// form (rs1) whose scalar width matches the element width. vm is the mask
// operand; its vboolN type pairs with the data type so that
// N = element width / LMUL (e.g. vuint8mf8_t <-> vbool64_t,
// vuint64m8_t <-> vbool8_t). The return type always equals the type of vd.
// NOTE(review): per the RVV spec vnmsub is presumably
// vd[i] = -(vs1[i] * vd[i]) + vs2[i], and `_mu` the mask-undisturbed policy
// variant -- confirm against the specification.
vuint8mf8_t __riscv_vnmsub_mu(vbool64_t vm, vuint8mf8_t vd, vuint8mf8_t vs1,
                              vuint8mf8_t vs2, size_t vl);
vuint8mf8_t __riscv_vnmsub_mu(vbool64_t vm, vuint8mf8_t vd, uint8_t rs1,
                              vuint8mf8_t vs2, size_t vl);
vuint8mf4_t __riscv_vnmsub_mu(vbool32_t vm, vuint8mf4_t vd, vuint8mf4_t vs1,
                              vuint8mf4_t vs2, size_t vl);
vuint8mf4_t __riscv_vnmsub_mu(vbool32_t vm, vuint8mf4_t vd, uint8_t rs1,
                              vuint8mf4_t vs2, size_t vl);
vuint8mf2_t __riscv_vnmsub_mu(vbool16_t vm, vuint8mf2_t vd, vuint8mf2_t vs1,
                              vuint8mf2_t vs2, size_t vl);
vuint8mf2_t __riscv_vnmsub_mu(vbool16_t vm, vuint8mf2_t vd, uint8_t rs1,
                              vuint8mf2_t vs2, size_t vl);
vuint8m1_t __riscv_vnmsub_mu(vbool8_t vm, vuint8m1_t vd, vuint8m1_t vs1,
                             vuint8m1_t vs2, size_t vl);
vuint8m1_t __riscv_vnmsub_mu(vbool8_t vm, vuint8m1_t vd, uint8_t rs1,
                             vuint8m1_t vs2, size_t vl);
vuint8m2_t __riscv_vnmsub_mu(vbool4_t vm, vuint8m2_t vd, vuint8m2_t vs1,
                             vuint8m2_t vs2, size_t vl);
vuint8m2_t __riscv_vnmsub_mu(vbool4_t vm, vuint8m2_t vd, uint8_t rs1,
                             vuint8m2_t vs2, size_t vl);
vuint8m4_t __riscv_vnmsub_mu(vbool2_t vm, vuint8m4_t vd, vuint8m4_t vs1,
                             vuint8m4_t vs2, size_t vl);
vuint8m4_t __riscv_vnmsub_mu(vbool2_t vm, vuint8m4_t vd, uint8_t rs1,
                             vuint8m4_t vs2, size_t vl);
vuint8m8_t __riscv_vnmsub_mu(vbool1_t vm, vuint8m8_t vd, vuint8m8_t vs1,
                             vuint8m8_t vs2, size_t vl);
vuint8m8_t __riscv_vnmsub_mu(vbool1_t vm, vuint8m8_t vd, uint8_t rs1,
                             vuint8m8_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsub_mu(vbool64_t vm, vuint16mf4_t vd, vuint16mf4_t vs1,
                               vuint16mf4_t vs2, size_t vl);
vuint16mf4_t __riscv_vnmsub_mu(vbool64_t vm, vuint16mf4_t vd, uint16_t rs1,
                               vuint16mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsub_mu(vbool32_t vm, vuint16mf2_t vd, vuint16mf2_t vs1,
                               vuint16mf2_t vs2, size_t vl);
vuint16mf2_t __riscv_vnmsub_mu(vbool32_t vm, vuint16mf2_t vd, uint16_t rs1,
                               vuint16mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vnmsub_mu(vbool16_t vm, vuint16m1_t vd, vuint16m1_t vs1,
                              vuint16m1_t vs2, size_t vl);
vuint16m1_t __riscv_vnmsub_mu(vbool16_t vm, vuint16m1_t vd, uint16_t rs1,
                              vuint16m1_t vs2, size_t vl);
vuint16m2_t __riscv_vnmsub_mu(vbool8_t vm, vuint16m2_t vd, vuint16m2_t vs1,
                              vuint16m2_t vs2, size_t vl);
vuint16m2_t __riscv_vnmsub_mu(vbool8_t vm, vuint16m2_t vd, uint16_t rs1,
                              vuint16m2_t vs2, size_t vl);
vuint16m4_t __riscv_vnmsub_mu(vbool4_t vm, vuint16m4_t vd, vuint16m4_t vs1,
                              vuint16m4_t vs2, size_t vl);
vuint16m4_t __riscv_vnmsub_mu(vbool4_t vm, vuint16m4_t vd, uint16_t rs1,
                              vuint16m4_t vs2, size_t vl);
vuint16m8_t __riscv_vnmsub_mu(vbool2_t vm, vuint16m8_t vd, vuint16m8_t vs1,
                              vuint16m8_t vs2, size_t vl);
vuint16m8_t __riscv_vnmsub_mu(vbool2_t vm, vuint16m8_t vd, uint16_t rs1,
                              vuint16m8_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsub_mu(vbool64_t vm, vuint32mf2_t vd, vuint32mf2_t vs1,
                               vuint32mf2_t vs2, size_t vl);
vuint32mf2_t __riscv_vnmsub_mu(vbool64_t vm, vuint32mf2_t vd, uint32_t rs1,
                               vuint32mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vnmsub_mu(vbool32_t vm, vuint32m1_t vd, vuint32m1_t vs1,
                              vuint32m1_t vs2, size_t vl);
vuint32m1_t __riscv_vnmsub_mu(vbool32_t vm, vuint32m1_t vd, uint32_t rs1,
                              vuint32m1_t vs2, size_t vl);
vuint32m2_t __riscv_vnmsub_mu(vbool16_t vm, vuint32m2_t vd, vuint32m2_t vs1,
                              vuint32m2_t vs2, size_t vl);
vuint32m2_t __riscv_vnmsub_mu(vbool16_t vm, vuint32m2_t vd, uint32_t rs1,
                              vuint32m2_t vs2, size_t vl);
vuint32m4_t __riscv_vnmsub_mu(vbool8_t vm, vuint32m4_t vd, vuint32m4_t vs1,
                              vuint32m4_t vs2, size_t vl);
vuint32m4_t __riscv_vnmsub_mu(vbool8_t vm, vuint32m4_t vd, uint32_t rs1,
                              vuint32m4_t vs2, size_t vl);
vuint32m8_t __riscv_vnmsub_mu(vbool4_t vm, vuint32m8_t vd, vuint32m8_t vs1,
                              vuint32m8_t vs2, size_t vl);
vuint32m8_t __riscv_vnmsub_mu(vbool4_t vm, vuint32m8_t vd, uint32_t rs1,
                              vuint32m8_t vs2, size_t vl);
vuint64m1_t __riscv_vnmsub_mu(vbool64_t vm, vuint64m1_t vd, vuint64m1_t vs1,
                              vuint64m1_t vs2, size_t vl);
vuint64m1_t __riscv_vnmsub_mu(vbool64_t vm, vuint64m1_t vd, uint64_t rs1,
                              vuint64m1_t vs2, size_t vl);
vuint64m2_t __riscv_vnmsub_mu(vbool32_t vm, vuint64m2_t vd, vuint64m2_t vs1,
                              vuint64m2_t vs2, size_t vl);
vuint64m2_t __riscv_vnmsub_mu(vbool32_t vm, vuint64m2_t vd, uint64_t rs1,
                              vuint64m2_t vs2, size_t vl);
vuint64m4_t __riscv_vnmsub_mu(vbool16_t vm, vuint64m4_t vd, vuint64m4_t vs1,
                              vuint64m4_t vs2, size_t vl);
vuint64m4_t __riscv_vnmsub_mu(vbool16_t vm, vuint64m4_t vd, uint64_t rs1,
                              vuint64m4_t vs2, size_t vl);
vuint64m8_t __riscv_vnmsub_mu(vbool8_t vm, vuint64m8_t vd, vuint64m8_t vs1,
                              vuint64m8_t vs2, size_t vl);
vuint64m8_t __riscv_vnmsub_mu(vbool8_t vm, vuint64m8_t vd, uint64_t rs1,
                              vuint64m8_t vs2, size_t vl);

Vector Widening Integer Multiply-Add Intrinsics

// __riscv_vwmacc_tu: signed widening multiply-add overloads without a mask
// operand. Both sources are signed and half the element width of vd, and the
// vd type has twice the LMUL of the sources (e.g. vint8mf8_t sources with a
// vint16mf4_t destination, vint32m4_t sources with a vint64m8_t destination).
// Each destination type has a vector-vector form (vs1) followed by a
// vector-scalar form (rs1) whose scalar width matches the source element
// width. The return type always equals the type of vd.
// NOTE(review): per the RVV spec vwmacc is presumably
// vd[i] = (vs1[i] * vs2[i]) + vd[i] at the wider element width, and `_tu` the
// tail-undisturbed policy variant -- confirm against the specification.
vint16mf4_t __riscv_vwmacc_tu(vint16mf4_t vd, vint8mf8_t vs1, vint8mf8_t vs2,
                              size_t vl);
vint16mf4_t __riscv_vwmacc_tu(vint16mf4_t vd, int8_t rs1, vint8mf8_t vs2,
                              size_t vl);
vint16mf2_t __riscv_vwmacc_tu(vint16mf2_t vd, vint8mf4_t vs1, vint8mf4_t vs2,
                              size_t vl);
vint16mf2_t __riscv_vwmacc_tu(vint16mf2_t vd, int8_t rs1, vint8mf4_t vs2,
                              size_t vl);
vint16m1_t __riscv_vwmacc_tu(vint16m1_t vd, vint8mf2_t vs1, vint8mf2_t vs2,
                             size_t vl);
vint16m1_t __riscv_vwmacc_tu(vint16m1_t vd, int8_t rs1, vint8mf2_t vs2,
                             size_t vl);
vint16m2_t __riscv_vwmacc_tu(vint16m2_t vd, vint8m1_t vs1, vint8m1_t vs2,
                             size_t vl);
vint16m2_t __riscv_vwmacc_tu(vint16m2_t vd, int8_t rs1, vint8m1_t vs2,
                             size_t vl);
vint16m4_t __riscv_vwmacc_tu(vint16m4_t vd, vint8m2_t vs1, vint8m2_t vs2,
                             size_t vl);
vint16m4_t __riscv_vwmacc_tu(vint16m4_t vd, int8_t rs1, vint8m2_t vs2,
                             size_t vl);
vint16m8_t __riscv_vwmacc_tu(vint16m8_t vd, vint8m4_t vs1, vint8m4_t vs2,
                             size_t vl);
vint16m8_t __riscv_vwmacc_tu(vint16m8_t vd, int8_t rs1, vint8m4_t vs2,
                             size_t vl);
vint32mf2_t __riscv_vwmacc_tu(vint32mf2_t vd, vint16mf4_t vs1, vint16mf4_t vs2,
                              size_t vl);
vint32mf2_t __riscv_vwmacc_tu(vint32mf2_t vd, int16_t rs1, vint16mf4_t vs2,
                              size_t vl);
vint32m1_t __riscv_vwmacc_tu(vint32m1_t vd, vint16mf2_t vs1, vint16mf2_t vs2,
                             size_t vl);
vint32m1_t __riscv_vwmacc_tu(vint32m1_t vd, int16_t rs1, vint16mf2_t vs2,
                             size_t vl);
vint32m2_t __riscv_vwmacc_tu(vint32m2_t vd, vint16m1_t vs1, vint16m1_t vs2,
                             size_t vl);
vint32m2_t __riscv_vwmacc_tu(vint32m2_t vd, int16_t rs1, vint16m1_t vs2,
                             size_t vl);
vint32m4_t __riscv_vwmacc_tu(vint32m4_t vd, vint16m2_t vs1, vint16m2_t vs2,
                             size_t vl);
vint32m4_t __riscv_vwmacc_tu(vint32m4_t vd, int16_t rs1, vint16m2_t vs2,
                             size_t vl);
vint32m8_t __riscv_vwmacc_tu(vint32m8_t vd, vint16m4_t vs1, vint16m4_t vs2,
                             size_t vl);
vint32m8_t __riscv_vwmacc_tu(vint32m8_t vd, int16_t rs1, vint16m4_t vs2,
                             size_t vl);
vint64m1_t __riscv_vwmacc_tu(vint64m1_t vd, vint32mf2_t vs1, vint32mf2_t vs2,
                             size_t vl);
vint64m1_t __riscv_vwmacc_tu(vint64m1_t vd, int32_t rs1, vint32mf2_t vs2,
                             size_t vl);
vint64m2_t __riscv_vwmacc_tu(vint64m2_t vd, vint32m1_t vs1, vint32m1_t vs2,
                             size_t vl);
vint64m2_t __riscv_vwmacc_tu(vint64m2_t vd, int32_t rs1, vint32m1_t vs2,
                             size_t vl);
vint64m4_t __riscv_vwmacc_tu(vint64m4_t vd, vint32m2_t vs1, vint32m2_t vs2,
                             size_t vl);
vint64m4_t __riscv_vwmacc_tu(vint64m4_t vd, int32_t rs1, vint32m2_t vs2,
                             size_t vl);
vint64m8_t __riscv_vwmacc_tu(vint64m8_t vd, vint32m4_t vs1, vint32m4_t vs2,
                             size_t vl);
vint64m8_t __riscv_vwmacc_tu(vint64m8_t vd, int32_t rs1, vint32m4_t vs2,
                             size_t vl);
// __riscv_vwmaccsu_tu: mixed-sign widening multiply-add overloads without a
// mask operand. The first multiplicand (vs1 / rs1) is signed, the second
// (vs2) is unsigned, and vd and the result are signed. Sources are half the
// element width of vd and the vd type has twice the LMUL of the sources
// (e.g. vint8mf8_t * vuint8mf8_t into vint16mf4_t). Each destination type has
// a vector-vector form followed by a vector-scalar form.
// NOTE(review): per the RVV spec vwmaccsu is presumably
// vd[i] = (signed(vs1[i]) * unsigned(vs2[i])) + vd[i], and `_tu` the
// tail-undisturbed policy variant -- confirm against the specification.
vint16mf4_t __riscv_vwmaccsu_tu(vint16mf4_t vd, vint8mf8_t vs1, vuint8mf8_t vs2,
                                size_t vl);
vint16mf4_t __riscv_vwmaccsu_tu(vint16mf4_t vd, int8_t rs1, vuint8mf8_t vs2,
                                size_t vl);
vint16mf2_t __riscv_vwmaccsu_tu(vint16mf2_t vd, vint8mf4_t vs1, vuint8mf4_t vs2,
                                size_t vl);
vint16mf2_t __riscv_vwmaccsu_tu(vint16mf2_t vd, int8_t rs1, vuint8mf4_t vs2,
                                size_t vl);
vint16m1_t __riscv_vwmaccsu_tu(vint16m1_t vd, vint8mf2_t vs1, vuint8mf2_t vs2,
                               size_t vl);
vint16m1_t __riscv_vwmaccsu_tu(vint16m1_t vd, int8_t rs1, vuint8mf2_t vs2,
                               size_t vl);
vint16m2_t __riscv_vwmaccsu_tu(vint16m2_t vd, vint8m1_t vs1, vuint8m1_t vs2,
                               size_t vl);
vint16m2_t __riscv_vwmaccsu_tu(vint16m2_t vd, int8_t rs1, vuint8m1_t vs2,
                               size_t vl);
vint16m4_t __riscv_vwmaccsu_tu(vint16m4_t vd, vint8m2_t vs1, vuint8m2_t vs2,
                               size_t vl);
vint16m4_t __riscv_vwmaccsu_tu(vint16m4_t vd, int8_t rs1, vuint8m2_t vs2,
                               size_t vl);
vint16m8_t __riscv_vwmaccsu_tu(vint16m8_t vd, vint8m4_t vs1, vuint8m4_t vs2,
                               size_t vl);
vint16m8_t __riscv_vwmaccsu_tu(vint16m8_t vd, int8_t rs1, vuint8m4_t vs2,
                               size_t vl);
vint32mf2_t __riscv_vwmaccsu_tu(vint32mf2_t vd, vint16mf4_t vs1,
                                vuint16mf4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmaccsu_tu(vint32mf2_t vd, int16_t rs1, vuint16mf4_t vs2,
                                size_t vl);
vint32m1_t __riscv_vwmaccsu_tu(vint32m1_t vd, vint16mf2_t vs1, vuint16mf2_t vs2,
                               size_t vl);
vint32m1_t __riscv_vwmaccsu_tu(vint32m1_t vd, int16_t rs1, vuint16mf2_t vs2,
                               size_t vl);
vint32m2_t __riscv_vwmaccsu_tu(vint32m2_t vd, vint16m1_t vs1, vuint16m1_t vs2,
                               size_t vl);
vint32m2_t __riscv_vwmaccsu_tu(vint32m2_t vd, int16_t rs1, vuint16m1_t vs2,
                               size_t vl);
vint32m4_t __riscv_vwmaccsu_tu(vint32m4_t vd, vint16m2_t vs1, vuint16m2_t vs2,
                               size_t vl);
vint32m4_t __riscv_vwmaccsu_tu(vint32m4_t vd, int16_t rs1, vuint16m2_t vs2,
                               size_t vl);
vint32m8_t __riscv_vwmaccsu_tu(vint32m8_t vd, vint16m4_t vs1, vuint16m4_t vs2,
                               size_t vl);
vint32m8_t __riscv_vwmaccsu_tu(vint32m8_t vd, int16_t rs1, vuint16m4_t vs2,
                               size_t vl);
vint64m1_t __riscv_vwmaccsu_tu(vint64m1_t vd, vint32mf2_t vs1, vuint32mf2_t vs2,
                               size_t vl);
vint64m1_t __riscv_vwmaccsu_tu(vint64m1_t vd, int32_t rs1, vuint32mf2_t vs2,
                               size_t vl);
vint64m2_t __riscv_vwmaccsu_tu(vint64m2_t vd, vint32m1_t vs1, vuint32m1_t vs2,
                               size_t vl);
vint64m2_t __riscv_vwmaccsu_tu(vint64m2_t vd, int32_t rs1, vuint32m1_t vs2,
                               size_t vl);
vint64m4_t __riscv_vwmaccsu_tu(vint64m4_t vd, vint32m2_t vs1, vuint32m2_t vs2,
                               size_t vl);
vint64m4_t __riscv_vwmaccsu_tu(vint64m4_t vd, int32_t rs1, vuint32m2_t vs2,
                               size_t vl);
vint64m8_t __riscv_vwmaccsu_tu(vint64m8_t vd, vint32m4_t vs1, vuint32m4_t vs2,
                               size_t vl);
vint64m8_t __riscv_vwmaccsu_tu(vint64m8_t vd, int32_t rs1, vuint32m4_t vs2,
                               size_t vl);
// __riscv_vwmaccus_tu: mixed-sign widening multiply-add overloads without a
// mask operand. Only vector-scalar forms are listed here: rs1 is an unsigned
// scalar, vs2 is a signed vector, and vd and the result are signed. vs2 is
// half the element width of vd and the vd type has twice the LMUL of vs2
// (e.g. uint8_t * vint8mf8_t into vint16mf4_t).
// NOTE(review): per the RVV spec vwmaccus is presumably
// vd[i] = (unsigned(rs1) * signed(vs2[i])) + vd[i], and `_tu` the
// tail-undisturbed policy variant -- confirm against the specification.
vint16mf4_t __riscv_vwmaccus_tu(vint16mf4_t vd, uint8_t rs1, vint8mf8_t vs2,
                                size_t vl);
vint16mf2_t __riscv_vwmaccus_tu(vint16mf2_t vd, uint8_t rs1, vint8mf4_t vs2,
                                size_t vl);
vint16m1_t __riscv_vwmaccus_tu(vint16m1_t vd, uint8_t rs1, vint8mf2_t vs2,
                               size_t vl);
vint16m2_t __riscv_vwmaccus_tu(vint16m2_t vd, uint8_t rs1, vint8m1_t vs2,
                               size_t vl);
vint16m4_t __riscv_vwmaccus_tu(vint16m4_t vd, uint8_t rs1, vint8m2_t vs2,
                               size_t vl);
vint16m8_t __riscv_vwmaccus_tu(vint16m8_t vd, uint8_t rs1, vint8m4_t vs2,
                               size_t vl);
vint32mf2_t __riscv_vwmaccus_tu(vint32mf2_t vd, uint16_t rs1, vint16mf4_t vs2,
                                size_t vl);
vint32m1_t __riscv_vwmaccus_tu(vint32m1_t vd, uint16_t rs1, vint16mf2_t vs2,
                               size_t vl);
vint32m2_t __riscv_vwmaccus_tu(vint32m2_t vd, uint16_t rs1, vint16m1_t vs2,
                               size_t vl);
vint32m4_t __riscv_vwmaccus_tu(vint32m4_t vd, uint16_t rs1, vint16m2_t vs2,
                               size_t vl);
vint32m8_t __riscv_vwmaccus_tu(vint32m8_t vd, uint16_t rs1, vint16m4_t vs2,
                               size_t vl);
vint64m1_t __riscv_vwmaccus_tu(vint64m1_t vd, uint32_t rs1, vint32mf2_t vs2,
                               size_t vl);
vint64m2_t __riscv_vwmaccus_tu(vint64m2_t vd, uint32_t rs1, vint32m1_t vs2,
                               size_t vl);
vint64m4_t __riscv_vwmaccus_tu(vint64m4_t vd, uint32_t rs1, vint32m2_t vs2,
                               size_t vl);
vint64m8_t __riscv_vwmaccus_tu(vint64m8_t vd, uint32_t rs1, vint32m4_t vs2,
                               size_t vl);
// __riscv_vwmaccu_tu: unsigned widening multiply-add overloads without a mask
// operand. Both sources, vd and the result are unsigned. Sources are half the
// element width of vd and the vd type has twice the LMUL of the sources
// (e.g. vuint8mf8_t sources with a vuint16mf4_t destination). Each
// destination type has a vector-vector form (vs1) followed by a vector-scalar
// form (rs1) whose scalar width matches the source element width.
// NOTE(review): per the RVV spec vwmaccu is presumably
// vd[i] = (vs1[i] * vs2[i]) + vd[i] at the wider element width, and `_tu` the
// tail-undisturbed policy variant -- confirm against the specification.
vuint16mf4_t __riscv_vwmaccu_tu(vuint16mf4_t vd, vuint8mf8_t vs1,
                                vuint8mf8_t vs2, size_t vl);
vuint16mf4_t __riscv_vwmaccu_tu(vuint16mf4_t vd, uint8_t rs1, vuint8mf8_t vs2,
                                size_t vl);
vuint16mf2_t __riscv_vwmaccu_tu(vuint16mf2_t vd, vuint8mf4_t vs1,
                                vuint8mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vwmaccu_tu(vuint16mf2_t vd, uint8_t rs1, vuint8mf4_t vs2,
                                size_t vl);
vuint16m1_t __riscv_vwmaccu_tu(vuint16m1_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2,
                               size_t vl);
vuint16m1_t __riscv_vwmaccu_tu(vuint16m1_t vd, uint8_t rs1, vuint8mf2_t vs2,
                               size_t vl);
vuint16m2_t __riscv_vwmaccu_tu(vuint16m2_t vd, vuint8m1_t vs1, vuint8m1_t vs2,
                               size_t vl);
vuint16m2_t __riscv_vwmaccu_tu(vuint16m2_t vd, uint8_t rs1, vuint8m1_t vs2,
                               size_t vl);
vuint16m4_t __riscv_vwmaccu_tu(vuint16m4_t vd, vuint8m2_t vs1, vuint8m2_t vs2,
                               size_t vl);
vuint16m4_t __riscv_vwmaccu_tu(vuint16m4_t vd, uint8_t rs1, vuint8m2_t vs2,
                               size_t vl);
vuint16m8_t __riscv_vwmaccu_tu(vuint16m8_t vd, vuint8m4_t vs1, vuint8m4_t vs2,
                               size_t vl);
vuint16m8_t __riscv_vwmaccu_tu(vuint16m8_t vd, uint8_t rs1, vuint8m4_t vs2,
                               size_t vl);
vuint32mf2_t __riscv_vwmaccu_tu(vuint32mf2_t vd, vuint16mf4_t vs1,
                                vuint16mf4_t vs2, size_t vl);
vuint32mf2_t __riscv_vwmaccu_tu(vuint32mf2_t vd, uint16_t rs1, vuint16mf4_t vs2,
                                size_t vl);
vuint32m1_t __riscv_vwmaccu_tu(vuint32m1_t vd, vuint16mf2_t vs1,
                               vuint16mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vwmaccu_tu(vuint32m1_t vd, uint16_t rs1, vuint16mf2_t vs2,
                               size_t vl);
vuint32m2_t __riscv_vwmaccu_tu(vuint32m2_t vd, vuint16m1_t vs1, vuint16m1_t vs2,
                               size_t vl);
vuint32m2_t __riscv_vwmaccu_tu(vuint32m2_t vd, uint16_t rs1, vuint16m1_t vs2,
                               size_t vl);
vuint32m4_t __riscv_vwmaccu_tu(vuint32m4_t vd, vuint16m2_t vs1, vuint16m2_t vs2,
                               size_t vl);
vuint32m4_t __riscv_vwmaccu_tu(vuint32m4_t vd, uint16_t rs1, vuint16m2_t vs2,
                               size_t vl);
vuint32m8_t __riscv_vwmaccu_tu(vuint32m8_t vd, vuint16m4_t vs1, vuint16m4_t vs2,
                               size_t vl);
vuint32m8_t __riscv_vwmaccu_tu(vuint32m8_t vd, uint16_t rs1, vuint16m4_t vs2,
                               size_t vl);
vuint64m1_t __riscv_vwmaccu_tu(vuint64m1_t vd, vuint32mf2_t vs1,
                               vuint32mf2_t vs2, size_t vl);
vuint64m1_t __riscv_vwmaccu_tu(vuint64m1_t vd, uint32_t rs1, vuint32mf2_t vs2,
                               size_t vl);
vuint64m2_t __riscv_vwmaccu_tu(vuint64m2_t vd, vuint32m1_t vs1, vuint32m1_t vs2,
                               size_t vl);
vuint64m2_t __riscv_vwmaccu_tu(vuint64m2_t vd, uint32_t rs1, vuint32m1_t vs2,
                               size_t vl);
vuint64m4_t __riscv_vwmaccu_tu(vuint64m4_t vd, vuint32m2_t vs1, vuint32m2_t vs2,
                               size_t vl);
vuint64m4_t __riscv_vwmaccu_tu(vuint64m4_t vd, uint32_t rs1, vuint32m2_t vs2,
                               size_t vl);
vuint64m8_t __riscv_vwmaccu_tu(vuint64m8_t vd, vuint32m4_t vs1, vuint32m4_t vs2,
                               size_t vl);
vuint64m8_t __riscv_vwmaccu_tu(vuint64m8_t vd, uint32_t rs1, vuint32m4_t vs2,
                               size_t vl);
// masked functions, _tum policy: tail undisturbed, mask agnostic
// __riscv_vwmacc_tum overloads: mask vm first, then signed vd (also the return
// type), signed vs1 or scalar rs1, and signed vs2. vd has twice the element
// width and twice the LMUL of vs1/vs2. Each type pairing is listed in a
// vector-vector form (vs1) and a vector-scalar form (rs1).
vint16mf4_t __riscv_vwmacc_tum(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs1,
                               vint8mf8_t vs2, size_t vl);
vint16mf4_t __riscv_vwmacc_tum(vbool64_t vm, vint16mf4_t vd, int8_t rs1,
                               vint8mf8_t vs2, size_t vl);
vint16mf2_t __riscv_vwmacc_tum(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs1,
                               vint8mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vwmacc_tum(vbool32_t vm, vint16mf2_t vd, int8_t rs1,
                               vint8mf4_t vs2, size_t vl);
vint16m1_t __riscv_vwmacc_tum(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs1,
                              vint8mf2_t vs2, size_t vl);
vint16m1_t __riscv_vwmacc_tum(vbool16_t vm, vint16m1_t vd, int8_t rs1,
                              vint8mf2_t vs2, size_t vl);
vint16m2_t __riscv_vwmacc_tum(vbool8_t vm, vint16m2_t vd, vint8m1_t vs1,
                              vint8m1_t vs2, size_t vl);
vint16m2_t __riscv_vwmacc_tum(vbool8_t vm, vint16m2_t vd, int8_t rs1,
                              vint8m1_t vs2, size_t vl);
vint16m4_t __riscv_vwmacc_tum(vbool4_t vm, vint16m4_t vd, vint8m2_t vs1,
                              vint8m2_t vs2, size_t vl);
vint16m4_t __riscv_vwmacc_tum(vbool4_t vm, vint16m4_t vd, int8_t rs1,
                              vint8m2_t vs2, size_t vl);
vint16m8_t __riscv_vwmacc_tum(vbool2_t vm, vint16m8_t vd, vint8m4_t vs1,
                              vint8m4_t vs2, size_t vl);
vint16m8_t __riscv_vwmacc_tum(vbool2_t vm, vint16m8_t vd, int8_t rs1,
                              vint8m4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmacc_tum(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs1,
                               vint16mf4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmacc_tum(vbool64_t vm, vint32mf2_t vd, int16_t rs1,
                               vint16mf4_t vs2, size_t vl);
vint32m1_t __riscv_vwmacc_tum(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs1,
                              vint16mf2_t vs2, size_t vl);
vint32m1_t __riscv_vwmacc_tum(vbool32_t vm, vint32m1_t vd, int16_t rs1,
                              vint16mf2_t vs2, size_t vl);
vint32m2_t __riscv_vwmacc_tum(vbool16_t vm, vint32m2_t vd, vint16m1_t vs1,
                              vint16m1_t vs2, size_t vl);
vint32m2_t __riscv_vwmacc_tum(vbool16_t vm, vint32m2_t vd, int16_t rs1,
                              vint16m1_t vs2, size_t vl);
vint32m4_t __riscv_vwmacc_tum(vbool8_t vm, vint32m4_t vd, vint16m2_t vs1,
                              vint16m2_t vs2, size_t vl);
vint32m4_t __riscv_vwmacc_tum(vbool8_t vm, vint32m4_t vd, int16_t rs1,
                              vint16m2_t vs2, size_t vl);
vint32m8_t __riscv_vwmacc_tum(vbool4_t vm, vint32m8_t vd, vint16m4_t vs1,
                              vint16m4_t vs2, size_t vl);
vint32m8_t __riscv_vwmacc_tum(vbool4_t vm, vint32m8_t vd, int16_t rs1,
                              vint16m4_t vs2, size_t vl);
vint64m1_t __riscv_vwmacc_tum(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs1,
                              vint32mf2_t vs2, size_t vl);
vint64m1_t __riscv_vwmacc_tum(vbool64_t vm, vint64m1_t vd, int32_t rs1,
                              vint32mf2_t vs2, size_t vl);
vint64m2_t __riscv_vwmacc_tum(vbool32_t vm, vint64m2_t vd, vint32m1_t vs1,
                              vint32m1_t vs2, size_t vl);
vint64m2_t __riscv_vwmacc_tum(vbool32_t vm, vint64m2_t vd, int32_t rs1,
                              vint32m1_t vs2, size_t vl);
vint64m4_t __riscv_vwmacc_tum(vbool16_t vm, vint64m4_t vd, vint32m2_t vs1,
                              vint32m2_t vs2, size_t vl);
vint64m4_t __riscv_vwmacc_tum(vbool16_t vm, vint64m4_t vd, int32_t rs1,
                              vint32m2_t vs2, size_t vl);
vint64m8_t __riscv_vwmacc_tum(vbool8_t vm, vint64m8_t vd, vint32m4_t vs1,
                              vint32m4_t vs2, size_t vl);
vint64m8_t __riscv_vwmacc_tum(vbool8_t vm, vint64m8_t vd, int32_t rs1,
                              vint32m4_t vs2, size_t vl);
// __riscv_vwmaccsu_tum overloads: mixed signedness. vd (also the return type)
// and vs1/rs1 are signed, while vs2 is unsigned. vd has twice the element
// width and twice the LMUL of vs1/vs2. Each type pairing is listed in a
// vector-vector form (vs1) and a vector-scalar form (rs1).
vint16mf4_t __riscv_vwmaccsu_tum(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs1,
                                 vuint8mf8_t vs2, size_t vl);
vint16mf4_t __riscv_vwmaccsu_tum(vbool64_t vm, vint16mf4_t vd, int8_t rs1,
                                 vuint8mf8_t vs2, size_t vl);
vint16mf2_t __riscv_vwmaccsu_tum(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs1,
                                 vuint8mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vwmaccsu_tum(vbool32_t vm, vint16mf2_t vd, int8_t rs1,
                                 vuint8mf4_t vs2, size_t vl);
vint16m1_t __riscv_vwmaccsu_tum(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs1,
                                vuint8mf2_t vs2, size_t vl);
vint16m1_t __riscv_vwmaccsu_tum(vbool16_t vm, vint16m1_t vd, int8_t rs1,
                                vuint8mf2_t vs2, size_t vl);
vint16m2_t __riscv_vwmaccsu_tum(vbool8_t vm, vint16m2_t vd, vint8m1_t vs1,
                                vuint8m1_t vs2, size_t vl);
vint16m2_t __riscv_vwmaccsu_tum(vbool8_t vm, vint16m2_t vd, int8_t rs1,
                                vuint8m1_t vs2, size_t vl);
vint16m4_t __riscv_vwmaccsu_tum(vbool4_t vm, vint16m4_t vd, vint8m2_t vs1,
                                vuint8m2_t vs2, size_t vl);
vint16m4_t __riscv_vwmaccsu_tum(vbool4_t vm, vint16m4_t vd, int8_t rs1,
                                vuint8m2_t vs2, size_t vl);
vint16m8_t __riscv_vwmaccsu_tum(vbool2_t vm, vint16m8_t vd, vint8m4_t vs1,
                                vuint8m4_t vs2, size_t vl);
vint16m8_t __riscv_vwmaccsu_tum(vbool2_t vm, vint16m8_t vd, int8_t rs1,
                                vuint8m4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmaccsu_tum(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs1,
                                 vuint16mf4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmaccsu_tum(vbool64_t vm, vint32mf2_t vd, int16_t rs1,
                                 vuint16mf4_t vs2, size_t vl);
vint32m1_t __riscv_vwmaccsu_tum(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs1,
                                vuint16mf2_t vs2, size_t vl);
vint32m1_t __riscv_vwmaccsu_tum(vbool32_t vm, vint32m1_t vd, int16_t rs1,
                                vuint16mf2_t vs2, size_t vl);
vint32m2_t __riscv_vwmaccsu_tum(vbool16_t vm, vint32m2_t vd, vint16m1_t vs1,
                                vuint16m1_t vs2, size_t vl);
vint32m2_t __riscv_vwmaccsu_tum(vbool16_t vm, vint32m2_t vd, int16_t rs1,
                                vuint16m1_t vs2, size_t vl);
vint32m4_t __riscv_vwmaccsu_tum(vbool8_t vm, vint32m4_t vd, vint16m2_t vs1,
                                vuint16m2_t vs2, size_t vl);
vint32m4_t __riscv_vwmaccsu_tum(vbool8_t vm, vint32m4_t vd, int16_t rs1,
                                vuint16m2_t vs2, size_t vl);
vint32m8_t __riscv_vwmaccsu_tum(vbool4_t vm, vint32m8_t vd, vint16m4_t vs1,
                                vuint16m4_t vs2, size_t vl);
vint32m8_t __riscv_vwmaccsu_tum(vbool4_t vm, vint32m8_t vd, int16_t rs1,
                                vuint16m4_t vs2, size_t vl);
vint64m1_t __riscv_vwmaccsu_tum(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs1,
                                vuint32mf2_t vs2, size_t vl);
vint64m1_t __riscv_vwmaccsu_tum(vbool64_t vm, vint64m1_t vd, int32_t rs1,
                                vuint32mf2_t vs2, size_t vl);
vint64m2_t __riscv_vwmaccsu_tum(vbool32_t vm, vint64m2_t vd, vint32m1_t vs1,
                                vuint32m1_t vs2, size_t vl);
vint64m2_t __riscv_vwmaccsu_tum(vbool32_t vm, vint64m2_t vd, int32_t rs1,
                                vuint32m1_t vs2, size_t vl);
vint64m4_t __riscv_vwmaccsu_tum(vbool16_t vm, vint64m4_t vd, vint32m2_t vs1,
                                vuint32m2_t vs2, size_t vl);
vint64m4_t __riscv_vwmaccsu_tum(vbool16_t vm, vint64m4_t vd, int32_t rs1,
                                vuint32m2_t vs2, size_t vl);
vint64m8_t __riscv_vwmaccsu_tum(vbool8_t vm, vint64m8_t vd, vint32m4_t vs1,
                                vuint32m4_t vs2, size_t vl);
vint64m8_t __riscv_vwmaccsu_tum(vbool8_t vm, vint64m8_t vd, int32_t rs1,
                                vuint32m4_t vs2, size_t vl);
// __riscv_vwmaccus_tum overloads: mixed signedness. The scalar rs1 is
// unsigned, while vs2 and vd (also the return type) are signed. vd has twice
// the element width and twice the LMUL of vs2. Only vector-scalar forms are
// listed for this intrinsic.
vint16mf4_t __riscv_vwmaccus_tum(vbool64_t vm, vint16mf4_t vd, uint8_t rs1,
                                 vint8mf8_t vs2, size_t vl);
vint16mf2_t __riscv_vwmaccus_tum(vbool32_t vm, vint16mf2_t vd, uint8_t rs1,
                                 vint8mf4_t vs2, size_t vl);
vint16m1_t __riscv_vwmaccus_tum(vbool16_t vm, vint16m1_t vd, uint8_t rs1,
                                vint8mf2_t vs2, size_t vl);
vint16m2_t __riscv_vwmaccus_tum(vbool8_t vm, vint16m2_t vd, uint8_t rs1,
                                vint8m1_t vs2, size_t vl);
vint16m4_t __riscv_vwmaccus_tum(vbool4_t vm, vint16m4_t vd, uint8_t rs1,
                                vint8m2_t vs2, size_t vl);
vint16m8_t __riscv_vwmaccus_tum(vbool2_t vm, vint16m8_t vd, uint8_t rs1,
                                vint8m4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmaccus_tum(vbool64_t vm, vint32mf2_t vd, uint16_t rs1,
                                 vint16mf4_t vs2, size_t vl);
vint32m1_t __riscv_vwmaccus_tum(vbool32_t vm, vint32m1_t vd, uint16_t rs1,
                                vint16mf2_t vs2, size_t vl);
vint32m2_t __riscv_vwmaccus_tum(vbool16_t vm, vint32m2_t vd, uint16_t rs1,
                                vint16m1_t vs2, size_t vl);
vint32m4_t __riscv_vwmaccus_tum(vbool8_t vm, vint32m4_t vd, uint16_t rs1,
                                vint16m2_t vs2, size_t vl);
vint32m8_t __riscv_vwmaccus_tum(vbool4_t vm, vint32m8_t vd, uint16_t rs1,
                                vint16m4_t vs2, size_t vl);
vint64m1_t __riscv_vwmaccus_tum(vbool64_t vm, vint64m1_t vd, uint32_t rs1,
                                vint32mf2_t vs2, size_t vl);
vint64m2_t __riscv_vwmaccus_tum(vbool32_t vm, vint64m2_t vd, uint32_t rs1,
                                vint32m1_t vs2, size_t vl);
vint64m4_t __riscv_vwmaccus_tum(vbool16_t vm, vint64m4_t vd, uint32_t rs1,
                                vint32m2_t vs2, size_t vl);
vint64m8_t __riscv_vwmaccus_tum(vbool8_t vm, vint64m8_t vd, uint32_t rs1,
                                vint32m4_t vs2, size_t vl);
// __riscv_vwmaccu_tum overloads: mask vm first; all data operands are
// unsigned. vd (also the return type) has twice the element width and twice
// the LMUL of vs1/vs2. Each type pairing is listed in a vector-vector form
// (vs1) and a vector-scalar form (rs1).
vuint16mf4_t __riscv_vwmaccu_tum(vbool64_t vm, vuint16mf4_t vd, vuint8mf8_t vs1,
                                 vuint8mf8_t vs2, size_t vl);
vuint16mf4_t __riscv_vwmaccu_tum(vbool64_t vm, vuint16mf4_t vd, uint8_t rs1,
                                 vuint8mf8_t vs2, size_t vl);
vuint16mf2_t __riscv_vwmaccu_tum(vbool32_t vm, vuint16mf2_t vd, vuint8mf4_t vs1,
                                 vuint8mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vwmaccu_tum(vbool32_t vm, vuint16mf2_t vd, uint8_t rs1,
                                 vuint8mf4_t vs2, size_t vl);
vuint16m1_t __riscv_vwmaccu_tum(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs1,
                                vuint8mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vwmaccu_tum(vbool16_t vm, vuint16m1_t vd, uint8_t rs1,
                                vuint8mf2_t vs2, size_t vl);
vuint16m2_t __riscv_vwmaccu_tum(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs1,
                                vuint8m1_t vs2, size_t vl);
vuint16m2_t __riscv_vwmaccu_tum(vbool8_t vm, vuint16m2_t vd, uint8_t rs1,
                                vuint8m1_t vs2, size_t vl);
vuint16m4_t __riscv_vwmaccu_tum(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs1,
                                vuint8m2_t vs2, size_t vl);
vuint16m4_t __riscv_vwmaccu_tum(vbool4_t vm, vuint16m4_t vd, uint8_t rs1,
                                vuint8m2_t vs2, size_t vl);
vuint16m8_t __riscv_vwmaccu_tum(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs1,
                                vuint8m4_t vs2, size_t vl);
vuint16m8_t __riscv_vwmaccu_tum(vbool2_t vm, vuint16m8_t vd, uint8_t rs1,
                                vuint8m4_t vs2, size_t vl);
vuint32mf2_t __riscv_vwmaccu_tum(vbool64_t vm, vuint32mf2_t vd,
                                 vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl);
vuint32mf2_t __riscv_vwmaccu_tum(vbool64_t vm, vuint32mf2_t vd, uint16_t rs1,
                                 vuint16mf4_t vs2, size_t vl);
vuint32m1_t __riscv_vwmaccu_tum(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs1,
                                vuint16mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vwmaccu_tum(vbool32_t vm, vuint32m1_t vd, uint16_t rs1,
                                vuint16mf2_t vs2, size_t vl);
vuint32m2_t __riscv_vwmaccu_tum(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs1,
                                vuint16m1_t vs2, size_t vl);
vuint32m2_t __riscv_vwmaccu_tum(vbool16_t vm, vuint32m2_t vd, uint16_t rs1,
                                vuint16m1_t vs2, size_t vl);
vuint32m4_t __riscv_vwmaccu_tum(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs1,
                                vuint16m2_t vs2, size_t vl);
vuint32m4_t __riscv_vwmaccu_tum(vbool8_t vm, vuint32m4_t vd, uint16_t rs1,
                                vuint16m2_t vs2, size_t vl);
vuint32m8_t __riscv_vwmaccu_tum(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs1,
                                vuint16m4_t vs2, size_t vl);
vuint32m8_t __riscv_vwmaccu_tum(vbool4_t vm, vuint32m8_t vd, uint16_t rs1,
                                vuint16m4_t vs2, size_t vl);
vuint64m1_t __riscv_vwmaccu_tum(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs1,
                                vuint32mf2_t vs2, size_t vl);
vuint64m1_t __riscv_vwmaccu_tum(vbool64_t vm, vuint64m1_t vd, uint32_t rs1,
                                vuint32mf2_t vs2, size_t vl);
vuint64m2_t __riscv_vwmaccu_tum(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs1,
                                vuint32m1_t vs2, size_t vl);
vuint64m2_t __riscv_vwmaccu_tum(vbool32_t vm, vuint64m2_t vd, uint32_t rs1,
                                vuint32m1_t vs2, size_t vl);
vuint64m4_t __riscv_vwmaccu_tum(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs1,
                                vuint32m2_t vs2, size_t vl);
vuint64m4_t __riscv_vwmaccu_tum(vbool16_t vm, vuint64m4_t vd, uint32_t rs1,
                                vuint32m2_t vs2, size_t vl);
vuint64m8_t __riscv_vwmaccu_tum(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs1,
                                vuint32m4_t vs2, size_t vl);
vuint64m8_t __riscv_vwmaccu_tum(vbool8_t vm, vuint64m8_t vd, uint32_t rs1,
                                vuint32m4_t vs2, size_t vl);
// masked functions, _tumu policy: tail undisturbed, mask undisturbed
// __riscv_vwmacc_tumu overloads: mask vm first, then signed vd (also the
// return type), signed vs1 or scalar rs1, and signed vs2. vd has twice the
// element width and twice the LMUL of vs1/vs2. Each type pairing is listed in
// a vector-vector form (vs1) and a vector-scalar form (rs1).
vint16mf4_t __riscv_vwmacc_tumu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs1,
                                vint8mf8_t vs2, size_t vl);
vint16mf4_t __riscv_vwmacc_tumu(vbool64_t vm, vint16mf4_t vd, int8_t rs1,
                                vint8mf8_t vs2, size_t vl);
vint16mf2_t __riscv_vwmacc_tumu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs1,
                                vint8mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vwmacc_tumu(vbool32_t vm, vint16mf2_t vd, int8_t rs1,
                                vint8mf4_t vs2, size_t vl);
vint16m1_t __riscv_vwmacc_tumu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs1,
                               vint8mf2_t vs2, size_t vl);
vint16m1_t __riscv_vwmacc_tumu(vbool16_t vm, vint16m1_t vd, int8_t rs1,
                               vint8mf2_t vs2, size_t vl);
vint16m2_t __riscv_vwmacc_tumu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs1,
                               vint8m1_t vs2, size_t vl);
vint16m2_t __riscv_vwmacc_tumu(vbool8_t vm, vint16m2_t vd, int8_t rs1,
                               vint8m1_t vs2, size_t vl);
vint16m4_t __riscv_vwmacc_tumu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs1,
                               vint8m2_t vs2, size_t vl);
vint16m4_t __riscv_vwmacc_tumu(vbool4_t vm, vint16m4_t vd, int8_t rs1,
                               vint8m2_t vs2, size_t vl);
vint16m8_t __riscv_vwmacc_tumu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs1,
                               vint8m4_t vs2, size_t vl);
vint16m8_t __riscv_vwmacc_tumu(vbool2_t vm, vint16m8_t vd, int8_t rs1,
                               vint8m4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmacc_tumu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs1,
                                vint16mf4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmacc_tumu(vbool64_t vm, vint32mf2_t vd, int16_t rs1,
                                vint16mf4_t vs2, size_t vl);
vint32m1_t __riscv_vwmacc_tumu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs1,
                               vint16mf2_t vs2, size_t vl);
vint32m1_t __riscv_vwmacc_tumu(vbool32_t vm, vint32m1_t vd, int16_t rs1,
                               vint16mf2_t vs2, size_t vl);
vint32m2_t __riscv_vwmacc_tumu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs1,
                               vint16m1_t vs2, size_t vl);
vint32m2_t __riscv_vwmacc_tumu(vbool16_t vm, vint32m2_t vd, int16_t rs1,
                               vint16m1_t vs2, size_t vl);
vint32m4_t __riscv_vwmacc_tumu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs1,
                               vint16m2_t vs2, size_t vl);
vint32m4_t __riscv_vwmacc_tumu(vbool8_t vm, vint32m4_t vd, int16_t rs1,
                               vint16m2_t vs2, size_t vl);
vint32m8_t __riscv_vwmacc_tumu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs1,
                               vint16m4_t vs2, size_t vl);
vint32m8_t __riscv_vwmacc_tumu(vbool4_t vm, vint32m8_t vd, int16_t rs1,
                               vint16m4_t vs2, size_t vl);
vint64m1_t __riscv_vwmacc_tumu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs1,
                               vint32mf2_t vs2, size_t vl);
vint64m1_t __riscv_vwmacc_tumu(vbool64_t vm, vint64m1_t vd, int32_t rs1,
                               vint32mf2_t vs2, size_t vl);
vint64m2_t __riscv_vwmacc_tumu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs1,
                               vint32m1_t vs2, size_t vl);
vint64m2_t __riscv_vwmacc_tumu(vbool32_t vm, vint64m2_t vd, int32_t rs1,
                               vint32m1_t vs2, size_t vl);
vint64m4_t __riscv_vwmacc_tumu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs1,
                               vint32m2_t vs2, size_t vl);
vint64m4_t __riscv_vwmacc_tumu(vbool16_t vm, vint64m4_t vd, int32_t rs1,
                               vint32m2_t vs2, size_t vl);
vint64m8_t __riscv_vwmacc_tumu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs1,
                               vint32m4_t vs2, size_t vl);
vint64m8_t __riscv_vwmacc_tumu(vbool8_t vm, vint64m8_t vd, int32_t rs1,
                               vint32m4_t vs2, size_t vl);
// __riscv_vwmaccsu_tumu overloads: mixed signedness. vd (also the return
// type) and vs1/rs1 are signed, while vs2 is unsigned. vd has twice the
// element width and twice the LMUL of vs1/vs2. Each type pairing is listed in
// a vector-vector form (vs1) and a vector-scalar form (rs1).
vint16mf4_t __riscv_vwmaccsu_tumu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs1,
                                  vuint8mf8_t vs2, size_t vl);
vint16mf4_t __riscv_vwmaccsu_tumu(vbool64_t vm, vint16mf4_t vd, int8_t rs1,
                                  vuint8mf8_t vs2, size_t vl);
vint16mf2_t __riscv_vwmaccsu_tumu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs1,
                                  vuint8mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vwmaccsu_tumu(vbool32_t vm, vint16mf2_t vd, int8_t rs1,
                                  vuint8mf4_t vs2, size_t vl);
vint16m1_t __riscv_vwmaccsu_tumu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs1,
                                 vuint8mf2_t vs2, size_t vl);
vint16m1_t __riscv_vwmaccsu_tumu(vbool16_t vm, vint16m1_t vd, int8_t rs1,
                                 vuint8mf2_t vs2, size_t vl);
vint16m2_t __riscv_vwmaccsu_tumu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs1,
                                 vuint8m1_t vs2, size_t vl);
vint16m2_t __riscv_vwmaccsu_tumu(vbool8_t vm, vint16m2_t vd, int8_t rs1,
                                 vuint8m1_t vs2, size_t vl);
vint16m4_t __riscv_vwmaccsu_tumu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs1,
                                 vuint8m2_t vs2, size_t vl);
vint16m4_t __riscv_vwmaccsu_tumu(vbool4_t vm, vint16m4_t vd, int8_t rs1,
                                 vuint8m2_t vs2, size_t vl);
vint16m8_t __riscv_vwmaccsu_tumu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs1,
                                 vuint8m4_t vs2, size_t vl);
vint16m8_t __riscv_vwmaccsu_tumu(vbool2_t vm, vint16m8_t vd, int8_t rs1,
                                 vuint8m4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmaccsu_tumu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs1,
                                  vuint16mf4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmaccsu_tumu(vbool64_t vm, vint32mf2_t vd, int16_t rs1,
                                  vuint16mf4_t vs2, size_t vl);
vint32m1_t __riscv_vwmaccsu_tumu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs1,
                                 vuint16mf2_t vs2, size_t vl);
vint32m1_t __riscv_vwmaccsu_tumu(vbool32_t vm, vint32m1_t vd, int16_t rs1,
                                 vuint16mf2_t vs2, size_t vl);
vint32m2_t __riscv_vwmaccsu_tumu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs1,
                                 vuint16m1_t vs2, size_t vl);
vint32m2_t __riscv_vwmaccsu_tumu(vbool16_t vm, vint32m2_t vd, int16_t rs1,
                                 vuint16m1_t vs2, size_t vl);
vint32m4_t __riscv_vwmaccsu_tumu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs1,
                                 vuint16m2_t vs2, size_t vl);
vint32m4_t __riscv_vwmaccsu_tumu(vbool8_t vm, vint32m4_t vd, int16_t rs1,
                                 vuint16m2_t vs2, size_t vl);
vint32m8_t __riscv_vwmaccsu_tumu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs1,
                                 vuint16m4_t vs2, size_t vl);
vint32m8_t __riscv_vwmaccsu_tumu(vbool4_t vm, vint32m8_t vd, int16_t rs1,
                                 vuint16m4_t vs2, size_t vl);
vint64m1_t __riscv_vwmaccsu_tumu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs1,
                                 vuint32mf2_t vs2, size_t vl);
vint64m1_t __riscv_vwmaccsu_tumu(vbool64_t vm, vint64m1_t vd, int32_t rs1,
                                 vuint32mf2_t vs2, size_t vl);
vint64m2_t __riscv_vwmaccsu_tumu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs1,
                                 vuint32m1_t vs2, size_t vl);
vint64m2_t __riscv_vwmaccsu_tumu(vbool32_t vm, vint64m2_t vd, int32_t rs1,
                                 vuint32m1_t vs2, size_t vl);
vint64m4_t __riscv_vwmaccsu_tumu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs1,
                                 vuint32m2_t vs2, size_t vl);
vint64m4_t __riscv_vwmaccsu_tumu(vbool16_t vm, vint64m4_t vd, int32_t rs1,
                                 vuint32m2_t vs2, size_t vl);
vint64m8_t __riscv_vwmaccsu_tumu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs1,
                                 vuint32m4_t vs2, size_t vl);
vint64m8_t __riscv_vwmaccsu_tumu(vbool8_t vm, vint64m8_t vd, int32_t rs1,
                                 vuint32m4_t vs2, size_t vl);
// __riscv_vwmaccus_tumu overloads: mixed signedness. The scalar rs1 is
// unsigned, while vs2 and vd (also the return type) are signed. vd has twice
// the element width and twice the LMUL of vs2. Only vector-scalar forms are
// listed for this intrinsic.
vint16mf4_t __riscv_vwmaccus_tumu(vbool64_t vm, vint16mf4_t vd, uint8_t rs1,
                                  vint8mf8_t vs2, size_t vl);
vint16mf2_t __riscv_vwmaccus_tumu(vbool32_t vm, vint16mf2_t vd, uint8_t rs1,
                                  vint8mf4_t vs2, size_t vl);
vint16m1_t __riscv_vwmaccus_tumu(vbool16_t vm, vint16m1_t vd, uint8_t rs1,
                                 vint8mf2_t vs2, size_t vl);
vint16m2_t __riscv_vwmaccus_tumu(vbool8_t vm, vint16m2_t vd, uint8_t rs1,
                                 vint8m1_t vs2, size_t vl);
vint16m4_t __riscv_vwmaccus_tumu(vbool4_t vm, vint16m4_t vd, uint8_t rs1,
                                 vint8m2_t vs2, size_t vl);
vint16m8_t __riscv_vwmaccus_tumu(vbool2_t vm, vint16m8_t vd, uint8_t rs1,
                                 vint8m4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmaccus_tumu(vbool64_t vm, vint32mf2_t vd, uint16_t rs1,
                                  vint16mf4_t vs2, size_t vl);
vint32m1_t __riscv_vwmaccus_tumu(vbool32_t vm, vint32m1_t vd, uint16_t rs1,
                                 vint16mf2_t vs2, size_t vl);
vint32m2_t __riscv_vwmaccus_tumu(vbool16_t vm, vint32m2_t vd, uint16_t rs1,
                                 vint16m1_t vs2, size_t vl);
vint32m4_t __riscv_vwmaccus_tumu(vbool8_t vm, vint32m4_t vd, uint16_t rs1,
                                 vint16m2_t vs2, size_t vl);
vint32m8_t __riscv_vwmaccus_tumu(vbool4_t vm, vint32m8_t vd, uint16_t rs1,
                                 vint16m4_t vs2, size_t vl);
vint64m1_t __riscv_vwmaccus_tumu(vbool64_t vm, vint64m1_t vd, uint32_t rs1,
                                 vint32mf2_t vs2, size_t vl);
vint64m2_t __riscv_vwmaccus_tumu(vbool32_t vm, vint64m2_t vd, uint32_t rs1,
                                 vint32m1_t vs2, size_t vl);
vint64m4_t __riscv_vwmaccus_tumu(vbool16_t vm, vint64m4_t vd, uint32_t rs1,
                                 vint32m2_t vs2, size_t vl);
vint64m8_t __riscv_vwmaccus_tumu(vbool8_t vm, vint64m8_t vd, uint32_t rs1,
                                 vint32m4_t vs2, size_t vl);
// __riscv_vwmaccu_tumu overloads: mask vm first; all data operands are
// unsigned. vd (also the return type) has twice the element width and twice
// the LMUL of vs1/vs2. Each type pairing is listed in a vector-vector form
// (vs1) and a vector-scalar form (rs1).
vuint16mf4_t __riscv_vwmaccu_tumu(vbool64_t vm, vuint16mf4_t vd,
                                  vuint8mf8_t vs1, vuint8mf8_t vs2, size_t vl);
vuint16mf4_t __riscv_vwmaccu_tumu(vbool64_t vm, vuint16mf4_t vd, uint8_t rs1,
                                  vuint8mf8_t vs2, size_t vl);
vuint16mf2_t __riscv_vwmaccu_tumu(vbool32_t vm, vuint16mf2_t vd,
                                  vuint8mf4_t vs1, vuint8mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vwmaccu_tumu(vbool32_t vm, vuint16mf2_t vd, uint8_t rs1,
                                  vuint8mf4_t vs2, size_t vl);
vuint16m1_t __riscv_vwmaccu_tumu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs1,
                                 vuint8mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vwmaccu_tumu(vbool16_t vm, vuint16m1_t vd, uint8_t rs1,
                                 vuint8mf2_t vs2, size_t vl);
vuint16m2_t __riscv_vwmaccu_tumu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs1,
                                 vuint8m1_t vs2, size_t vl);
vuint16m2_t __riscv_vwmaccu_tumu(vbool8_t vm, vuint16m2_t vd, uint8_t rs1,
                                 vuint8m1_t vs2, size_t vl);
vuint16m4_t __riscv_vwmaccu_tumu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs1,
                                 vuint8m2_t vs2, size_t vl);
vuint16m4_t __riscv_vwmaccu_tumu(vbool4_t vm, vuint16m4_t vd, uint8_t rs1,
                                 vuint8m2_t vs2, size_t vl);
vuint16m8_t __riscv_vwmaccu_tumu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs1,
                                 vuint8m4_t vs2, size_t vl);
vuint16m8_t __riscv_vwmaccu_tumu(vbool2_t vm, vuint16m8_t vd, uint8_t rs1,
                                 vuint8m4_t vs2, size_t vl);
vuint32mf2_t __riscv_vwmaccu_tumu(vbool64_t vm, vuint32mf2_t vd,
                                  vuint16mf4_t vs1, vuint16mf4_t vs2,
                                  size_t vl);
vuint32mf2_t __riscv_vwmaccu_tumu(vbool64_t vm, vuint32mf2_t vd, uint16_t rs1,
                                  vuint16mf4_t vs2, size_t vl);
vuint32m1_t __riscv_vwmaccu_tumu(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs1,
                                 vuint16mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vwmaccu_tumu(vbool32_t vm, vuint32m1_t vd, uint16_t rs1,
                                 vuint16mf2_t vs2, size_t vl);
vuint32m2_t __riscv_vwmaccu_tumu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs1,
                                 vuint16m1_t vs2, size_t vl);
vuint32m2_t __riscv_vwmaccu_tumu(vbool16_t vm, vuint32m2_t vd, uint16_t rs1,
                                 vuint16m1_t vs2, size_t vl);
vuint32m4_t __riscv_vwmaccu_tumu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs1,
                                 vuint16m2_t vs2, size_t vl);
vuint32m4_t __riscv_vwmaccu_tumu(vbool8_t vm, vuint32m4_t vd, uint16_t rs1,
                                 vuint16m2_t vs2, size_t vl);
vuint32m8_t __riscv_vwmaccu_tumu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs1,
                                 vuint16m4_t vs2, size_t vl);
vuint32m8_t __riscv_vwmaccu_tumu(vbool4_t vm, vuint32m8_t vd, uint16_t rs1,
                                 vuint16m4_t vs2, size_t vl);
vuint64m1_t __riscv_vwmaccu_tumu(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs1,
                                 vuint32mf2_t vs2, size_t vl);
vuint64m1_t __riscv_vwmaccu_tumu(vbool64_t vm, vuint64m1_t vd, uint32_t rs1,
                                 vuint32mf2_t vs2, size_t vl);
vuint64m2_t __riscv_vwmaccu_tumu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs1,
                                 vuint32m1_t vs2, size_t vl);
vuint64m2_t __riscv_vwmaccu_tumu(vbool32_t vm, vuint64m2_t vd, uint32_t rs1,
                                 vuint32m1_t vs2, size_t vl);
vuint64m4_t __riscv_vwmaccu_tumu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs1,
                                 vuint32m2_t vs2, size_t vl);
vuint64m4_t __riscv_vwmaccu_tumu(vbool16_t vm, vuint64m4_t vd, uint32_t rs1,
                                 vuint32m2_t vs2, size_t vl);
vuint64m8_t __riscv_vwmaccu_tumu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs1,
                                 vuint32m4_t vs2, size_t vl);
vuint64m8_t __riscv_vwmaccu_tumu(vbool8_t vm, vuint64m8_t vd, uint32_t rs1,
                                 vuint32m4_t vs2, size_t vl);
// masked functions
// __riscv_vwmacc_mu: overloads whose sources and destination are all signed.
// Argument order: mask vm, vd (same type as the return value), vs1 (vector)
// or rs1 (scalar), vs2, vl. Every destination type has one vector-vector and
// one scalar-vector overload.
// The destination element is twice as wide as the source element
// (int8 -> int16, int16 -> int32, int32 -> int64) and its LMUL is doubled.
// vm is vboolN_t with N = destination element width / destination LMUL
// (e.g. vint16mf4_t pairs with vbool64_t, vint16m8_t with vbool2_t).
// NOTE(review): the arithmetic performed and the exact meaning of the _mu
// policy suffix come from the RVV specifications, not from this listing.
vint16mf4_t __riscv_vwmacc_mu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs1,
                              vint8mf8_t vs2, size_t vl);
vint16mf4_t __riscv_vwmacc_mu(vbool64_t vm, vint16mf4_t vd, int8_t rs1,
                              vint8mf8_t vs2, size_t vl);
vint16mf2_t __riscv_vwmacc_mu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs1,
                              vint8mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vwmacc_mu(vbool32_t vm, vint16mf2_t vd, int8_t rs1,
                              vint8mf4_t vs2, size_t vl);
vint16m1_t __riscv_vwmacc_mu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs1,
                             vint8mf2_t vs2, size_t vl);
vint16m1_t __riscv_vwmacc_mu(vbool16_t vm, vint16m1_t vd, int8_t rs1,
                             vint8mf2_t vs2, size_t vl);
vint16m2_t __riscv_vwmacc_mu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs1,
                             vint8m1_t vs2, size_t vl);
vint16m2_t __riscv_vwmacc_mu(vbool8_t vm, vint16m2_t vd, int8_t rs1,
                             vint8m1_t vs2, size_t vl);
vint16m4_t __riscv_vwmacc_mu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs1,
                             vint8m2_t vs2, size_t vl);
vint16m4_t __riscv_vwmacc_mu(vbool4_t vm, vint16m4_t vd, int8_t rs1,
                             vint8m2_t vs2, size_t vl);
vint16m8_t __riscv_vwmacc_mu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs1,
                             vint8m4_t vs2, size_t vl);
vint16m8_t __riscv_vwmacc_mu(vbool2_t vm, vint16m8_t vd, int8_t rs1,
                             vint8m4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmacc_mu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs1,
                              vint16mf4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmacc_mu(vbool64_t vm, vint32mf2_t vd, int16_t rs1,
                              vint16mf4_t vs2, size_t vl);
vint32m1_t __riscv_vwmacc_mu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs1,
                             vint16mf2_t vs2, size_t vl);
vint32m1_t __riscv_vwmacc_mu(vbool32_t vm, vint32m1_t vd, int16_t rs1,
                             vint16mf2_t vs2, size_t vl);
vint32m2_t __riscv_vwmacc_mu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs1,
                             vint16m1_t vs2, size_t vl);
vint32m2_t __riscv_vwmacc_mu(vbool16_t vm, vint32m2_t vd, int16_t rs1,
                             vint16m1_t vs2, size_t vl);
vint32m4_t __riscv_vwmacc_mu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs1,
                             vint16m2_t vs2, size_t vl);
vint32m4_t __riscv_vwmacc_mu(vbool8_t vm, vint32m4_t vd, int16_t rs1,
                             vint16m2_t vs2, size_t vl);
vint32m8_t __riscv_vwmacc_mu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs1,
                             vint16m4_t vs2, size_t vl);
vint32m8_t __riscv_vwmacc_mu(vbool4_t vm, vint32m8_t vd, int16_t rs1,
                             vint16m4_t vs2, size_t vl);
vint64m1_t __riscv_vwmacc_mu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs1,
                             vint32mf2_t vs2, size_t vl);
vint64m1_t __riscv_vwmacc_mu(vbool64_t vm, vint64m1_t vd, int32_t rs1,
                             vint32mf2_t vs2, size_t vl);
vint64m2_t __riscv_vwmacc_mu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs1,
                             vint32m1_t vs2, size_t vl);
vint64m2_t __riscv_vwmacc_mu(vbool32_t vm, vint64m2_t vd, int32_t rs1,
                             vint32m1_t vs2, size_t vl);
vint64m4_t __riscv_vwmacc_mu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs1,
                             vint32m2_t vs2, size_t vl);
vint64m4_t __riscv_vwmacc_mu(vbool16_t vm, vint64m4_t vd, int32_t rs1,
                             vint32m2_t vs2, size_t vl);
vint64m8_t __riscv_vwmacc_mu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs1,
                             vint32m4_t vs2, size_t vl);
vint64m8_t __riscv_vwmacc_mu(vbool8_t vm, vint64m8_t vd, int32_t rs1,
                             vint32m4_t vs2, size_t vl);
// __riscv_vwmaccsu_mu: mixed-sign overloads. vs1 / rs1 is signed, vs2 is
// unsigned, and vd (same type as the return value) is signed.
// Argument order: mask vm, vd, vs1 (vector) or rs1 (scalar), vs2, vl.
// The destination element is twice as wide as the source element and its
// LMUL is doubled; vm is vboolN_t with N = destination width / LMUL.
// NOTE(review): the arithmetic performed and the exact meaning of the _mu
// policy suffix come from the RVV specifications, not from this listing.
vint16mf4_t __riscv_vwmaccsu_mu(vbool64_t vm, vint16mf4_t vd, vint8mf8_t vs1,
                                vuint8mf8_t vs2, size_t vl);
vint16mf4_t __riscv_vwmaccsu_mu(vbool64_t vm, vint16mf4_t vd, int8_t rs1,
                                vuint8mf8_t vs2, size_t vl);
vint16mf2_t __riscv_vwmaccsu_mu(vbool32_t vm, vint16mf2_t vd, vint8mf4_t vs1,
                                vuint8mf4_t vs2, size_t vl);
vint16mf2_t __riscv_vwmaccsu_mu(vbool32_t vm, vint16mf2_t vd, int8_t rs1,
                                vuint8mf4_t vs2, size_t vl);
vint16m1_t __riscv_vwmaccsu_mu(vbool16_t vm, vint16m1_t vd, vint8mf2_t vs1,
                               vuint8mf2_t vs2, size_t vl);
vint16m1_t __riscv_vwmaccsu_mu(vbool16_t vm, vint16m1_t vd, int8_t rs1,
                               vuint8mf2_t vs2, size_t vl);
vint16m2_t __riscv_vwmaccsu_mu(vbool8_t vm, vint16m2_t vd, vint8m1_t vs1,
                               vuint8m1_t vs2, size_t vl);
vint16m2_t __riscv_vwmaccsu_mu(vbool8_t vm, vint16m2_t vd, int8_t rs1,
                               vuint8m1_t vs2, size_t vl);
vint16m4_t __riscv_vwmaccsu_mu(vbool4_t vm, vint16m4_t vd, vint8m2_t vs1,
                               vuint8m2_t vs2, size_t vl);
vint16m4_t __riscv_vwmaccsu_mu(vbool4_t vm, vint16m4_t vd, int8_t rs1,
                               vuint8m2_t vs2, size_t vl);
vint16m8_t __riscv_vwmaccsu_mu(vbool2_t vm, vint16m8_t vd, vint8m4_t vs1,
                               vuint8m4_t vs2, size_t vl);
vint16m8_t __riscv_vwmaccsu_mu(vbool2_t vm, vint16m8_t vd, int8_t rs1,
                               vuint8m4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmaccsu_mu(vbool64_t vm, vint32mf2_t vd, vint16mf4_t vs1,
                                vuint16mf4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmaccsu_mu(vbool64_t vm, vint32mf2_t vd, int16_t rs1,
                                vuint16mf4_t vs2, size_t vl);
vint32m1_t __riscv_vwmaccsu_mu(vbool32_t vm, vint32m1_t vd, vint16mf2_t vs1,
                               vuint16mf2_t vs2, size_t vl);
vint32m1_t __riscv_vwmaccsu_mu(vbool32_t vm, vint32m1_t vd, int16_t rs1,
                               vuint16mf2_t vs2, size_t vl);
vint32m2_t __riscv_vwmaccsu_mu(vbool16_t vm, vint32m2_t vd, vint16m1_t vs1,
                               vuint16m1_t vs2, size_t vl);
vint32m2_t __riscv_vwmaccsu_mu(vbool16_t vm, vint32m2_t vd, int16_t rs1,
                               vuint16m1_t vs2, size_t vl);
vint32m4_t __riscv_vwmaccsu_mu(vbool8_t vm, vint32m4_t vd, vint16m2_t vs1,
                               vuint16m2_t vs2, size_t vl);
vint32m4_t __riscv_vwmaccsu_mu(vbool8_t vm, vint32m4_t vd, int16_t rs1,
                               vuint16m2_t vs2, size_t vl);
vint32m8_t __riscv_vwmaccsu_mu(vbool4_t vm, vint32m8_t vd, vint16m4_t vs1,
                               vuint16m4_t vs2, size_t vl);
vint32m8_t __riscv_vwmaccsu_mu(vbool4_t vm, vint32m8_t vd, int16_t rs1,
                               vuint16m4_t vs2, size_t vl);
vint64m1_t __riscv_vwmaccsu_mu(vbool64_t vm, vint64m1_t vd, vint32mf2_t vs1,
                               vuint32mf2_t vs2, size_t vl);
vint64m1_t __riscv_vwmaccsu_mu(vbool64_t vm, vint64m1_t vd, int32_t rs1,
                               vuint32mf2_t vs2, size_t vl);
vint64m2_t __riscv_vwmaccsu_mu(vbool32_t vm, vint64m2_t vd, vint32m1_t vs1,
                               vuint32m1_t vs2, size_t vl);
vint64m2_t __riscv_vwmaccsu_mu(vbool32_t vm, vint64m2_t vd, int32_t rs1,
                               vuint32m1_t vs2, size_t vl);
vint64m4_t __riscv_vwmaccsu_mu(vbool16_t vm, vint64m4_t vd, vint32m2_t vs1,
                               vuint32m2_t vs2, size_t vl);
vint64m4_t __riscv_vwmaccsu_mu(vbool16_t vm, vint64m4_t vd, int32_t rs1,
                               vuint32m2_t vs2, size_t vl);
vint64m8_t __riscv_vwmaccsu_mu(vbool8_t vm, vint64m8_t vd, vint32m4_t vs1,
                               vuint32m4_t vs2, size_t vl);
vint64m8_t __riscv_vwmaccsu_mu(vbool8_t vm, vint64m8_t vd, int32_t rs1,
                               vuint32m4_t vs2, size_t vl);
// __riscv_vwmaccus_mu: mixed-sign overloads with an unsigned scalar rs1 and a
// signed vector vs2; vd (same type as the return value) is signed.
// Only scalar (rs1) forms are listed for this intrinsic - unlike the
// neighbouring vwmacc / vwmaccsu / vwmaccu groups there is no overload taking
// a vector vs1.
// Argument order: mask vm, vd, rs1, vs2, vl. The destination element is twice
// as wide as the source element and its LMUL is doubled; vm is vboolN_t with
// N = destination width / LMUL.
// NOTE(review): the arithmetic performed and the exact meaning of the _mu
// policy suffix come from the RVV specifications, not from this listing.
vint16mf4_t __riscv_vwmaccus_mu(vbool64_t vm, vint16mf4_t vd, uint8_t rs1,
                                vint8mf8_t vs2, size_t vl);
vint16mf2_t __riscv_vwmaccus_mu(vbool32_t vm, vint16mf2_t vd, uint8_t rs1,
                                vint8mf4_t vs2, size_t vl);
vint16m1_t __riscv_vwmaccus_mu(vbool16_t vm, vint16m1_t vd, uint8_t rs1,
                               vint8mf2_t vs2, size_t vl);
vint16m2_t __riscv_vwmaccus_mu(vbool8_t vm, vint16m2_t vd, uint8_t rs1,
                               vint8m1_t vs2, size_t vl);
vint16m4_t __riscv_vwmaccus_mu(vbool4_t vm, vint16m4_t vd, uint8_t rs1,
                               vint8m2_t vs2, size_t vl);
vint16m8_t __riscv_vwmaccus_mu(vbool2_t vm, vint16m8_t vd, uint8_t rs1,
                               vint8m4_t vs2, size_t vl);
vint32mf2_t __riscv_vwmaccus_mu(vbool64_t vm, vint32mf2_t vd, uint16_t rs1,
                                vint16mf4_t vs2, size_t vl);
vint32m1_t __riscv_vwmaccus_mu(vbool32_t vm, vint32m1_t vd, uint16_t rs1,
                               vint16mf2_t vs2, size_t vl);
vint32m2_t __riscv_vwmaccus_mu(vbool16_t vm, vint32m2_t vd, uint16_t rs1,
                               vint16m1_t vs2, size_t vl);
vint32m4_t __riscv_vwmaccus_mu(vbool8_t vm, vint32m4_t vd, uint16_t rs1,
                               vint16m2_t vs2, size_t vl);
vint32m8_t __riscv_vwmaccus_mu(vbool4_t vm, vint32m8_t vd, uint16_t rs1,
                               vint16m4_t vs2, size_t vl);
vint64m1_t __riscv_vwmaccus_mu(vbool64_t vm, vint64m1_t vd, uint32_t rs1,
                               vint32mf2_t vs2, size_t vl);
vint64m2_t __riscv_vwmaccus_mu(vbool32_t vm, vint64m2_t vd, uint32_t rs1,
                               vint32m1_t vs2, size_t vl);
vint64m4_t __riscv_vwmaccus_mu(vbool16_t vm, vint64m4_t vd, uint32_t rs1,
                               vint32m2_t vs2, size_t vl);
vint64m8_t __riscv_vwmaccus_mu(vbool8_t vm, vint64m8_t vd, uint32_t rs1,
                               vint32m4_t vs2, size_t vl);
// __riscv_vwmaccu_mu: overloads whose sources and destination are all
// unsigned.
// Argument order: mask vm, vd (same type as the return value), vs1 (vector)
// or rs1 (scalar), vs2, vl. Every destination type has one vector-vector and
// one scalar-vector overload.
// The destination element is twice as wide as the source element
// (uint8 -> uint16, uint16 -> uint32, uint32 -> uint64) and its LMUL is
// doubled; vm is vboolN_t with N = destination width / LMUL.
// NOTE(review): the arithmetic performed and the exact meaning of the _mu
// policy suffix come from the RVV specifications, not from this listing.
vuint16mf4_t __riscv_vwmaccu_mu(vbool64_t vm, vuint16mf4_t vd, vuint8mf8_t vs1,
                                vuint8mf8_t vs2, size_t vl);
vuint16mf4_t __riscv_vwmaccu_mu(vbool64_t vm, vuint16mf4_t vd, uint8_t rs1,
                                vuint8mf8_t vs2, size_t vl);
vuint16mf2_t __riscv_vwmaccu_mu(vbool32_t vm, vuint16mf2_t vd, vuint8mf4_t vs1,
                                vuint8mf4_t vs2, size_t vl);
vuint16mf2_t __riscv_vwmaccu_mu(vbool32_t vm, vuint16mf2_t vd, uint8_t rs1,
                                vuint8mf4_t vs2, size_t vl);
vuint16m1_t __riscv_vwmaccu_mu(vbool16_t vm, vuint16m1_t vd, vuint8mf2_t vs1,
                               vuint8mf2_t vs2, size_t vl);
vuint16m1_t __riscv_vwmaccu_mu(vbool16_t vm, vuint16m1_t vd, uint8_t rs1,
                               vuint8mf2_t vs2, size_t vl);
vuint16m2_t __riscv_vwmaccu_mu(vbool8_t vm, vuint16m2_t vd, vuint8m1_t vs1,
                               vuint8m1_t vs2, size_t vl);
vuint16m2_t __riscv_vwmaccu_mu(vbool8_t vm, vuint16m2_t vd, uint8_t rs1,
                               vuint8m1_t vs2, size_t vl);
vuint16m4_t __riscv_vwmaccu_mu(vbool4_t vm, vuint16m4_t vd, vuint8m2_t vs1,
                               vuint8m2_t vs2, size_t vl);
vuint16m4_t __riscv_vwmaccu_mu(vbool4_t vm, vuint16m4_t vd, uint8_t rs1,
                               vuint8m2_t vs2, size_t vl);
vuint16m8_t __riscv_vwmaccu_mu(vbool2_t vm, vuint16m8_t vd, vuint8m4_t vs1,
                               vuint8m4_t vs2, size_t vl);
vuint16m8_t __riscv_vwmaccu_mu(vbool2_t vm, vuint16m8_t vd, uint8_t rs1,
                               vuint8m4_t vs2, size_t vl);
vuint32mf2_t __riscv_vwmaccu_mu(vbool64_t vm, vuint32mf2_t vd, vuint16mf4_t vs1,
                                vuint16mf4_t vs2, size_t vl);
vuint32mf2_t __riscv_vwmaccu_mu(vbool64_t vm, vuint32mf2_t vd, uint16_t rs1,
                                vuint16mf4_t vs2, size_t vl);
vuint32m1_t __riscv_vwmaccu_mu(vbool32_t vm, vuint32m1_t vd, vuint16mf2_t vs1,
                               vuint16mf2_t vs2, size_t vl);
vuint32m1_t __riscv_vwmaccu_mu(vbool32_t vm, vuint32m1_t vd, uint16_t rs1,
                               vuint16mf2_t vs2, size_t vl);
vuint32m2_t __riscv_vwmaccu_mu(vbool16_t vm, vuint32m2_t vd, vuint16m1_t vs1,
                               vuint16m1_t vs2, size_t vl);
vuint32m2_t __riscv_vwmaccu_mu(vbool16_t vm, vuint32m2_t vd, uint16_t rs1,
                               vuint16m1_t vs2, size_t vl);
vuint32m4_t __riscv_vwmaccu_mu(vbool8_t vm, vuint32m4_t vd, vuint16m2_t vs1,
                               vuint16m2_t vs2, size_t vl);
vuint32m4_t __riscv_vwmaccu_mu(vbool8_t vm, vuint32m4_t vd, uint16_t rs1,
                               vuint16m2_t vs2, size_t vl);
vuint32m8_t __riscv_vwmaccu_mu(vbool4_t vm, vuint32m8_t vd, vuint16m4_t vs1,
                               vuint16m4_t vs2, size_t vl);
vuint32m8_t __riscv_vwmaccu_mu(vbool4_t vm, vuint32m8_t vd, uint16_t rs1,
                               vuint16m4_t vs2, size_t vl);
vuint64m1_t __riscv_vwmaccu_mu(vbool64_t vm, vuint64m1_t vd, vuint32mf2_t vs1,
                               vuint32mf2_t vs2, size_t vl);
vuint64m1_t __riscv_vwmaccu_mu(vbool64_t vm, vuint64m1_t vd, uint32_t rs1,
                               vuint32mf2_t vs2, size_t vl);
vuint64m2_t __riscv_vwmaccu_mu(vbool32_t vm, vuint64m2_t vd, vuint32m1_t vs1,
                               vuint32m1_t vs2, size_t vl);
vuint64m2_t __riscv_vwmaccu_mu(vbool32_t vm, vuint64m2_t vd, uint32_t rs1,
                               vuint32m1_t vs2, size_t vl);
vuint64m4_t __riscv_vwmaccu_mu(vbool16_t vm, vuint64m4_t vd, vuint32m2_t vs1,
                               vuint32m2_t vs2, size_t vl);
vuint64m4_t __riscv_vwmaccu_mu(vbool16_t vm, vuint64m4_t vd, uint32_t rs1,
                               vuint32m2_t vs2, size_t vl);
vuint64m8_t __riscv_vwmaccu_mu(vbool8_t vm, vuint64m8_t vd, vuint32m4_t vs1,
                               vuint32m4_t vs2, size_t vl);
vuint64m8_t __riscv_vwmaccu_mu(vbool8_t vm, vuint64m8_t vd, uint32_t rs1,
                               vuint32m4_t vs2, size_t vl);

Vector Integer Merge Intrinsics

// __riscv_vmerge_tu, signed element types (int8 .. int64, every LMUL listed).
// Argument order: vd (same type as the return value), vs2, then vs1 (vector)
// or rs1 (scalar of the matching intN_t), the mask v0, and vl.
// Unlike the masked (_mu / _tumu) intrinsics, the mask is not the first
// argument here: it is passed as v0, just before vl.
// v0 is vboolN_t with N = element width / LMUL
// (e.g. vint8mf8_t pairs with vbool64_t, vint8m8_t with vbool1_t).
// NOTE(review): which operand a set mask bit selects, and the exact meaning
// of the _tu policy suffix, are defined by the RVV specifications.
vint8mf8_t __riscv_vmerge_tu(vint8mf8_t vd, vint8mf8_t vs2, vint8mf8_t vs1,
                             vbool64_t v0, size_t vl);
vint8mf8_t __riscv_vmerge_tu(vint8mf8_t vd, vint8mf8_t vs2, int8_t rs1,
                             vbool64_t v0, size_t vl);
vint8mf4_t __riscv_vmerge_tu(vint8mf4_t vd, vint8mf4_t vs2, vint8mf4_t vs1,
                             vbool32_t v0, size_t vl);
vint8mf4_t __riscv_vmerge_tu(vint8mf4_t vd, vint8mf4_t vs2, int8_t rs1,
                             vbool32_t v0, size_t vl);
vint8mf2_t __riscv_vmerge_tu(vint8mf2_t vd, vint8mf2_t vs2, vint8mf2_t vs1,
                             vbool16_t v0, size_t vl);
vint8mf2_t __riscv_vmerge_tu(vint8mf2_t vd, vint8mf2_t vs2, int8_t rs1,
                             vbool16_t v0, size_t vl);
vint8m1_t __riscv_vmerge_tu(vint8m1_t vd, vint8m1_t vs2, vint8m1_t vs1,
                            vbool8_t v0, size_t vl);
vint8m1_t __riscv_vmerge_tu(vint8m1_t vd, vint8m1_t vs2, int8_t rs1,
                            vbool8_t v0, size_t vl);
vint8m2_t __riscv_vmerge_tu(vint8m2_t vd, vint8m2_t vs2, vint8m2_t vs1,
                            vbool4_t v0, size_t vl);
vint8m2_t __riscv_vmerge_tu(vint8m2_t vd, vint8m2_t vs2, int8_t rs1,
                            vbool4_t v0, size_t vl);
vint8m4_t __riscv_vmerge_tu(vint8m4_t vd, vint8m4_t vs2, vint8m4_t vs1,
                            vbool2_t v0, size_t vl);
vint8m4_t __riscv_vmerge_tu(vint8m4_t vd, vint8m4_t vs2, int8_t rs1,
                            vbool2_t v0, size_t vl);
vint8m8_t __riscv_vmerge_tu(vint8m8_t vd, vint8m8_t vs2, vint8m8_t vs1,
                            vbool1_t v0, size_t vl);
vint8m8_t __riscv_vmerge_tu(vint8m8_t vd, vint8m8_t vs2, int8_t rs1,
                            vbool1_t v0, size_t vl);
vint16mf4_t __riscv_vmerge_tu(vint16mf4_t vd, vint16mf4_t vs2, vint16mf4_t vs1,
                              vbool64_t v0, size_t vl);
vint16mf4_t __riscv_vmerge_tu(vint16mf4_t vd, vint16mf4_t vs2, int16_t rs1,
                              vbool64_t v0, size_t vl);
vint16mf2_t __riscv_vmerge_tu(vint16mf2_t vd, vint16mf2_t vs2, vint16mf2_t vs1,
                              vbool32_t v0, size_t vl);
vint16mf2_t __riscv_vmerge_tu(vint16mf2_t vd, vint16mf2_t vs2, int16_t rs1,
                              vbool32_t v0, size_t vl);
vint16m1_t __riscv_vmerge_tu(vint16m1_t vd, vint16m1_t vs2, vint16m1_t vs1,
                             vbool16_t v0, size_t vl);
vint16m1_t __riscv_vmerge_tu(vint16m1_t vd, vint16m1_t vs2, int16_t rs1,
                             vbool16_t v0, size_t vl);
vint16m2_t __riscv_vmerge_tu(vint16m2_t vd, vint16m2_t vs2, vint16m2_t vs1,
                             vbool8_t v0, size_t vl);
vint16m2_t __riscv_vmerge_tu(vint16m2_t vd, vint16m2_t vs2, int16_t rs1,
                             vbool8_t v0, size_t vl);
vint16m4_t __riscv_vmerge_tu(vint16m4_t vd, vint16m4_t vs2, vint16m4_t vs1,
                             vbool4_t v0, size_t vl);
vint16m4_t __riscv_vmerge_tu(vint16m4_t vd, vint16m4_t vs2, int16_t rs1,
                             vbool4_t v0, size_t vl);
vint16m8_t __riscv_vmerge_tu(vint16m8_t vd, vint16m8_t vs2, vint16m8_t vs1,
                             vbool2_t v0, size_t vl);
vint16m8_t __riscv_vmerge_tu(vint16m8_t vd, vint16m8_t vs2, int16_t rs1,
                             vbool2_t v0, size_t vl);
vint32mf2_t __riscv_vmerge_tu(vint32mf2_t vd, vint32mf2_t vs2, vint32mf2_t vs1,
                              vbool64_t v0, size_t vl);
vint32mf2_t __riscv_vmerge_tu(vint32mf2_t vd, vint32mf2_t vs2, int32_t rs1,
                              vbool64_t v0, size_t vl);
vint32m1_t __riscv_vmerge_tu(vint32m1_t vd, vint32m1_t vs2, vint32m1_t vs1,
                             vbool32_t v0, size_t vl);
vint32m1_t __riscv_vmerge_tu(vint32m1_t vd, vint32m1_t vs2, int32_t rs1,
                             vbool32_t v0, size_t vl);
vint32m2_t __riscv_vmerge_tu(vint32m2_t vd, vint32m2_t vs2, vint32m2_t vs1,
                             vbool16_t v0, size_t vl);
vint32m2_t __riscv_vmerge_tu(vint32m2_t vd, vint32m2_t vs2, int32_t rs1,
                             vbool16_t v0, size_t vl);
vint32m4_t __riscv_vmerge_tu(vint32m4_t vd, vint32m4_t vs2, vint32m4_t vs1,
                             vbool8_t v0, size_t vl);
vint32m4_t __riscv_vmerge_tu(vint32m4_t vd, vint32m4_t vs2, int32_t rs1,
                             vbool8_t v0, size_t vl);
vint32m8_t __riscv_vmerge_tu(vint32m8_t vd, vint32m8_t vs2, vint32m8_t vs1,
                             vbool4_t v0, size_t vl);
vint32m8_t __riscv_vmerge_tu(vint32m8_t vd, vint32m8_t vs2, int32_t rs1,
                             vbool4_t v0, size_t vl);
vint64m1_t __riscv_vmerge_tu(vint64m1_t vd, vint64m1_t vs2, vint64m1_t vs1,
                             vbool64_t v0, size_t vl);
vint64m1_t __riscv_vmerge_tu(vint64m1_t vd, vint64m1_t vs2, int64_t rs1,
                             vbool64_t v0, size_t vl);
vint64m2_t __riscv_vmerge_tu(vint64m2_t vd, vint64m2_t vs2, vint64m2_t vs1,
                             vbool32_t v0, size_t vl);
vint64m2_t __riscv_vmerge_tu(vint64m2_t vd, vint64m2_t vs2, int64_t rs1,
                             vbool32_t v0, size_t vl);
vint64m4_t __riscv_vmerge_tu(vint64m4_t vd, vint64m4_t vs2, vint64m4_t vs1,
                             vbool16_t v0, size_t vl);
vint64m4_t __riscv_vmerge_tu(vint64m4_t vd, vint64m4_t vs2, int64_t rs1,
                             vbool16_t v0, size_t vl);
vint64m8_t __riscv_vmerge_tu(vint64m8_t vd, vint64m8_t vs2, vint64m8_t vs1,
                             vbool8_t v0, size_t vl);
vint64m8_t __riscv_vmerge_tu(vint64m8_t vd, vint64m8_t vs2, int64_t rs1,
                             vbool8_t v0, size_t vl);
// __riscv_vmerge_tu, unsigned element types (uint8 .. uint64, every LMUL
// listed).
// Argument order: vd (same type as the return value), vs2, then vs1 (vector)
// or rs1 (scalar of the matching uintN_t), the mask v0, and vl.
// v0 is vboolN_t with N = element width / LMUL
// (e.g. vuint8mf8_t pairs with vbool64_t, vuint64m8_t with vbool8_t).
// NOTE(review): which operand a set mask bit selects, and the exact meaning
// of the _tu policy suffix, are defined by the RVV specifications.
vuint8mf8_t __riscv_vmerge_tu(vuint8mf8_t vd, vuint8mf8_t vs2, vuint8mf8_t vs1,
                              vbool64_t v0, size_t vl);
vuint8mf8_t __riscv_vmerge_tu(vuint8mf8_t vd, vuint8mf8_t vs2, uint8_t rs1,
                              vbool64_t v0, size_t vl);
vuint8mf4_t __riscv_vmerge_tu(vuint8mf4_t vd, vuint8mf4_t vs2, vuint8mf4_t vs1,
                              vbool32_t v0, size_t vl);
vuint8mf4_t __riscv_vmerge_tu(vuint8mf4_t vd, vuint8mf4_t vs2, uint8_t rs1,
                              vbool32_t v0, size_t vl);
vuint8mf2_t __riscv_vmerge_tu(vuint8mf2_t vd, vuint8mf2_t vs2, vuint8mf2_t vs1,
                              vbool16_t v0, size_t vl);
vuint8mf2_t __riscv_vmerge_tu(vuint8mf2_t vd, vuint8mf2_t vs2, uint8_t rs1,
                              vbool16_t v0, size_t vl);
vuint8m1_t __riscv_vmerge_tu(vuint8m1_t vd, vuint8m1_t vs2, vuint8m1_t vs1,
                             vbool8_t v0, size_t vl);
vuint8m1_t __riscv_vmerge_tu(vuint8m1_t vd, vuint8m1_t vs2, uint8_t rs1,
                             vbool8_t v0, size_t vl);
vuint8m2_t __riscv_vmerge_tu(vuint8m2_t vd, vuint8m2_t vs2, vuint8m2_t vs1,
                             vbool4_t v0, size_t vl);
vuint8m2_t __riscv_vmerge_tu(vuint8m2_t vd, vuint8m2_t vs2, uint8_t rs1,
                             vbool4_t v0, size_t vl);
vuint8m4_t __riscv_vmerge_tu(vuint8m4_t vd, vuint8m4_t vs2, vuint8m4_t vs1,
                             vbool2_t v0, size_t vl);
vuint8m4_t __riscv_vmerge_tu(vuint8m4_t vd, vuint8m4_t vs2, uint8_t rs1,
                             vbool2_t v0, size_t vl);
vuint8m8_t __riscv_vmerge_tu(vuint8m8_t vd, vuint8m8_t vs2, vuint8m8_t vs1,
                             vbool1_t v0, size_t vl);
vuint8m8_t __riscv_vmerge_tu(vuint8m8_t vd, vuint8m8_t vs2, uint8_t rs1,
                             vbool1_t v0, size_t vl);
vuint16mf4_t __riscv_vmerge_tu(vuint16mf4_t vd, vuint16mf4_t vs2,
                               vuint16mf4_t vs1, vbool64_t v0, size_t vl);
vuint16mf4_t __riscv_vmerge_tu(vuint16mf4_t vd, vuint16mf4_t vs2, uint16_t rs1,
                               vbool64_t v0, size_t vl);
vuint16mf2_t __riscv_vmerge_tu(vuint16mf2_t vd, vuint16mf2_t vs2,
                               vuint16mf2_t vs1, vbool32_t v0, size_t vl);
vuint16mf2_t __riscv_vmerge_tu(vuint16mf2_t vd, vuint16mf2_t vs2, uint16_t rs1,
                               vbool32_t v0, size_t vl);
vuint16m1_t __riscv_vmerge_tu(vuint16m1_t vd, vuint16m1_t vs2, vuint16m1_t vs1,
                              vbool16_t v0, size_t vl);
vuint16m1_t __riscv_vmerge_tu(vuint16m1_t vd, vuint16m1_t vs2, uint16_t rs1,
                              vbool16_t v0, size_t vl);
vuint16m2_t __riscv_vmerge_tu(vuint16m2_t vd, vuint16m2_t vs2, vuint16m2_t vs1,
                              vbool8_t v0, size_t vl);
vuint16m2_t __riscv_vmerge_tu(vuint16m2_t vd, vuint16m2_t vs2, uint16_t rs1,
                              vbool8_t v0, size_t vl);
vuint16m4_t __riscv_vmerge_tu(vuint16m4_t vd, vuint16m4_t vs2, vuint16m4_t vs1,
                              vbool4_t v0, size_t vl);
vuint16m4_t __riscv_vmerge_tu(vuint16m4_t vd, vuint16m4_t vs2, uint16_t rs1,
                              vbool4_t v0, size_t vl);
vuint16m8_t __riscv_vmerge_tu(vuint16m8_t vd, vuint16m8_t vs2, vuint16m8_t vs1,
                              vbool2_t v0, size_t vl);
vuint16m8_t __riscv_vmerge_tu(vuint16m8_t vd, vuint16m8_t vs2, uint16_t rs1,
                              vbool2_t v0, size_t vl);
vuint32mf2_t __riscv_vmerge_tu(vuint32mf2_t vd, vuint32mf2_t vs2,
                               vuint32mf2_t vs1, vbool64_t v0, size_t vl);
vuint32mf2_t __riscv_vmerge_tu(vuint32mf2_t vd, vuint32mf2_t vs2, uint32_t rs1,
                               vbool64_t v0, size_t vl);
vuint32m1_t __riscv_vmerge_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1,
                              vbool32_t v0, size_t vl);
vuint32m1_t __riscv_vmerge_tu(vuint32m1_t vd, vuint32m1_t vs2, uint32_t rs1,
                              vbool32_t v0, size_t vl);
vuint32m2_t __riscv_vmerge_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1,
                              vbool16_t v0, size_t vl);
vuint32m2_t __riscv_vmerge_tu(vuint32m2_t vd, vuint32m2_t vs2, uint32_t rs1,
                              vbool16_t v0, size_t vl);
vuint32m4_t __riscv_vmerge_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1,
                              vbool8_t v0, size_t vl);
vuint32m4_t __riscv_vmerge_tu(vuint32m4_t vd, vuint32m4_t vs2, uint32_t rs1,
                              vbool8_t v0, size_t vl);
vuint32m8_t __riscv_vmerge_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1,
                              vbool4_t v0, size_t vl);
vuint32m8_t __riscv_vmerge_tu(vuint32m8_t vd, vuint32m8_t vs2, uint32_t rs1,
                              vbool4_t v0, size_t vl);
vuint64m1_t __riscv_vmerge_tu(vuint64m1_t vd, vuint64m1_t vs2, vuint64m1_t vs1,
                              vbool64_t v0, size_t vl);
vuint64m1_t __riscv_vmerge_tu(vuint64m1_t vd, vuint64m1_t vs2, uint64_t rs1,
                              vbool64_t v0, size_t vl);
vuint64m2_t __riscv_vmerge_tu(vuint64m2_t vd, vuint64m2_t vs2, vuint64m2_t vs1,
                              vbool32_t v0, size_t vl);
vuint64m2_t __riscv_vmerge_tu(vuint64m2_t vd, vuint64m2_t vs2, uint64_t rs1,
                              vbool32_t v0, size_t vl);
vuint64m4_t __riscv_vmerge_tu(vuint64m4_t vd, vuint64m4_t vs2, vuint64m4_t vs1,
                              vbool16_t v0, size_t vl);
vuint64m4_t __riscv_vmerge_tu(vuint64m4_t vd, vuint64m4_t vs2, uint64_t rs1,
                              vbool16_t v0, size_t vl);
vuint64m8_t __riscv_vmerge_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1,
                              vbool8_t v0, size_t vl);
vuint64m8_t __riscv_vmerge_tu(vuint64m8_t vd, vuint64m8_t vs2, uint64_t rs1,
                              vbool8_t v0, size_t vl);

Vector Integer Move Intrinsics

// __riscv_vmv_v_tu, signed element types (int8 .. int64, every LMUL listed).
// Each vector type has two overloads: one taking a vector source vs1 of the
// same type as vd, and one taking a scalar source rs1 of the matching intN_t.
// Argument order: vd (same type as the return value), the source, then vl.
// These overloads take no mask argument.
// NOTE(review): the exact meaning of the _tu policy suffix is defined by the
// RVV intrinsics specification, not by this listing.
vint8mf8_t __riscv_vmv_v_tu(vint8mf8_t vd, vint8mf8_t vs1, size_t vl);
vint8mf8_t __riscv_vmv_v_tu(vint8mf8_t vd, int8_t rs1, size_t vl);
vint8mf4_t __riscv_vmv_v_tu(vint8mf4_t vd, vint8mf4_t vs1, size_t vl);
vint8mf4_t __riscv_vmv_v_tu(vint8mf4_t vd, int8_t rs1, size_t vl);
vint8mf2_t __riscv_vmv_v_tu(vint8mf2_t vd, vint8mf2_t vs1, size_t vl);
vint8mf2_t __riscv_vmv_v_tu(vint8mf2_t vd, int8_t rs1, size_t vl);
vint8m1_t __riscv_vmv_v_tu(vint8m1_t vd, vint8m1_t vs1, size_t vl);
vint8m1_t __riscv_vmv_v_tu(vint8m1_t vd, int8_t rs1, size_t vl);
vint8m2_t __riscv_vmv_v_tu(vint8m2_t vd, vint8m2_t vs1, size_t vl);
vint8m2_t __riscv_vmv_v_tu(vint8m2_t vd, int8_t rs1, size_t vl);
vint8m4_t __riscv_vmv_v_tu(vint8m4_t vd, vint8m4_t vs1, size_t vl);
vint8m4_t __riscv_vmv_v_tu(vint8m4_t vd, int8_t rs1, size_t vl);
vint8m8_t __riscv_vmv_v_tu(vint8m8_t vd, vint8m8_t vs1, size_t vl);
vint8m8_t __riscv_vmv_v_tu(vint8m8_t vd, int8_t rs1, size_t vl);
vint16mf4_t __riscv_vmv_v_tu(vint16mf4_t vd, vint16mf4_t vs1, size_t vl);
vint16mf4_t __riscv_vmv_v_tu(vint16mf4_t vd, int16_t rs1, size_t vl);
vint16mf2_t __riscv_vmv_v_tu(vint16mf2_t vd, vint16mf2_t vs1, size_t vl);
vint16mf2_t __riscv_vmv_v_tu(vint16mf2_t vd, int16_t rs1, size_t vl);
vint16m1_t __riscv_vmv_v_tu(vint16m1_t vd, vint16m1_t vs1, size_t vl);
vint16m1_t __riscv_vmv_v_tu(vint16m1_t vd, int16_t rs1, size_t vl);
vint16m2_t __riscv_vmv_v_tu(vint16m2_t vd, vint16m2_t vs1, size_t vl);
vint16m2_t __riscv_vmv_v_tu(vint16m2_t vd, int16_t rs1, size_t vl);
vint16m4_t __riscv_vmv_v_tu(vint16m4_t vd, vint16m4_t vs1, size_t vl);
vint16m4_t __riscv_vmv_v_tu(vint16m4_t vd, int16_t rs1, size_t vl);
vint16m8_t __riscv_vmv_v_tu(vint16m8_t vd, vint16m8_t vs1, size_t vl);
vint16m8_t __riscv_vmv_v_tu(vint16m8_t vd, int16_t rs1, size_t vl);
vint32mf2_t __riscv_vmv_v_tu(vint32mf2_t vd, vint32mf2_t vs1, size_t vl);
vint32mf2_t __riscv_vmv_v_tu(vint32mf2_t vd, int32_t rs1, size_t vl);
vint32m1_t __riscv_vmv_v_tu(vint32m1_t vd, vint32m1_t vs1, size_t vl);
vint32m1_t __riscv_vmv_v_tu(vint32m1_t vd, int32_t rs1, size_t vl);
vint32m2_t __riscv_vmv_v_tu(vint32m2_t vd, vint32m2_t vs1, size_t vl);
vint32m2_t __riscv_vmv_v_tu(vint32m2_t vd, int32_t rs1, size_t vl);
vint32m4_t __riscv_vmv_v_tu(vint32m4_t vd, vint32m4_t vs1, size_t vl);
vint32m4_t __riscv_vmv_v_tu(vint32m4_t vd, int32_t rs1, size_t vl);
vint32m8_t __riscv_vmv_v_tu(vint32m8_t vd, vint32m8_t vs1, size_t vl);
vint32m8_t __riscv_vmv_v_tu(vint32m8_t vd, int32_t rs1, size_t vl);
vint64m1_t __riscv_vmv_v_tu(vint64m1_t vd, vint64m1_t vs1, size_t vl);
vint64m1_t __riscv_vmv_v_tu(vint64m1_t vd, int64_t rs1, size_t vl);
vint64m2_t __riscv_vmv_v_tu(vint64m2_t vd, vint64m2_t vs1, size_t vl);
vint64m2_t __riscv_vmv_v_tu(vint64m2_t vd, int64_t rs1, size_t vl);
vint64m4_t __riscv_vmv_v_tu(vint64m4_t vd, vint64m4_t vs1, size_t vl);
vint64m4_t __riscv_vmv_v_tu(vint64m4_t vd, int64_t rs1, size_t vl);
vint64m8_t __riscv_vmv_v_tu(vint64m8_t vd, vint64m8_t vs1, size_t vl);
vint64m8_t __riscv_vmv_v_tu(vint64m8_t vd, int64_t rs1, size_t vl);
// __riscv_vmv_v_tu, unsigned element types (uint8 .. uint64, every LMUL
// listed).
// Each vector type has two overloads: one taking a vector source vs1 of the
// same type as vd, and one taking a scalar source rs1 of the matching uintN_t.
// Argument order: vd (same type as the return value), the source, then vl.
// These overloads take no mask argument.
// NOTE(review): the exact meaning of the _tu policy suffix is defined by the
// RVV intrinsics specification, not by this listing.
vuint8mf8_t __riscv_vmv_v_tu(vuint8mf8_t vd, vuint8mf8_t vs1, size_t vl);
vuint8mf8_t __riscv_vmv_v_tu(vuint8mf8_t vd, uint8_t rs1, size_t vl);
vuint8mf4_t __riscv_vmv_v_tu(vuint8mf4_t vd, vuint8mf4_t vs1, size_t vl);
vuint8mf4_t __riscv_vmv_v_tu(vuint8mf4_t vd, uint8_t rs1, size_t vl);
vuint8mf2_t __riscv_vmv_v_tu(vuint8mf2_t vd, vuint8mf2_t vs1, size_t vl);
vuint8mf2_t __riscv_vmv_v_tu(vuint8mf2_t vd, uint8_t rs1, size_t vl);
vuint8m1_t __riscv_vmv_v_tu(vuint8m1_t vd, vuint8m1_t vs1, size_t vl);
vuint8m1_t __riscv_vmv_v_tu(vuint8m1_t vd, uint8_t rs1, size_t vl);
vuint8m2_t __riscv_vmv_v_tu(vuint8m2_t vd, vuint8m2_t vs1, size_t vl);
vuint8m2_t __riscv_vmv_v_tu(vuint8m2_t vd, uint8_t rs1, size_t vl);
vuint8m4_t __riscv_vmv_v_tu(vuint8m4_t vd, vuint8m4_t vs1, size_t vl);
vuint8m4_t __riscv_vmv_v_tu(vuint8m4_t vd, uint8_t rs1, size_t vl);
vuint8m8_t __riscv_vmv_v_tu(vuint8m8_t vd, vuint8m8_t vs1, size_t vl);
vuint8m8_t __riscv_vmv_v_tu(vuint8m8_t vd, uint8_t rs1, size_t vl);
vuint16mf4_t __riscv_vmv_v_tu(vuint16mf4_t vd, vuint16mf4_t vs1, size_t vl);
vuint16mf4_t __riscv_vmv_v_tu(vuint16mf4_t vd, uint16_t rs1, size_t vl);
vuint16mf2_t __riscv_vmv_v_tu(vuint16mf2_t vd, vuint16mf2_t vs1, size_t vl);
vuint16mf2_t __riscv_vmv_v_tu(vuint16mf2_t vd, uint16_t rs1, size_t vl);
vuint16m1_t __riscv_vmv_v_tu(vuint16m1_t vd, vuint16m1_t vs1, size_t vl);
vuint16m1_t __riscv_vmv_v_tu(vuint16m1_t vd, uint16_t rs1, size_t vl);
vuint16m2_t __riscv_vmv_v_tu(vuint16m2_t vd, vuint16m2_t vs1, size_t vl);
vuint16m2_t __riscv_vmv_v_tu(vuint16m2_t vd, uint16_t rs1, size_t vl);
vuint16m4_t __riscv_vmv_v_tu(vuint16m4_t vd, vuint16m4_t vs1, size_t vl);
vuint16m4_t __riscv_vmv_v_tu(vuint16m4_t vd, uint16_t rs1, size_t vl);
vuint16m8_t __riscv_vmv_v_tu(vuint16m8_t vd, vuint16m8_t vs1, size_t vl);
vuint16m8_t __riscv_vmv_v_tu(vuint16m8_t vd, uint16_t rs1, size_t vl);
vuint32mf2_t __riscv_vmv_v_tu(vuint32mf2_t vd, vuint32mf2_t vs1, size_t vl);
vuint32mf2_t __riscv_vmv_v_tu(vuint32mf2_t vd, uint32_t rs1, size_t vl);
vuint32m1_t __riscv_vmv_v_tu(vuint32m1_t vd, vuint32m1_t vs1, size_t vl);
vuint32m1_t __riscv_vmv_v_tu(vuint32m1_t vd, uint32_t rs1, size_t vl);
vuint32m2_t __riscv_vmv_v_tu(vuint32m2_t vd, vuint32m2_t vs1, size_t vl);
vuint32m2_t __riscv_vmv_v_tu(vuint32m2_t vd, uint32_t rs1, size_t vl);
vuint32m4_t __riscv_vmv_v_tu(vuint32m4_t vd, vuint32m4_t vs1, size_t vl);
vuint32m4_t __riscv_vmv_v_tu(vuint32m4_t vd, uint32_t rs1, size_t vl);
vuint32m8_t __riscv_vmv_v_tu(vuint32m8_t vd, vuint32m8_t vs1, size_t vl);
vuint32m8_t __riscv_vmv_v_tu(vuint32m8_t vd, uint32_t rs1, size_t vl);
vuint64m1_t __riscv_vmv_v_tu(vuint64m1_t vd, vuint64m1_t vs1, size_t vl);
vuint64m1_t __riscv_vmv_v_tu(vuint64m1_t vd, uint64_t rs1, size_t vl);
vuint64m2_t __riscv_vmv_v_tu(vuint64m2_t vd, vuint64m2_t vs1, size_t vl);
vuint64m2_t __riscv_vmv_v_tu(vuint64m2_t vd, uint64_t rs1, size_t vl);
vuint64m4_t __riscv_vmv_v_tu(vuint64m4_t vd, vuint64m4_t vs1, size_t vl);
vuint64m4_t __riscv_vmv_v_tu(vuint64m4_t vd, uint64_t rs1, size_t vl);
vuint64m8_t __riscv_vmv_v_tu(vuint64m8_t vd, vuint64m8_t vs1, size_t vl);
vuint64m8_t __riscv_vmv_v_tu(vuint64m8_t vd, uint64_t rs1, size_t vl);