intrinsics.simd_shuffle

This commit is contained in:
gingerBill
2022-05-25 23:01:33 +01:00
parent 808ea30b48
commit 140c00aa0c
5 changed files with 137 additions and 10 deletions
+24
View File
@@ -1,7 +1,28 @@
package simd
import "core:builtin"
import "core:intrinsics"
// boolx16 :: #simd[16]bool
// b8x16 :: #simd[16]b8
// b16x8 :: #simd[8]b16
// b32x4 :: #simd[4]b32
// b64x2 :: #simd[2]b64
// u8x16 :: #simd[16]u8
// i8x16 :: #simd[16]i8
// u16x8 :: #simd[8]u16
// i16x8 :: #simd[8]i16
// u32x4 :: #simd[4]u32
// i32x4 :: #simd[4]i32
// u64x2 :: #simd[2]u64
// i64x2 :: #simd[2]i64
// f16x8 :: #simd[8]f16
// f32x4 :: #simd[4]f32
// f64x2 :: #simd[2]f64
add :: intrinsics.simd_add
sub :: intrinsics.simd_sub
mul :: intrinsics.simd_mul
@@ -42,6 +63,9 @@ reduce_and :: intrinsics.simd_reduce_and
// Package-level aliases for the corresponding `intrinsics.simd_reduce_*` builtins.
reduce_or :: intrinsics.simd_reduce_or
reduce_xor :: intrinsics.simd_reduce_xor
// Alias for the language-level `builtin.swizzle`.
swizzle :: builtin.swizzle
// Alias for `intrinsics.simd_shuffle`, a three-argument builtin: two simd vectors of
// identical type followed by a simd vector of `u32` indices. The result is a simd
// vector with the element type of the inputs and the lane count of the index vector.
shuffle :: intrinsics.simd_shuffle
// splat builds a simd vector of type `T` in which every one of the `LANES` lanes
// holds `value`.
//
// Inputs:
// - T:     the simd vector type to produce (`#simd[LANES]E`)
// - value: the scalar copied into each lane
//
// Returns: a `T` whose lanes `0..<LANES` are all `value`.
splat :: #force_inline proc "contextless" ($T: typeid/#simd[$LANES]$E, value: E) -> T {
	// A ranged compound-literal field assigns the same value to every lane at once.
	broadcast := T{0..<LANES = value}
	return broadcast
}
+64
View File
@@ -758,6 +758,64 @@ bool check_builtin_simd_operation(CheckerContext *c, Operand *operand, Ast *call
}
case BuiltinProc_simd_shuffle:
	{
		// Type-checks `simd_shuffle(x, y, mask)`:
		//   - `x` and `y` must be simd vectors of identical type,
		//   - `mask` must be a simd vector whose element type is `u32`,
		//   - the mask length must be a power of two and must not exceed the
		//     length of the input vectors.
		// On success the call is a value whose type is a simd vector with the
		// element type of `x` and the lane count of `mask`.
		Operand x = {};
		Operand y = {};
		Operand z = {};

		check_expr(c, &x, ce->args[0]); if (x.mode == Addressing_Invalid) { return false; }
		// `y` is checked with `x`'s type as a hint so an untyped operand can adopt it.
		check_expr_with_type_hint(c, &y, ce->args[1], x.type); if (y.mode == Addressing_Invalid) { return false; }
		convert_to_typed(c, &y, x.type);

		if (!is_type_simd_vector(x.type)) {
			error(x.expr, "'%.*s' expected a simd vector type", LIT(builtin_name));
			return false;
		}
		if (!is_type_simd_vector(y.type)) {
			error(y.expr, "'%.*s' expected a simd vector type", LIT(builtin_name));
			return false;
		}
		if (!are_types_identical(x.type, y.type)) {
			gbString xs = type_to_string(x.type);
			gbString ys = type_to_string(y.type);
			error(x.expr, "'%.*s' expected 2 arguments of the same type, got '%s' vs '%s'", LIT(builtin_name), xs, ys);
			gb_string_free(ys);
			gb_string_free(xs);
			return false;
		}
		Type *elem = base_array_type(x.type);

		check_expr(c, &z, ce->args[2]); if (z.mode == Addressing_Invalid) { return false; }
		Type *z_elem = base_array_type(z.type);
		if (!is_type_simd_vector(z.type) || !are_types_identical(z_elem, t_u32)) {
			gbString zstr = type_to_string(z.type);
			error(z.expr, "'%.*s' expected a simd vector type with an element of type 'u32', got '%s'", LIT(builtin_name), zstr);
			gb_string_free(zstr);
			return false;
		}

		i64 x_count = x.type->SimdVector.count;
		i64 z_count = z.type->SimdVector.count;

		if (!is_power_of_two(z_count)) {
			gbString zstr = type_to_string(z.type);
			error(z.expr, "'%.*s' expected a simd vector type with a power of two length, got '%s'", LIT(builtin_name), zstr);
			gb_string_free(zstr);
			return false;
		}
		if (z_count > x_count) {
			// The diagnostic states the bound that is actually enforced here:
			// the mask may have at most as many lanes as one input vector.
			// NOTE(review): the previous wording mentioned "the sum of the two
			// input vectors"; if that was the intended limit, the comparison
			// (not just the message) needs to change - confirm with the author.
			gbString zstr = type_to_string(z.type);
			error(z.expr, "'%.*s' expected a mask vector whose length does not exceed the length of the input vectors, got '%s'", LIT(builtin_name), zstr);
			gb_string_free(zstr);
			return false;
		}

		operand->mode = Addressing_Value;
		operand->type = alloc_type_simd_vector(z_count, elem);
		return true;
	}
// case BuiltinProc_simd_rotate_left:
// {
// Operand x = {};
@@ -3131,6 +3189,12 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32
operand->mode = Addressing_Type;
operand->type = alloc_type_simd_vector(count, elem);
if (is_arch_wasm()) {
if (type_size_of(operand->type) != 16) {
error(x.expr, "wasm based targets are limited to 128-bit types");
}
}
break;
}
+8 -2
View File
@@ -2802,15 +2802,21 @@ bool check_type_internal(CheckerContext *ctx, Ast *e, Type **type, Type *named_t
*type = alloc_type_array(elem, count, generic_type);
goto array_end;
}
if (is_type_polymorphic(elem)) {
// Ignore
} else if (count < 1 || !is_power_of_two(count)) {
error(at->count, "Invalid length for 'intrinsics.simd_vector', expected a power of two length, got '%lld'", cast(long long)count);
*type = alloc_type_array(elem, count, generic_type);
goto array_end;
}
} else
*type = alloc_type_simd_vector(count, elem, generic_type);
if (is_arch_wasm()) {
if (type_size_of(*type) != 16) {
error(at->count, "wasm based targets are limited to 128-bit types");
}
}
} else {
error(at->tag, "Invalid tag applied to array, got #%.*s", LIT(name));
*type = alloc_type_array(elem, count, generic_type);
+4
View File
@@ -157,6 +157,8 @@ BuiltinProc__simd_begin,
BuiltinProc_simd_reduce_and,
BuiltinProc_simd_reduce_or,
BuiltinProc_simd_reduce_xor,
BuiltinProc_simd_shuffle,
BuiltinProc__simd_end,
// Platform specific intrinsics
@@ -417,6 +419,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = {
{STR_LIT("simd_reduce_and"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
{STR_LIT("simd_reduce_or"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
{STR_LIT("simd_reduce_xor"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
{STR_LIT("simd_shuffle"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics},
{STR_LIT(""), 0, false, Expr_Stmt, BuiltinProcPkg_intrinsics},
+37 -8
View File
@@ -981,6 +981,24 @@ lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> const &args,
return result;
}
// Builds an LLVM constant vector of `count` lanes, each holding the floating-point
// constant `value` of LLVM type `type`.
// The scratch array of lane values is taken from temporary_allocator().
LLVMValueRef llvm_splat_float(i64 count, LLVMTypeRef type, f64 value) {
	LLVMValueRef scalar = LLVMConstReal(type, value);
	LLVMValueRef *lanes = gb_alloc_array(temporary_allocator(), LLVMValueRef, count);
	i64 lane = 0;
	while (lane < count) {
		lanes[lane] = scalar;
		lane += 1;
	}
	return LLVMConstVector(lanes, cast(unsigned)count);
}
// Builds an LLVM constant vector of `count` lanes, each holding the integer
// constant `value` of LLVM type `type`. `is_signed` is forwarded to LLVMConstInt.
// The scratch array of lane values is taken from temporary_allocator().
LLVMValueRef llvm_splat_int(i64 count, LLVMTypeRef type, i64 value, bool is_signed=false) {
	LLVMValueRef scalar = LLVMConstInt(type, value, is_signed);
	LLVMValueRef *lanes = gb_alloc_array(temporary_allocator(), LLVMValueRef, count);
	i64 lane = 0;
	while (lane < count) {
		lanes[lane] = scalar;
		lane += 1;
	}
	return LLVMConstVector(lanes, cast(unsigned)count);
}
lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, BuiltinProcId builtin_id) {
ast_node(ce, CallExpr, expr);
@@ -1060,12 +1078,7 @@ lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAndValue const
case BuiltinProc_simd_shr_masked: op_code = is_signed ? LLVMAShr : LLVMLShr; is_masked = true; break;
}
if (op_code) {
LLVMValueRef bit_value = lb_const_int(m, elem1, sz*8 - 1).value;
LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, count);
for (i64 i = 0; i < count; i++) {
values[i] = bit_value;
}
LLVMValueRef bits = LLVMConstVector(values, cast(unsigned)count);
LLVMValueRef bits = llvm_splat_int(count, lb_type(m, elem1), sz*8 - 1);
if (is_masked) {
// C logic
LLVMValueRef shift = LLVMBuildAnd(p->builder, arg1.value, bits, "");
@@ -1077,7 +1090,6 @@ lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAndValue const
LLVMValueRef shift = LLVMBuildBinOp(p->builder, op_code, arg0.value, arg1.value, "");
res.value = LLVMBuildSelect(p->builder, mask, shift, zero, "");
}
return res;
}
}
@@ -1264,7 +1276,24 @@ lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAndValue const
lbValue res = {};
res.value = LLVMBuildCall(p->builder, ip, args, gb_count_of(args), "");
res.type = tv.type;
return res;
}
case BuiltinProc_simd_shuffle:
	{
		// Lowers `simd_shuffle(a, b, mask)` to an LLVM shufflevector.
		// NOTE(review): `arg0` is not built in this case; presumably it is
		// evaluated before the switch - confirm against the enclosing function.
		arg1 = lb_build_expr(p, ce->args[1]);
		arg2 = lb_build_expr(p, ce->args[2]);

		Type *vt = arg0.type;
		GB_ASSERT(vt->kind == Type_SimdVector);

		// Valid lane indices address the concatenation of both inputs, i.e.
		// the range [0, 2*count). Because `max_count` is a power of two,
		// AND-ing with `max_count-1` wraps every index into that range.
		i64 max_count = vt->SimdVector.count*2;

		// The clamp vector must have exactly the same vector type as the mask
		// (same lane count and element type); otherwise the `and` below has
		// mismatched operand types. The splat therefore uses the mask's lane
		// count, not `max_count`.
		i64 mask_count = arg2.type->SimdVector.count;
		LLVMTypeRef mask_elem = lb_type(m, arg2.type->SimdVector.elem);
		LLVMValueRef max_mask = llvm_splat_int(mask_count, mask_elem, max_count-1);

		LLVMValueRef mask = LLVMBuildAnd(p->builder, arg2.value, max_mask, "");
		res.value = LLVMBuildShuffleVector(p->builder, arg0.value, arg1.value, mask, "");
		return res;
	}
}