glsl: SHFL fix and prefer shift operations over divide in glsl shader
This commit is contained in:
		| @@ -20,7 +20,7 @@ static constexpr std::string_view cas_loop{R"(for (;;){{ | |||||||
| void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, | void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, | ||||||
|                        std::string_view value, std::string_view function) { |                        std::string_view value, std::string_view function) { | ||||||
|     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; |     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||||||
|     const std::string smem{fmt::format("smem[{}/4]", offset)}; |     const std::string smem{fmt::format("smem[{}>>2]", offset)}; | ||||||
|     ctx.Add(cas_loop.data(), smem, ret, smem, function, smem, value, ret); |     ctx.Add(cas_loop.data(), smem, ret, smem, function, smem, value, ret); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -45,7 +45,7 @@ void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi | |||||||
|  |  | ||||||
| void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||||||
|                             std::string_view value) { |                             std::string_view value) { | ||||||
|     ctx.AddU32("{}=atomicAdd(smem[{}/4],{});", inst, pointer_offset, value); |     ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||||||
| @@ -56,7 +56,7 @@ void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view p | |||||||
|  |  | ||||||
| void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||||||
|                             std::string_view value) { |                             std::string_view value) { | ||||||
|     ctx.AddU32("{}=atomicMin(smem[{}/4],{});", inst, pointer_offset, value); |     ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||||||
| @@ -67,7 +67,7 @@ void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view p | |||||||
|  |  | ||||||
| void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||||||
|                             std::string_view value) { |                             std::string_view value) { | ||||||
|     ctx.AddU32("{}=atomicMax(smem[{}/4],{});", inst, pointer_offset, value); |     ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||||||
| @@ -82,31 +82,31 @@ void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view po | |||||||
|  |  | ||||||
| void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||||||
|                            std::string_view value) { |                            std::string_view value) { | ||||||
|     ctx.AddU32("{}=atomicAnd(smem[{}/4],{});", inst, pointer_offset, value); |     ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||||||
|                           std::string_view value) { |                           std::string_view value) { | ||||||
|     ctx.AddU32("{}=atomicOr(smem[{}/4],{});", inst, pointer_offset, value); |     ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||||||
|                            std::string_view value) { |                            std::string_view value) { | ||||||
|     ctx.AddU32("{}=atomicXor(smem[{}/4],{});", inst, pointer_offset, value); |     ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||||||
|                                 std::string_view value) { |                                 std::string_view value) { | ||||||
|     ctx.AddU32("{}=atomicExchange(smem[{}/4],{});", inst, pointer_offset, value); |     ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||||||
|                                 std::string_view value) { |                                 std::string_view value) { | ||||||
|     // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic"); |     // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic"); | ||||||
|     ctx.AddU64("{}=packUint2x32(uvec2(smem[{}/4],smem[({}+4)/4]));", inst, pointer_offset, |     ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, | ||||||
|                pointer_offset); |                pointer_offset); | ||||||
|     ctx.Add("smem[{}/4]=unpackUint2x32({}).x;smem[({}+4)/4]=unpackUint2x32({}).y;", pointer_offset, |     ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", | ||||||
|             value, pointer_offset, value); |             pointer_offset, value, pointer_offset, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||||||
|   | |||||||
| @@ -31,7 +31,7 @@ void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& | |||||||
|     } else { |     } else { | ||||||
|         const auto offset_var{ctx.var_alloc.Consume(offset)}; |         const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|         ctx.AddU32( |         ctx.AddU32( | ||||||
|             "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);", |             "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", | ||||||
|             inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); |             inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -46,8 +46,8 @@ void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& | |||||||
|     } else { |     } else { | ||||||
|         const auto offset_var{ctx.var_alloc.Consume(offset)}; |         const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|         ctx.AddU32( |         ctx.AddU32( | ||||||
|             "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);", inst, |             "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", | ||||||
|             ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); |             inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -60,7 +60,7 @@ void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst | |||||||
|                    ((offset.U32() / 2) % 2) * 16); |                    ((offset.U32() / 2) % 2) * 16); | ||||||
|     } else { |     } else { | ||||||
|         const auto offset_var{ctx.var_alloc.Consume(offset)}; |         const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|         ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int((({}/" |         ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/" | ||||||
|                    "2)%2)*16),16);", |                    "2)%2)*16),16);", | ||||||
|                    inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); |                    inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); | ||||||
|     } |     } | ||||||
| @@ -75,9 +75,9 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst | |||||||
|                    ((offset.U32() / 2) % 2) * 16); |                    ((offset.U32() / 2) % 2) * 16); | ||||||
|     } else { |     } else { | ||||||
|         const auto offset_var{ctx.var_alloc.Consume(offset)}; |         const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|         ctx.AddU32( |         ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/" | ||||||
|             "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int((({}/2)%2)*16),16);", |                    "2)%2)*16),16);", | ||||||
|             inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); |                    inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -88,7 +88,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | |||||||
|                    offset.U32() / 16, OffsetSwizzle(offset.U32())); |                    offset.U32() / 16, OffsetSwizzle(offset.U32())); | ||||||
|     } else { |     } else { | ||||||
|         const auto offset_var{ctx.var_alloc.Consume(offset)}; |         const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|         ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]);", inst, ctx.stage_name, |         ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]);", inst, ctx.stage_name, | ||||||
|                    binding.U32(), offset_var, offset_var); |                    binding.U32(), offset_var, offset_var); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -100,7 +100,7 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | |||||||
|                    OffsetSwizzle(offset.U32())); |                    OffsetSwizzle(offset.U32())); | ||||||
|     } else { |     } else { | ||||||
|         const auto offset_var{ctx.var_alloc.Consume(offset)}; |         const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|         ctx.AddF32("{}={}_cbuf{}[{}/16][({}/4)%4];", inst, ctx.stage_name, binding.U32(), |         ctx.AddF32("{}={}_cbuf{}[{}/16][({}>>2)%4];", inst, ctx.stage_name, binding.U32(), | ||||||
|                    offset_var, offset_var); |                    offset_var, offset_var); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -116,7 +116,7 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding | |||||||
|     } else { |     } else { | ||||||
|         const auto offset_var{ctx.var_alloc.Consume(offset)}; |         const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|         ctx.AddU32x2("{}=uvec2(floatBitsToUint({}_cbuf{}[{}/16][({}/" |         ctx.AddU32x2("{}=uvec2(floatBitsToUint({}_cbuf{}[{}/16][({}/" | ||||||
|                      "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)/4)%4]));", |                      "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)>>2)%4]));", | ||||||
|                      inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name, |                      inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name, | ||||||
|                      binding.U32(), offset_var, offset_var); |                      binding.U32(), offset_var, offset_var); | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -13,7 +13,7 @@ void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, | |||||||
|                        [[maybe_unused]] const IR::Value& binding, |                        [[maybe_unused]] const IR::Value& binding, | ||||||
|                        [[maybe_unused]] const IR::Value& offset) { |                        [[maybe_unused]] const IR::Value& offset) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}/4],int({}%4)*8,8);", inst, ctx.stage_name, |     ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name, | ||||||
|                binding.U32(), offset_var, offset_var); |                binding.U32(), offset_var, offset_var); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -21,7 +21,7 @@ void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, | |||||||
|                        [[maybe_unused]] const IR::Value& binding, |                        [[maybe_unused]] const IR::Value& binding, | ||||||
|                        [[maybe_unused]] const IR::Value& offset) { |                        [[maybe_unused]] const IR::Value& offset) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}/4]),int({}%4)*8,8);", inst, ctx.stage_name, |     ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name, | ||||||
|                binding.U32(), offset_var, offset_var); |                binding.U32(), offset_var, offset_var); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -29,7 +29,7 @@ void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, | |||||||
|                         [[maybe_unused]] const IR::Value& binding, |                         [[maybe_unused]] const IR::Value& binding, | ||||||
|                         [[maybe_unused]] const IR::Value& offset) { |                         [[maybe_unused]] const IR::Value& offset) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}/4],int(({}/2)%2)*16,16);", inst, ctx.stage_name, |     ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name, | ||||||
|                binding.U32(), offset_var, offset_var); |                binding.U32(), offset_var, offset_var); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -37,30 +37,31 @@ void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, | |||||||
|                         [[maybe_unused]] const IR::Value& binding, |                         [[maybe_unused]] const IR::Value& binding, | ||||||
|                         [[maybe_unused]] const IR::Value& offset) { |                         [[maybe_unused]] const IR::Value& offset) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}/4]),int(({}/2)%2)*16,16);", inst, |     ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst, | ||||||
|                ctx.stage_name, binding.U32(), offset_var, offset_var); |                ctx.stage_name, binding.U32(), offset_var, offset_var); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||||||
|                        const IR::Value& offset) { |                        const IR::Value& offset) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.AddU32("{}={}_ssbo{}[{}/4];", inst, ctx.stage_name, binding.U32(), offset_var); |     ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||||||
|                        const IR::Value& offset) { |                        const IR::Value& offset) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}/4],{}_ssbo{}[({}+4)/4]);", inst, ctx.stage_name, |     ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name, | ||||||
|                  binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); |                  binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||||||
|                         const IR::Value& offset) { |                         const IR::Value& offset) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.AddU32x4( |     ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}" | ||||||
|         "{}=uvec4({}_ssbo{}[{}/4],{}_ssbo{}[({}+4)/4],{}_ssbo{}[({}+8)/4],{}_ssbo{}[({}+12)/4]);", |                  "+12)>>2]);", | ||||||
|         inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, |                  inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), | ||||||
|         ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); |                  offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, | ||||||
|  |                  binding.U32(), offset_var); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, | void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, | ||||||
| @@ -68,7 +69,7 @@ void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, | |||||||
|                         [[maybe_unused]] const IR::Value& offset, |                         [[maybe_unused]] const IR::Value& offset, | ||||||
|                         [[maybe_unused]] std::string_view value) { |                         [[maybe_unused]] std::string_view value) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int({}%4)*8,8);", ctx.stage_name, |     ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int({}%4)*8,8);", ctx.stage_name, | ||||||
|             binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, |             binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, | ||||||
|             offset_var); |             offset_var); | ||||||
| } | } | ||||||
| @@ -78,7 +79,7 @@ void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx, | |||||||
|                         [[maybe_unused]] const IR::Value& offset, |                         [[maybe_unused]] const IR::Value& offset, | ||||||
|                         [[maybe_unused]] std::string_view value) { |                         [[maybe_unused]] std::string_view value) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int({}%4)*8,8);", ctx.stage_name, |     ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int({}%4)*8,8);", ctx.stage_name, | ||||||
|             binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, |             binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, | ||||||
|             offset_var); |             offset_var); | ||||||
| } | } | ||||||
| @@ -88,7 +89,7 @@ void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx, | |||||||
|                          [[maybe_unused]] const IR::Value& offset, |                          [[maybe_unused]] const IR::Value& offset, | ||||||
|                          [[maybe_unused]] std::string_view value) { |                          [[maybe_unused]] std::string_view value) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", |     ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int(({}>>1)%2)*16,16);", | ||||||
|             ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, |             ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, | ||||||
|             value, offset_var); |             value, offset_var); | ||||||
| } | } | ||||||
| @@ -98,7 +99,7 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, | |||||||
|                          [[maybe_unused]] const IR::Value& offset, |                          [[maybe_unused]] const IR::Value& offset, | ||||||
|                          [[maybe_unused]] std::string_view value) { |                          [[maybe_unused]] std::string_view value) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", |     ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int(({}>>1)%2)*16,16);", | ||||||
|             ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, |             ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, | ||||||
|             value, offset_var); |             value, offset_var); | ||||||
| } | } | ||||||
| @@ -106,14 +107,14 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, | |||||||
| void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||||
|                         std::string_view value) { |                         std::string_view value) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.Add("{}_ssbo{}[{}/4]={};", ctx.stage_name, binding.U32(), offset_var, value); |     ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||||
|                         std::string_view value) { |                         std::string_view value) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.Add("{}_ssbo{}[{}/4]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); |     ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); | ||||||
|     ctx.Add("{}_ssbo{}[({}+4)/4]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); |     ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, | void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, | ||||||
| @@ -121,9 +122,9 @@ void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, | |||||||
|                          [[maybe_unused]] const IR::Value& offset, |                          [[maybe_unused]] const IR::Value& offset, | ||||||
|                          [[maybe_unused]] std::string_view value) { |                          [[maybe_unused]] std::string_view value) { | ||||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; |     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||||
|     ctx.Add("{}_ssbo{}[{}/4]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); |     ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); | ||||||
|     ctx.Add("{}_ssbo{}[({}+4)/4]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); |     ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); | ||||||
|     ctx.Add("{}_ssbo{}[({}+8)/4]={}.z;", ctx.stage_name, binding.U32(), offset_var, value); |     ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value); | ||||||
|     ctx.Add("{}_ssbo{}[({}+12)/4]={}.w;", ctx.stage_name, binding.U32(), offset_var, value); |     ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value); | ||||||
| } | } | ||||||
| } // namespace Shader::Backend::GLSL | } // namespace Shader::Backend::GLSL | ||||||
|   | |||||||
| @@ -11,70 +11,70 @@ | |||||||
| namespace Shader::Backend::GLSL { | namespace Shader::Backend::GLSL { | ||||||
| void EmitLoadSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | void EmitLoadSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||||||
|                       [[maybe_unused]] std::string_view offset) { |                       [[maybe_unused]] std::string_view offset) { | ||||||
|     ctx.AddU32("{}=bitfieldExtract(smem[{}/4],int({}%4)*8,8);", inst, offset, offset); |     ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitLoadSharedS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | void EmitLoadSharedS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||||||
|                       [[maybe_unused]] std::string_view offset) { |                       [[maybe_unused]] std::string_view offset) { | ||||||
|     ctx.AddS32("{}=bitfieldExtract(int(smem[{}/4]),int({}%4)*8,8);", inst, offset, offset); |     ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitLoadSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | void EmitLoadSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||||||
|                        [[maybe_unused]] std::string_view offset) { |                        [[maybe_unused]] std::string_view offset) { | ||||||
|     ctx.AddU32("{}=bitfieldExtract(smem[{}/4],int(({}/2)%2)*16,16);", inst, offset, offset); |     ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitLoadSharedS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | void EmitLoadSharedS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||||||
|                        [[maybe_unused]] std::string_view offset) { |                        [[maybe_unused]] std::string_view offset) { | ||||||
|     ctx.AddS32("{}=bitfieldExtract(int(smem[{}/4]),int(({}/2)%2)*16,16);", inst, offset, offset); |     ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitLoadSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | void EmitLoadSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||||||
|                        [[maybe_unused]] std::string_view offset) { |                        [[maybe_unused]] std::string_view offset) { | ||||||
|     ctx.AddU32("{}=smem[{}/4];", inst, offset); |     ctx.AddU32("{}=smem[{}>>2];", inst, offset); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitLoadSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | void EmitLoadSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||||||
|                        [[maybe_unused]] std::string_view offset) { |                        [[maybe_unused]] std::string_view offset) { | ||||||
|     ctx.AddU32x2("{}=uvec2(smem[{}/4],smem[({}+4)/4]);", inst, offset, offset); |     ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitLoadSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | void EmitLoadSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||||||
|                         [[maybe_unused]] std::string_view offset) { |                         [[maybe_unused]] std::string_view offset) { | ||||||
|     ctx.AddU32x4("{}=uvec4(smem[{}/4],smem[({}+4)/4],smem[({}+8)/4],smem[({}+12)/4]);", inst, |     ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst, | ||||||
|                  offset, offset, offset, offset); |                  offset, offset, offset, offset); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitWriteSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, | void EmitWriteSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, | ||||||
|                        [[maybe_unused]] std::string_view value) { |                        [[maybe_unused]] std::string_view value) { | ||||||
|     ctx.Add("smem[{}/4]=bitfieldInsert(smem[{}/4],{},int({}%4)*8,8);", offset, offset, value, |     ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int({}%4)*8,8);", offset, offset, value, | ||||||
|             offset); |             offset); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitWriteSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, | void EmitWriteSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, | ||||||
|                         [[maybe_unused]] std::string_view value) { |                         [[maybe_unused]] std::string_view value) { | ||||||
|     ctx.Add("smem[{}/4]=bitfieldInsert(smem[{}/4],{},int(({}/2)%2)*16,16);", offset, offset, value, |     ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int(({}>>1)%2)*16,16);", offset, offset, | ||||||
|             offset); |             value, offset); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitWriteSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, | void EmitWriteSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, | ||||||
|                         [[maybe_unused]] std::string_view value) { |                         [[maybe_unused]] std::string_view value) { | ||||||
|     ctx.Add("smem[{}/4]={};", offset, value); |     ctx.Add("smem[{}>>2]={};", offset, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitWriteSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, | void EmitWriteSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, | ||||||
|                         [[maybe_unused]] std::string_view value) { |                         [[maybe_unused]] std::string_view value) { | ||||||
|     ctx.Add("smem[{}/4]={}.x;", offset, value); |     ctx.Add("smem[{}>>2]={}.x;", offset, value); | ||||||
|     ctx.Add("smem[({}+4)/4]={}.y;", offset, value); |     ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitWriteSharedU128([[maybe_unused]] EmitContext& ctx, | void EmitWriteSharedU128([[maybe_unused]] EmitContext& ctx, | ||||||
|                          [[maybe_unused]] std::string_view offset, |                          [[maybe_unused]] std::string_view offset, | ||||||
|                          [[maybe_unused]] std::string_view value) { |                          [[maybe_unused]] std::string_view value) { | ||||||
|     ctx.Add("smem[{}/4]={}.x;", offset, value); |     ctx.Add("smem[{}>>2]={}.x;", offset, value); | ||||||
|     ctx.Add("smem[({}+4)/4]={}.y;", offset, value); |     ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); | ||||||
|     ctx.Add("smem[({}+8)/4]={}.z;", offset, value); |     ctx.Add("smem[({}+8)>>2]={}.z;", offset, value); | ||||||
|     ctx.Add("smem[({}+12)/4]={}.w;", offset, value); |     ctx.Add("smem[({}+12)>>2]={}.w;", offset, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| } // namespace Shader::Backend::GLSL | } // namespace Shader::Backend::GLSL | ||||||
|   | |||||||
| @@ -112,7 +112,7 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | |||||||
|     const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; |     const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; | ||||||
|     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); |     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||||||
|     SetInBoundsFlag(ctx, inst); |     SetInBoundsFlag(ctx, inst); | ||||||
|     ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); |     ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, | ||||||
| @@ -122,7 +122,7 @@ void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std | |||||||
|     const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; |     const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; | ||||||
|     ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); |     ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); | ||||||
|     SetInBoundsFlag(ctx, inst); |     SetInBoundsFlag(ctx, inst); | ||||||
|     ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); |     ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||||||
| @@ -133,7 +133,7 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | |||||||
|     const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; |     const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; | ||||||
|     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); |     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||||||
|     SetInBoundsFlag(ctx, inst); |     SetInBoundsFlag(ctx, inst); | ||||||
|     ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); |     ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||||||
| @@ -144,7 +144,7 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view val | |||||||
|     const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; |     const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; | ||||||
|     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); |     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||||||
|     SetInBoundsFlag(ctx, inst); |     SetInBoundsFlag(ctx, inst); | ||||||
|     ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); |     ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user