video_core: Fix SNORM texture buffer emulating error (#9001)
This commit is contained in:
		| @@ -43,6 +43,10 @@ void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||||
|     Alias(inst, value); | ||||
| } | ||||
|  | ||||
// Emitter for the BitCastS32F32 opcode in the register-based backend.
// It does nothing but forward `inst` and `value` to Alias(), exactly like the other
// BitCast emitters visible in this hunk. The EmitContext parameter is unused.
| void EmitBitCastS32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||||
|     Alias(inst, value); | ||||
| } | ||||
|  | ||||
// Emitter for the BitCastF16U16 opcode in the register-based backend.
// Forwards `inst` and `value` to Alias(); the EmitContext parameter is unused.
| void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||||
|     Alias(inst, value); | ||||
| } | ||||
|   | ||||
| @@ -196,6 +196,7 @@ void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Regist | ||||
| void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||||
| void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||||
| void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||||
| void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||||
| void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||||
| void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||||
| void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||||
|   | ||||
| @@ -48,6 +48,10 @@ void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) | ||||
|     ctx.AddU64("{}=doubleBitsToUint64({});", inst, value); | ||||
| } | ||||
|  | ||||
// String-based emitter for the BitCastS32F32 opcode.
// Passes the format string "{}=ftoi({});" together with `inst` and `value` to ctx.AddF32.
// NOTE(review): the opcode table declares an S32 result for BitCastS32F32, yet the result is
// registered through AddF32 here — confirm that this is intended.
| void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||||
|     ctx.AddF32("{}=ftoi({});", inst, value); | ||||
| } | ||||
|  | ||||
// String-based emitter for the BitCastF16U16 opcode.
// There is no implementation: both parameters are unused and the body only calls
// NotImplemented().
| void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { | ||||
|     NotImplemented(); | ||||
| } | ||||
|   | ||||
| @@ -230,6 +230,7 @@ void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||||
| void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst); | ||||
| void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||||
| void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||||
| void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||||
| void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst); | ||||
| void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||||
| void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||||
|   | ||||
| @@ -18,6 +18,10 @@ void EmitBitCastU64F64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
// Stub emitter for the BitCastS32F32 opcode.
// Unconditionally throws NotImplementedException; the EmitContext parameter is unused.
| void EmitBitCastS32F32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
// Stub emitter for the BitCastF16U16 opcode.
// Unconditionally throws NotImplementedException; the EmitContext parameter is unused.
| void EmitBitCastF16U16(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|   | ||||
| @@ -178,7 +178,8 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||||
| void EmitBitCastU16F16(EmitContext& ctx); | ||||
| Id EmitBitCastU32F32(EmitContext& ctx, Id value); | ||||
| void EmitBitCastU64F64(EmitContext& ctx); | ||||
| void EmitBitCastF16U16(EmitContext& ctx); | ||||
| void EmitBitCastS32F32(EmitContext& ctx); | ||||
| void EmitBitCastF16U16(EmitContext&); | ||||
| Id EmitBitCastF32U32(EmitContext& ctx, Id value); | ||||
| void EmitBitCastF64U64(EmitContext& ctx); | ||||
| Id EmitPackUint2x32(EmitContext& ctx, Id value); | ||||
|   | ||||
| @@ -22,6 +22,8 @@ public: | ||||
|  | ||||
|     [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0; | ||||
|  | ||||
|     [[nodiscard]] virtual TexturePixelFormat ReadTexturePixelFormat(u32 raw_handle) = 0; | ||||
|  | ||||
|     [[nodiscard]] virtual u32 ReadViewportTransformState() = 0; | ||||
|  | ||||
|     [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; | ||||
|   | ||||
| @@ -691,6 +691,11 @@ IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) { | ||||
|     return Inst<IR::U32>(Opcode::BitCastU32F32, value); | ||||
| } | ||||
|  | ||||
// Explicit specialization of IREmitter::BitCast for an F32 source and an S32 destination.
// Builds an instruction with Opcode::BitCastS32F32 taking `value` as its only argument and
// returns it typed as IR::S32.
| template <> | ||||
| IR::S32 IREmitter::BitCast<IR::S32, IR::F32>(const IR::F32& value) { | ||||
|     return Inst<IR::S32>(Opcode::BitCastS32F32, value); | ||||
| } | ||||
|  | ||||
| template <> | ||||
| IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) { | ||||
|     return Inst<IR::F32>(Opcode::BitCastF32U32, value); | ||||
|   | ||||
| @@ -37,6 +37,7 @@ constexpr Type U8{Type::U8}; | ||||
| constexpr Type U16{Type::U16}; | ||||
| constexpr Type U32{Type::U32}; | ||||
| constexpr Type U64{Type::U64}; | ||||
| constexpr Type S32{Type::S32}; | ||||
| constexpr Type F16{Type::F16}; | ||||
| constexpr Type F32{Type::F32}; | ||||
| constexpr Type F64{Type::F64}; | ||||
|   | ||||
| @@ -174,6 +174,7 @@ OPCODE(SelectF64,                                           F64,            U1, | ||||
| OPCODE(BitCastU16F16,                                       U16,            F16,                                                                            ) | ||||
| OPCODE(BitCastU32F32,                                       U32,            F32,                                                                            ) | ||||
| OPCODE(BitCastU64F64,                                       U64,            F64,                                                                            ) | ||||
| OPCODE(BitCastS32F32,                                       S32,            F32,                                                                            ) | ||||
| OPCODE(BitCastF16U16,                                       F16,            U16,                                                                            ) | ||||
| OPCODE(BitCastF32U32,                                       F32,            U32,                                                                            ) | ||||
| OPCODE(BitCastF64U64,                                       F64,            U64,                                                                            ) | ||||
|   | ||||
| @@ -24,21 +24,22 @@ enum class Type { | ||||
|     U16 = 1 << 7, | ||||
|     U32 = 1 << 8, | ||||
|     U64 = 1 << 9, | ||||
|     F16 = 1 << 10, | ||||
|     F32 = 1 << 11, | ||||
|     F64 = 1 << 12, | ||||
|     U32x2 = 1 << 13, | ||||
|     U32x3 = 1 << 14, | ||||
|     U32x4 = 1 << 15, | ||||
|     F16x2 = 1 << 16, | ||||
|     F16x3 = 1 << 17, | ||||
|     F16x4 = 1 << 18, | ||||
|     F32x2 = 1 << 19, | ||||
|     F32x3 = 1 << 20, | ||||
|     F32x4 = 1 << 21, | ||||
|     F64x2 = 1 << 22, | ||||
|     F64x3 = 1 << 23, | ||||
|     F64x4 = 1 << 24, | ||||
|     S32 = 1 << 10, | ||||
|     F16 = 1 << 11, | ||||
|     F32 = 1 << 12, | ||||
|     F64 = 1 << 13, | ||||
|     U32x2 = 1 << 14, | ||||
|     U32x3 = 1 << 15, | ||||
|     U32x4 = 1 << 16, | ||||
|     F16x2 = 1 << 17, | ||||
|     F16x3 = 1 << 18, | ||||
|     F16x4 = 1 << 19, | ||||
|     F32x2 = 1 << 20, | ||||
|     F32x3 = 1 << 21, | ||||
|     F32x4 = 1 << 22, | ||||
|     F64x2 = 1 << 23, | ||||
|     F64x3 = 1 << 24, | ||||
|     F64x4 = 1 << 25, | ||||
| }; | ||||
| DECLARE_ENUM_FLAG_OPERATORS(Type) | ||||
|  | ||||
|   | ||||
| @@ -23,6 +23,8 @@ Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} | ||||
|  | ||||
// Immediate constructors. Each one tags the Value with the Type matching its argument and
// stores the argument in the corresponding imm_* member.
| Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} | ||||
|  | ||||
// Signed 32-bit immediate: tagged Type::S32 and stored in imm_s32.
| Value::Value(s32 value) noexcept : type{Type::S32}, imm_s32{value} {} | ||||
|  | ||||
| Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} | ||||
|  | ||||
| Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} | ||||
| @@ -69,6 +71,7 @@ bool Value::operator==(const Value& other) const { | ||||
|         return imm_u16 == other.imm_u16; | ||||
|     case Type::U32: | ||||
|     case Type::F32: | ||||
|     case Type::S32: | ||||
|         return imm_u32 == other.imm_u32; | ||||
|     case Type::U64: | ||||
|     case Type::F64: | ||||
|   | ||||
| @@ -44,6 +44,7 @@ public: | ||||
|     explicit Value(u8 value) noexcept; | ||||
|     explicit Value(u16 value) noexcept; | ||||
|     explicit Value(u32 value) noexcept; | ||||
|     explicit Value(s32 value) noexcept; | ||||
|     explicit Value(f32 value) noexcept; | ||||
|     explicit Value(u64 value) noexcept; | ||||
|     explicit Value(f64 value) noexcept; | ||||
| @@ -66,6 +67,7 @@ public: | ||||
|     [[nodiscard]] u8 U8() const; | ||||
|     [[nodiscard]] u16 U16() const; | ||||
|     [[nodiscard]] u32 U32() const; | ||||
|     [[nodiscard]] s32 S32() const; | ||||
|     [[nodiscard]] f32 F32() const; | ||||
|     [[nodiscard]] u64 U64() const; | ||||
|     [[nodiscard]] f64 F64() const; | ||||
| @@ -85,6 +87,7 @@ private: | ||||
|         u8 imm_u8; | ||||
|         u16 imm_u16; | ||||
|         u32 imm_u32; | ||||
|         s32 imm_s32; | ||||
|         f32 imm_f32; | ||||
|         u64 imm_u64; | ||||
|         f64 imm_f64; | ||||
| @@ -266,6 +269,7 @@ using U8 = TypedValue<Type::U8>; | ||||
| using U16 = TypedValue<Type::U16>; | ||||
| using U32 = TypedValue<Type::U32>; | ||||
| using U64 = TypedValue<Type::U64>; | ||||
| using S32 = TypedValue<Type::S32>; | ||||
| using F16 = TypedValue<Type::F16>; | ||||
| using F32 = TypedValue<Type::F32>; | ||||
| using F64 = TypedValue<Type::F64>; | ||||
| @@ -377,6 +381,14 @@ inline u32 Value::U32() const { | ||||
|     return imm_u32; | ||||
| } | ||||
|  | ||||
// Returns the signed 32-bit immediate carried by this value.
// When the value is an identity, the request is forwarded to the first argument of the
// referenced instruction; otherwise the stored imm_s32 is returned (debug builds assert that
// the value is tagged Type::S32).
| inline s32 Value::S32() const { | ||||
|     if (!IsIdentity()) { | ||||
|         DEBUG_ASSERT(type == Type::S32); | ||||
|         return imm_s32; | ||||
|     } | ||||
|     return inst->Arg(0).S32(); | ||||
| } | ||||
|  | ||||
| inline f32 Value::F32() const { | ||||
|     if (IsIdentity()) { | ||||
|         return inst->Arg(0).F32(); | ||||
|   | ||||
| @@ -7,6 +7,7 @@ | ||||
|  | ||||
| #include <boost/container/small_vector.hpp> | ||||
|  | ||||
| #include "common/settings.h" | ||||
| #include "shader_recompiler/environment.h" | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/breadth_first_search.h" | ||||
| @@ -363,6 +364,14 @@ TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { | ||||
|     return env.ReadTextureType(lhs_raw | rhs_raw); | ||||
| } | ||||
|  | ||||
// Looks up the pixel format of the texture addressed by `cbuf`.
// The handle is the bitwise OR of two constant-buffer words: the one at (index, offset) and
// the one at the secondary location. When cbuf has no secondary location the primary one is
// read a second time, so the OR leaves the word unchanged.
| TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) { | ||||
|     const bool paired{cbuf.has_secondary}; | ||||
|     const u32 primary_word{env.ReadCbufValue(cbuf.index, cbuf.offset)}; | ||||
|     const u32 other_index{paired ? cbuf.secondary_index : cbuf.index}; | ||||
|     const u32 other_offset{paired ? cbuf.secondary_offset : cbuf.offset}; | ||||
|     const u32 other_word{env.ReadCbufValue(other_index, other_offset)}; | ||||
|     return env.ReadTexturePixelFormat(primary_word | other_word); | ||||
| } | ||||
|  | ||||
| class Descriptors { | ||||
| public: | ||||
|     explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, | ||||
| @@ -451,6 +460,38 @@ void PatchImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { | ||||
|                ir.FPMul(IR::F32(ir.CompositeExtract(coord, 1)), | ||||
|                         ir.FPRecip(ir.ConvertUToF(32, 32, ir.CompositeExtract(texture_size, 1)))))); | ||||
| } | ||||
|  | ||||
// Rewrites a texel fetch on an SNORM texture so that its users receive normalized floats.
//
// A copy of `inst` is inserted in front of it. Each of the copy's four components is bit-cast
// to S32, converted to float and multiplied by 1/max of the signed component type selected by
// `pixel_format`. Every use of the original `inst` is then redirected to the composite built
// from those four results.
//
// block        - block that contains `inst`
// inst         - the fetch instruction whose uses are replaced
// pixel_format - must be one of the six *_SNORM formats; anything else throws InvalidArgument
| void PathTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_format) { | ||||
|     const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; | ||||
|     IR::IREmitter ir{block, it}; | ||||
    // `signed char` is used instead of plain `char`: the signedness of plain `char` is
    // implementation-defined, and where it is unsigned its max() is 255 rather than 127.
|     const auto get_scale = [pixel_format]() -> float { | ||||
|         switch (pixel_format) { | ||||
|         case TexturePixelFormat::A8B8G8R8_SNORM: | ||||
|         case TexturePixelFormat::R8G8_SNORM: | ||||
|         case TexturePixelFormat::R8_SNORM: | ||||
|             return 1.f / std::numeric_limits<signed char>::max(); | ||||
|         case TexturePixelFormat::R16G16B16A16_SNORM: | ||||
|         case TexturePixelFormat::R16G16_SNORM: | ||||
|         case TexturePixelFormat::R16_SNORM: | ||||
|             return 1.f / std::numeric_limits<short>::max(); | ||||
|         default: | ||||
|             throw InvalidArgument("Invalid texture pixel format"); | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     const IR::Value new_inst{&*block.PrependNewInst(it, inst)}; | ||||
|     const IR::F32 x(ir.CompositeExtract(new_inst, 0)); | ||||
|     const IR::F32 y(ir.CompositeExtract(new_inst, 1)); | ||||
|     const IR::F32 z(ir.CompositeExtract(new_inst, 2)); | ||||
|     const IR::F32 w(ir.CompositeExtract(new_inst, 3)); | ||||
|     const IR::F16F32F64 scale(ir.Imm32(get_scale())); | ||||
    // The components are converted one statement at a time so the emitted instruction order
    // does not depend on the compiler's argument evaluation order.
|     const auto normalize = [&ir, &scale](const IR::F32& component) { | ||||
|         return ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(component)), scale); | ||||
|     }; | ||||
|     const auto norm_x{normalize(x)}; | ||||
|     const auto norm_y{normalize(y)}; | ||||
|     const auto norm_z{normalize(z)}; | ||||
|     const auto norm_w{normalize(w)}; | ||||
|     const IR::Value converted = ir.CompositeConstruct(norm_x, norm_y, norm_z, norm_w); | ||||
|     inst.ReplaceUsesWith(converted); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| void TexturePass(Environment& env, IR::Program& program) { | ||||
| @@ -597,6 +638,14 @@ void TexturePass(Environment& env, IR::Program& program) { | ||||
|         } else { | ||||
|             inst->SetArg(0, IR::Value{}); | ||||
|         } | ||||
|  | ||||
|         if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL && | ||||
|             inst->GetOpcode() == IR::Opcode::ImageFetch && flags.type == TextureType::Buffer) { | ||||
|             const auto pixel_format = ReadTexturePixelFormat(env, cbuf); | ||||
|             if (pixel_format != TexturePixelFormat::OTHER) { | ||||
|                 PathTexelFetch(*texture_inst.block, *texture_inst.inst, pixel_format); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -29,6 +29,16 @@ enum class TextureType : u32 { | ||||
| }; | ||||
| constexpr u32 NUM_TEXTURE_TYPES = 9; | ||||
|  | ||||
// Pixel formats that the shader texture pass tells apart.
// The six *_SNORM entries are the formats ConvertTexturePixelFormat maps explicitly; every
// other format is reported as OTHER, and the texture pass only patches fetches whose format
// is not OTHER.
// The enumerator values are written as raw bytes by GenericEnvironment::Serialize, so
// reordering entries changes the serialized representation.
| enum class TexturePixelFormat : u32 { | ||||
|     A8B8G8R8_SNORM, | ||||
|     R8_SNORM, | ||||
|     R8G8_SNORM, | ||||
|     R16G16B16A16_SNORM, | ||||
|     R16G16_SNORM, | ||||
|     R16_SNORM, | ||||
|     OTHER | ||||
| }; | ||||
|  | ||||
| enum class ImageFormat : u32 { | ||||
|     Typeless, | ||||
|     R8_UINT, | ||||
|   | ||||
| @@ -29,17 +29,17 @@ constexpr std::array PROGRAM_LUT{ | ||||
| [[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) { | ||||
|     switch (gl_format) { | ||||
|     case GL_RGBA8_SNORM: | ||||
|         return GL_RGBA8; | ||||
|         return GL_RGBA8I; | ||||
|     case GL_R8_SNORM: | ||||
|         return GL_R8; | ||||
|         return GL_R8I; | ||||
|     case GL_RGBA16_SNORM: | ||||
|         return GL_RGBA16; | ||||
|         return GL_RGBA16I; | ||||
|     case GL_R16_SNORM: | ||||
|         return GL_R16; | ||||
|         return GL_R16I; | ||||
|     case GL_RG16_SNORM: | ||||
|         return GL_RG16; | ||||
|         return GL_RG16I; | ||||
|     case GL_RG8_SNORM: | ||||
|         return GL_RG8; | ||||
|         return GL_RG8I; | ||||
|     default: | ||||
|         return gl_format; | ||||
|     } | ||||
| @@ -96,9 +96,6 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { | ||||
|     texture.Create(GL_TEXTURE_BUFFER); | ||||
|     const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; | ||||
|     const GLenum texture_format{GetTextureBufferFormat(gl_format)}; | ||||
|     if (texture_format != gl_format) { | ||||
|         LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM."); | ||||
|     } | ||||
|     glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size); | ||||
|     views.push_back({ | ||||
|         .offset = offset, | ||||
|   | ||||
| @@ -504,8 +504,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | ||||
|             } | ||||
|         } | ||||
|         if (info.uses_render_area) { | ||||
|             const auto render_area_width(static_cast<GLfloat>(regs.render_area.width)); | ||||
|             const auto render_area_height(static_cast<GLfloat>(regs.render_area.height)); | ||||
|             const auto render_area_width(static_cast<GLfloat>(regs.surface_clip.width)); | ||||
|             const auto render_area_height(static_cast<GLfloat>(regs.surface_clip.height)); | ||||
|             if (use_assembly) { | ||||
|                 glProgramLocalParameter4fARB(AssemblyStage(stage), 1, render_area_width, | ||||
|                                              render_area_height, 0.0f, 0.0f); | ||||
|   | ||||
| @@ -618,11 +618,11 @@ void RasterizerOpenGL::SyncViewport() { | ||||
|             } | ||||
|             flags[Dirty::Viewport0 + index] = false; | ||||
|  | ||||
|             if (!regs.viewport_transform_enabled) { | ||||
|                 const auto x = static_cast<GLfloat>(regs.render_area.x); | ||||
|                 const auto y = static_cast<GLfloat>(regs.render_area.y); | ||||
|                 const auto width = static_cast<GLfloat>(regs.render_area.width); | ||||
|                 const auto height = static_cast<GLfloat>(regs.render_area.height); | ||||
|             if (!regs.viewport_scale_offset_enbled) { | ||||
|                 const auto x = static_cast<GLfloat>(regs.surface_clip.x); | ||||
|                 const auto y = static_cast<GLfloat>(regs.surface_clip.y); | ||||
|                 const auto width = static_cast<GLfloat>(regs.surface_clip.width); | ||||
|                 const auto height = static_cast<GLfloat>(regs.surface_clip.height); | ||||
|                 glViewportIndexedf(static_cast<GLuint>(index), x, y, width != 0.0f ? width : 1.0f, | ||||
|                                    height != 0.0f ? height : 1.0f); | ||||
|                 continue; | ||||
|   | ||||
| @@ -444,8 +444,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | ||||
|         const auto& info{stage_infos[0]}; | ||||
|         if (info.uses_render_area) { | ||||
|             render_area.uses_render_area = true; | ||||
|             render_area.words = {static_cast<float>(regs.render_area.width), | ||||
|                                  static_cast<float>(regs.render_area.height)}; | ||||
|             render_area.words = {static_cast<float>(regs.surface_clip.width), | ||||
|                                  static_cast<float>(regs.surface_clip.height)}; | ||||
|         } | ||||
|     }}; | ||||
|     if constexpr (Spec::enabled_stages[0]) { | ||||
|   | ||||
| @@ -683,11 +683,11 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg | ||||
|     if (!state_tracker.TouchViewports()) { | ||||
|         return; | ||||
|     } | ||||
|     if (!regs.viewport_transform_enabled) { | ||||
|         const auto x = static_cast<float>(regs.render_area.x); | ||||
|         const auto y = static_cast<float>(regs.render_area.y); | ||||
|         const auto width = static_cast<float>(regs.render_area.width); | ||||
|         const auto height = static_cast<float>(regs.render_area.height); | ||||
|     if (!regs.viewport_scale_offset_enbled) { | ||||
|         const auto x = static_cast<float>(regs.surface_clip.x); | ||||
|         const auto y = static_cast<float>(regs.surface_clip.y); | ||||
|         const auto width = static_cast<float>(regs.surface_clip.width); | ||||
|         const auto height = static_cast<float>(regs.surface_clip.height); | ||||
|         VkViewport viewport{ | ||||
|             .x = x, | ||||
|             .y = y, | ||||
|   | ||||
| @@ -19,6 +19,7 @@ | ||||
| #include "video_core/engines/kepler_compute.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/shader_environment.h" | ||||
| #include "video_core/texture_cache/format_lookup_table.h" | ||||
| #include "video_core/textures/texture.h" | ||||
|  | ||||
| namespace VideoCommon { | ||||
| @@ -33,7 +34,7 @@ static u64 MakeCbufKey(u32 index, u32 offset) { | ||||
|     return (static_cast<u64>(index) << 32) | offset; | ||||
| } | ||||
|  | ||||
| static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { | ||||
| static Shader::TextureType ConvertTextureType(const Tegra::Texture::TICEntry& entry) { | ||||
|     switch (entry.texture_type) { | ||||
|     case Tegra::Texture::TextureType::Texture1D: | ||||
|         return Shader::TextureType::Color1D; | ||||
| @@ -59,6 +60,26 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { | ||||
|     } | ||||
| } | ||||
|  | ||||
// Maps the pixel format described by a texture descriptor to the shader-side enum.
// Only the six SNORM formats have a dedicated value; every other format yields OTHER.
| static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture::TICEntry& entry) { | ||||
|     using HostFormat = VideoCore::Surface::PixelFormat; | ||||
|     using ShaderFormat = Shader::TexturePixelFormat; | ||||
|     const auto host_format{PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type, | ||||
|                                                       entry.b_type, entry.a_type, | ||||
|                                                       entry.srgb_conversion)}; | ||||
|     switch (host_format) { | ||||
|     case HostFormat::A8B8G8R8_SNORM: | ||||
|         return ShaderFormat::A8B8G8R8_SNORM; | ||||
|     case HostFormat::R8_SNORM: | ||||
|         return ShaderFormat::R8_SNORM; | ||||
|     case HostFormat::R8G8_SNORM: | ||||
|         return ShaderFormat::R8G8_SNORM; | ||||
|     case HostFormat::R16G16B16A16_SNORM: | ||||
|         return ShaderFormat::R16G16B16A16_SNORM; | ||||
|     case HostFormat::R16G16_SNORM: | ||||
|         return ShaderFormat::R16G16_SNORM; | ||||
|     case HostFormat::R16_SNORM: | ||||
|         return ShaderFormat::R16_SNORM; | ||||
|     default: | ||||
|         return ShaderFormat::OTHER; | ||||
|     } | ||||
| } | ||||
|  | ||||
| static std::string_view StageToPrefix(Shader::Stage stage) { | ||||
|     switch (stage) { | ||||
|     case Shader::Stage::VertexB: | ||||
| @@ -178,10 +199,13 @@ void GenericEnvironment::Dump(u64 hash) { | ||||
| void GenericEnvironment::Serialize(std::ofstream& file) const { | ||||
|     const u64 code_size{static_cast<u64>(CachedSize())}; | ||||
|     const u64 num_texture_types{static_cast<u64>(texture_types.size())}; | ||||
|     const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())}; | ||||
|     const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; | ||||
|  | ||||
|     file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) | ||||
|         .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) | ||||
|         .write(reinterpret_cast<const char*>(&num_texture_pixel_formats), | ||||
|                sizeof(num_texture_pixel_formats)) | ||||
|         .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | ||||
|         .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) | ||||
|         .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) | ||||
| @@ -196,6 +220,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { | ||||
|         file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||||
|             .write(reinterpret_cast<const char*>(&type), sizeof(type)); | ||||
|     } | ||||
|     for (const auto& [key, format] : texture_pixel_formats) { | ||||
|         file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||||
|             .write(reinterpret_cast<const char*>(&format), sizeof(format)); | ||||
|     } | ||||
|     for (const auto& [key, type] : cbuf_values) { | ||||
|         file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||||
|             .write(reinterpret_cast<const char*>(&type), sizeof(type)); | ||||
| @@ -239,15 +267,13 @@ std::optional<u64> GenericEnvironment::TryFindSize() { | ||||
|     return std::nullopt; | ||||
| } | ||||
|  | ||||
| Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, | ||||
|                                                             bool via_header_index, u32 raw) { | ||||
| Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit, | ||||
|                                                              bool via_header_index, u32 raw) { | ||||
|     const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; | ||||
|     const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; | ||||
|     Tegra::Texture::TICEntry entry; | ||||
|     gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); | ||||
|     const Shader::TextureType result{ConvertType(entry)}; | ||||
|     texture_types.emplace(raw, result); | ||||
|     return result; | ||||
|     return entry; | ||||
| } | ||||
|  | ||||
| GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, | ||||
| @@ -307,13 +333,26 @@ u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | ||||
| Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { | ||||
|     const auto& regs{maxwell3d->regs}; | ||||
|     const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; | ||||
|     return ReadTextureTypeImpl(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, | ||||
|                                handle); | ||||
|     auto entry = | ||||
|         ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle); | ||||
|     const Shader::TextureType result{ConvertTextureType(entry)}; | ||||
|     texture_types.emplace(handle, result); | ||||
|     return result; | ||||
| } | ||||
|  | ||||
// Reads the texture descriptor selected by `handle` from the 3D engine's texture header
// table, converts its format to Shader::TexturePixelFormat, records the result in
// texture_pixel_formats (an already present entry for the handle is kept) and returns it.
| Shader::TexturePixelFormat GraphicsEnvironment::ReadTexturePixelFormat(u32 handle) { | ||||
|     const auto& gpu_regs{maxwell3d->regs}; | ||||
|     const bool uses_header_index{gpu_regs.sampler_binding == | ||||
|                                  Maxwell::SamplerBinding::ViaHeaderBinding}; | ||||
|     const Tegra::Texture::TICEntry tic = ReadTextureInfo( | ||||
|         gpu_regs.tex_header.Address(), gpu_regs.tex_header.limit, uses_header_index, handle); | ||||
|     const Shader::TexturePixelFormat pixel_format = ConvertTexturePixelFormat(tic); | ||||
|     texture_pixel_formats.emplace(handle, pixel_format); | ||||
|     return pixel_format; | ||||
| } | ||||
|  | ||||
| u32 GraphicsEnvironment::ReadViewportTransformState() { | ||||
|     const auto& regs{maxwell3d->regs}; | ||||
|     viewport_transform_state = regs.viewport_transform_enabled; | ||||
|     viewport_transform_state = regs.viewport_scale_offset_enbled; | ||||
|     return viewport_transform_state; | ||||
| } | ||||
|  | ||||
| @@ -345,7 +384,19 @@ u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | ||||
| Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) { | ||||
|     const auto& regs{kepler_compute->regs}; | ||||
|     const auto& qmd{kepler_compute->launch_description}; | ||||
|     return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); | ||||
|     auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); | ||||
|     const Shader::TextureType result{ConvertTextureType(entry)}; | ||||
|     texture_types.emplace(handle, result); | ||||
|     return result; | ||||
| } | ||||
|  | ||||
// Reads the texture descriptor selected by `handle` from the compute engine's TIC table,
// converts its format to Shader::TexturePixelFormat, records the result in
// texture_pixel_formats (an already present entry for the handle is kept) and returns it.
| Shader::TexturePixelFormat ComputeEnvironment::ReadTexturePixelFormat(u32 handle) { | ||||
|     const auto& compute_regs{kepler_compute->regs}; | ||||
|     const auto& launch{kepler_compute->launch_description}; | ||||
|     const bool uses_header_index{launch.linked_tsc != 0}; | ||||
|     const Tegra::Texture::TICEntry tic = ReadTextureInfo( | ||||
|         compute_regs.tic.Address(), compute_regs.tic.limit, uses_header_index, handle); | ||||
|     const Shader::TexturePixelFormat pixel_format = ConvertTexturePixelFormat(tic); | ||||
|     texture_pixel_formats.emplace(handle, pixel_format); | ||||
|     return pixel_format; | ||||
| } | ||||
|  | ||||
| u32 ComputeEnvironment::ReadViewportTransformState() { | ||||
| @@ -355,9 +406,12 @@ u32 ComputeEnvironment::ReadViewportTransformState() { | ||||
| void FileEnvironment::Deserialize(std::ifstream& file) { | ||||
|     u64 code_size{}; | ||||
|     u64 num_texture_types{}; | ||||
|     u64 num_texture_pixel_formats{}; | ||||
|     u64 num_cbuf_values{}; | ||||
|     file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) | ||||
|         .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) | ||||
|         .read(reinterpret_cast<char*>(&num_texture_pixel_formats), | ||||
|               sizeof(num_texture_pixel_formats)) | ||||
|         .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | ||||
|         .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) | ||||
|         .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) | ||||
| @@ -375,6 +429,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) { | ||||
|             .read(reinterpret_cast<char*>(&type), sizeof(type)); | ||||
|         texture_types.emplace(key, type); | ||||
|     } | ||||
|     for (size_t i = 0; i < num_texture_pixel_formats; ++i) { | ||||
|         u32 key; | ||||
|         Shader::TexturePixelFormat format; | ||||
|         file.read(reinterpret_cast<char*>(&key), sizeof(key)) | ||||
|             .read(reinterpret_cast<char*>(&format), sizeof(format)); | ||||
|         texture_pixel_formats.emplace(key, format); | ||||
|     } | ||||
|     for (size_t i = 0; i < num_cbuf_values; ++i) { | ||||
|         u64 key; | ||||
|         u32 value; | ||||
| @@ -422,6 +483,14 @@ Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) { | ||||
|     return it->second; | ||||
| } | ||||
|  | ||||
// Returns the pixel format stored for `handle` in texture_pixel_formats.
// Throws Shader::LogicError when no entry exists for the handle.
| Shader::TexturePixelFormat FileEnvironment::ReadTexturePixelFormat(u32 handle) { | ||||
|     if (const auto cached{texture_pixel_formats.find(handle)}; | ||||
|         cached != texture_pixel_formats.end()) { | ||||
|         return cached->second; | ||||
|     } | ||||
|     throw Shader::LogicError("Uncached read texture pixel format"); | ||||
| } | ||||
|  | ||||
// Returns the viewport_transform_state member as currently stored; nothing is read from
// elsewhere or modified.
| u32 FileEnvironment::ReadViewportTransformState() { | ||||
|     return viewport_transform_state; | ||||
| } | ||||
|   | ||||
| @@ -63,14 +63,15 @@ public: | ||||
| protected: | ||||
|     std::optional<u64> TryFindSize(); | ||||
|  | ||||
|     Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, | ||||
|                                             u32 raw); | ||||
|     Tegra::Texture::TICEntry ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit, | ||||
|                                              bool via_header_index, u32 raw); | ||||
|  | ||||
|     Tegra::MemoryManager* gpu_memory{}; | ||||
|     GPUVAddr program_base{}; | ||||
|  | ||||
|     std::vector<u64> code; | ||||
|     std::unordered_map<u32, Shader::TextureType> texture_types; | ||||
|     std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; | ||||
|     std::unordered_map<u64, u32> cbuf_values; | ||||
|  | ||||
|     u32 local_memory_size{}; | ||||
| @@ -104,6 +105,8 @@ public: | ||||
|  | ||||
|     Shader::TextureType ReadTextureType(u32 handle) override; | ||||
|  | ||||
|     Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override; | ||||
|  | ||||
|     u32 ReadViewportTransformState() override; | ||||
|  | ||||
| private: | ||||
| @@ -124,6 +127,8 @@ public: | ||||
|  | ||||
|     Shader::TextureType ReadTextureType(u32 handle) override; | ||||
|  | ||||
|     Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override; | ||||
|  | ||||
|     u32 ReadViewportTransformState() override; | ||||
|  | ||||
| private: | ||||
| @@ -149,6 +154,8 @@ public: | ||||
|  | ||||
|     [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override; | ||||
|  | ||||
|     [[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override; | ||||
|  | ||||
|     [[nodiscard]] u32 ReadViewportTransformState() override; | ||||
|  | ||||
|     [[nodiscard]] u32 LocalMemorySize() const override; | ||||
| @@ -164,6 +171,7 @@ public: | ||||
| private: | ||||
|     std::unique_ptr<u64[]> code; | ||||
|     std::unordered_map<u32, Shader::TextureType> texture_types; | ||||
|     std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; | ||||
|     std::unordered_map<u64, u32> cbuf_values; | ||||
|     std::array<u32, 3> workgroup_size{}; | ||||
|     u32 local_memory_size{}; | ||||
|   | ||||
| @@ -516,7 +516,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | ||||
|     const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); | ||||
|     const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; | ||||
|  | ||||
|     UNIMPLEMENTED_IF(info.tile_width_spacing > 0); | ||||
|     UNIMPLEMENTED_IF(copy.image_offset.x != 0); | ||||
|     UNIMPLEMENTED_IF(copy.image_offset.y != 0); | ||||
|     UNIMPLEMENTED_IF(copy.image_offset.z != 0); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user