Merge pull request #776 from bunnei/pica-state
GPU: Consolidate Pica state
This commit is contained in:
		| @@ -228,7 +228,7 @@ void GPUCommandListModel::OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace& | |||||||
|  |  | ||||||
| #define COMMAND_IN_RANGE(cmd_id, reg_name)   \ | #define COMMAND_IN_RANGE(cmd_id, reg_name)   \ | ||||||
|     (cmd_id >= PICA_REG_INDEX(reg_name) &&   \ |     (cmd_id >= PICA_REG_INDEX(reg_name) &&   \ | ||||||
|      cmd_id < PICA_REG_INDEX(reg_name) + sizeof(decltype(Pica::registers.reg_name)) / 4) |      cmd_id < PICA_REG_INDEX(reg_name) + sizeof(decltype(Pica::g_state.regs.reg_name)) / 4) | ||||||
|  |  | ||||||
| void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) { | void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) { | ||||||
|     const unsigned int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt(); |     const unsigned int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt(); | ||||||
| @@ -244,8 +244,8 @@ void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) { | |||||||
|         } else { |         } else { | ||||||
|             index = 2; |             index = 2; | ||||||
|         } |         } | ||||||
|         auto config = Pica::registers.GetTextures()[index].config; |         auto config = Pica::g_state.regs.GetTextures()[index].config; | ||||||
|         auto format = Pica::registers.GetTextures()[index].format; |         auto format = Pica::g_state.regs.GetTextures()[index].format; | ||||||
|         auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); |         auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); | ||||||
|  |  | ||||||
|         // TODO: Instead, emit a signal here to be caught by the main window widget. |         // TODO: Instead, emit a signal here to be caught by the main window widget. | ||||||
| @@ -270,8 +270,8 @@ void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) { | |||||||
|         } else { |         } else { | ||||||
|             index = 2; |             index = 2; | ||||||
|         } |         } | ||||||
|         auto config = Pica::registers.GetTextures()[index].config; |         auto config = Pica::g_state.regs.GetTextures()[index].config; | ||||||
|         auto format = Pica::registers.GetTextures()[index].format; |         auto format = Pica::g_state.regs.GetTextures()[index].format; | ||||||
|  |  | ||||||
|         auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); |         auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); | ||||||
|         u8* src = Memory::GetPhysicalPointer(config.GetPhysicalAddress()); |         u8* src = Memory::GetPhysicalPointer(config.GetPhysicalAddress()); | ||||||
|   | |||||||
| @@ -178,7 +178,7 @@ void GraphicsFramebufferWidget::OnUpdate() | |||||||
|     { |     { | ||||||
|         // TODO: Store a reference to the registers in the debug context instead of accessing them directly... |         // TODO: Store a reference to the registers in the debug context instead of accessing them directly... | ||||||
|  |  | ||||||
|         const auto& framebuffer = Pica::registers.framebuffer; |         const auto& framebuffer = Pica::g_state.regs.framebuffer; | ||||||
|  |  | ||||||
|         framebuffer_address = framebuffer.GetColorBufferPhysicalAddress(); |         framebuffer_address = framebuffer.GetColorBufferPhysicalAddress(); | ||||||
|         framebuffer_width = framebuffer.GetWidth(); |         framebuffer_width = framebuffer.GetWidth(); | ||||||
| @@ -191,7 +191,7 @@ void GraphicsFramebufferWidget::OnUpdate() | |||||||
|  |  | ||||||
|     case Source::DepthBuffer: |     case Source::DepthBuffer: | ||||||
|     { |     { | ||||||
|         const auto& framebuffer = Pica::registers.framebuffer; |         const auto& framebuffer = Pica::g_state.regs.framebuffer; | ||||||
|  |  | ||||||
|         framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress(); |         framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|         framebuffer_width = framebuffer.GetWidth(); |         framebuffer_width = framebuffer.GetWidth(); | ||||||
|   | |||||||
| @@ -253,13 +253,13 @@ void GraphicsVertexShaderModel::OnUpdate() | |||||||
|  |  | ||||||
|     info.Clear(); |     info.Clear(); | ||||||
|  |  | ||||||
|     for (auto instr : Pica::VertexShader::GetShaderBinary()) |     for (auto instr : Pica::g_state.vs.program_code) | ||||||
|         info.code.push_back({instr}); |         info.code.push_back({instr}); | ||||||
|  |  | ||||||
|     for (auto pattern : Pica::VertexShader::GetSwizzlePatterns()) |     for (auto pattern : Pica::g_state.vs.swizzle_data) | ||||||
|         info.swizzle_info.push_back({pattern}); |         info.swizzle_info.push_back({pattern}); | ||||||
|  |  | ||||||
|     info.labels.insert({Pica::registers.vs_main_offset, "main"}); |     info.labels.insert({ Pica::g_state.regs.vs_main_offset, "main" }); | ||||||
|  |  | ||||||
|     endResetModel(); |     endResetModel(); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -9,6 +9,7 @@ set(SRCS | |||||||
|             debug_utils/debug_utils.cpp |             debug_utils/debug_utils.cpp | ||||||
|             clipper.cpp |             clipper.cpp | ||||||
|             command_processor.cpp |             command_processor.cpp | ||||||
|  |             pica.cpp | ||||||
|             primitive_assembly.cpp |             primitive_assembly.cpp | ||||||
|             rasterizer.cpp |             rasterizer.cpp | ||||||
|             utils.cpp |             utils.cpp | ||||||
|   | |||||||
| @@ -58,12 +58,13 @@ static void InitScreenCoordinates(OutputVertex& vtx) | |||||||
|         float24 offset_z; |         float24 offset_z; | ||||||
|     } viewport; |     } viewport; | ||||||
|  |  | ||||||
|     viewport.halfsize_x = float24::FromRawFloat24(registers.viewport_size_x); |     const auto& regs = g_state.regs; | ||||||
|     viewport.halfsize_y = float24::FromRawFloat24(registers.viewport_size_y); |     viewport.halfsize_x = float24::FromRawFloat24(regs.viewport_size_x); | ||||||
|     viewport.offset_x   = float24::FromFloat32(static_cast<float>(registers.viewport_corner.x)); |     viewport.halfsize_y = float24::FromRawFloat24(regs.viewport_size_y); | ||||||
|     viewport.offset_y   = float24::FromFloat32(static_cast<float>(registers.viewport_corner.y)); |     viewport.offset_x   = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); | ||||||
|     viewport.zscale     = float24::FromRawFloat24(registers.viewport_depth_range); |     viewport.offset_y   = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); | ||||||
|     viewport.offset_z   = float24::FromRawFloat24(registers.viewport_depth_far_plane); |     viewport.zscale     = float24::FromRawFloat24(regs.viewport_depth_range); | ||||||
|  |     viewport.offset_z   = float24::FromRawFloat24(regs.viewport_depth_far_plane); | ||||||
|  |  | ||||||
|     float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; |     float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; | ||||||
|     vtx.color *= inv_w; |     vtx.color *= inv_w; | ||||||
|   | |||||||
| @@ -21,8 +21,6 @@ | |||||||
|  |  | ||||||
| namespace Pica { | namespace Pica { | ||||||
|  |  | ||||||
| Regs registers; |  | ||||||
|  |  | ||||||
| namespace CommandProcessor { | namespace CommandProcessor { | ||||||
|  |  | ||||||
| static int float_regs_counter = 0; | static int float_regs_counter = 0; | ||||||
| @@ -36,8 +34,9 @@ static u32 default_attr_write_buffer[3]; | |||||||
| Common::Profiling::TimingCategory category_drawing("Drawing"); | Common::Profiling::TimingCategory category_drawing("Drawing"); | ||||||
|  |  | ||||||
| static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||||
|  |     auto& regs = g_state.regs; | ||||||
|  |  | ||||||
|     if (id >= registers.NumIds()) |     if (id >= regs.NumIds()) | ||||||
|         return; |         return; | ||||||
|  |  | ||||||
|     // If we're skipping this frame, only allow trigger IRQ |     // If we're skipping this frame, only allow trigger IRQ | ||||||
| @@ -45,13 +44,13 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|         return; |         return; | ||||||
|  |  | ||||||
|     // TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value |     // TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value | ||||||
|     u32 old_value = registers[id]; |     u32 old_value = regs[id]; | ||||||
|     registers[id] = (old_value & ~mask) | (value & mask); |     regs[id] = (old_value & ~mask) | (value & mask); | ||||||
|  |  | ||||||
|     if (g_debug_context) |     if (g_debug_context) | ||||||
|         g_debug_context->OnEvent(DebugContext::Event::CommandLoaded, reinterpret_cast<void*>(&id)); |         g_debug_context->OnEvent(DebugContext::Event::CommandLoaded, reinterpret_cast<void*>(&id)); | ||||||
|  |  | ||||||
|     DebugUtils::OnPicaRegWrite(id, registers[id]); |     DebugUtils::OnPicaRegWrite(id, regs[id]); | ||||||
|  |  | ||||||
|     switch(id) { |     switch(id) { | ||||||
|         // Trigger IRQ |         // Trigger IRQ | ||||||
| @@ -65,12 +64,12 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|         { |         { | ||||||
|             Common::Profiling::ScopeTimer scope_timer(category_drawing); |             Common::Profiling::ScopeTimer scope_timer(category_drawing); | ||||||
|  |  | ||||||
|             DebugUtils::DumpTevStageConfig(registers.GetTevStages()); |             DebugUtils::DumpTevStageConfig(regs.GetTevStages()); | ||||||
|  |  | ||||||
|             if (g_debug_context) |             if (g_debug_context) | ||||||
|                 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); |                 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); | ||||||
|  |  | ||||||
|             const auto& attribute_config = registers.vertex_attributes; |             const auto& attribute_config = regs.vertex_attributes; | ||||||
|             const u32 base_address = attribute_config.GetPhysicalBaseAddress(); |             const u32 base_address = attribute_config.GetPhysicalBaseAddress(); | ||||||
|  |  | ||||||
|             // Information about internal vertex attributes |             // Information about internal vertex attributes | ||||||
| @@ -103,16 +102,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|             // Load vertices |             // Load vertices | ||||||
|             bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); |             bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); | ||||||
|  |  | ||||||
|             const auto& index_info = registers.index_array; |             const auto& index_info = regs.index_array; | ||||||
|             const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); |             const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); | ||||||
|             const u16* index_address_16 = (u16*)index_address_8; |             const u16* index_address_16 = (u16*)index_address_8; | ||||||
|             bool index_u16 = index_info.format != 0; |             bool index_u16 = index_info.format != 0; | ||||||
|  |  | ||||||
|             DebugUtils::GeometryDumper geometry_dumper; |             DebugUtils::GeometryDumper geometry_dumper; | ||||||
|             PrimitiveAssembler<VertexShader::OutputVertex> primitive_assembler(registers.triangle_topology.Value()); |             PrimitiveAssembler<VertexShader::OutputVertex> primitive_assembler(regs.triangle_topology.Value()); | ||||||
|             PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(registers.triangle_topology.Value()); |             PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(regs.triangle_topology.Value()); | ||||||
|  |  | ||||||
|             for (unsigned int index = 0; index < registers.num_vertices; ++index) |             for (unsigned int index = 0; index < regs.num_vertices; ++index) | ||||||
|             { |             { | ||||||
|                 unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; |                 unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; | ||||||
|  |  | ||||||
| @@ -131,7 +130,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|                 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { |                 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { | ||||||
|                     // Load the default attribute if we're configured to do so, this data will be overwritten by the loader data if it's set |                     // Load the default attribute if we're configured to do so, this data will be overwritten by the loader data if it's set | ||||||
|                     if (attribute_config.IsDefaultAttribute(i)) { |                     if (attribute_config.IsDefaultAttribute(i)) { | ||||||
|                         input.attr[i] = VertexShader::GetDefaultAttribute(i); |                         input.attr[i] = g_state.vs.default_attributes[i]; | ||||||
|                         LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", |                         LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", | ||||||
|                                   i, vertex, index, |                                   i, vertex, index, | ||||||
|                                   input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), |                                   input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | ||||||
| @@ -216,7 +215,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|  |  | ||||||
|         case PICA_REG_INDEX(vs_bool_uniforms): |         case PICA_REG_INDEX(vs_bool_uniforms): | ||||||
|             for (unsigned i = 0; i < 16; ++i) |             for (unsigned i = 0; i < 16; ++i) | ||||||
|                 VertexShader::GetBoolUniform(i) = (registers.vs_bool_uniforms.Value() & (1 << i)) != 0; |                 g_state.vs.uniforms.b[i] = (regs.vs_bool_uniforms.Value() & (1 << i)) != 0; | ||||||
|  |  | ||||||
|             break; |             break; | ||||||
|  |  | ||||||
| @@ -226,8 +225,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|         case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): |         case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): | ||||||
|         { |         { | ||||||
|             int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); |             int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); | ||||||
|             auto values = registers.vs_int_uniforms[index]; |             auto values = regs.vs_int_uniforms[index]; | ||||||
|             VertexShader::GetIntUniform(index) = Math::Vec4<u8>(values.x, values.y, values.z, values.w); |             g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); | ||||||
|             LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", |             LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", | ||||||
|                       index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); |                       index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); | ||||||
|             break; |             break; | ||||||
| @@ -242,7 +241,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7): |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7): | ||||||
|         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8): |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8): | ||||||
|         { |         { | ||||||
|             auto& uniform_setup = registers.vs_uniform_setup; |             auto& uniform_setup = regs.vs_uniform_setup; | ||||||
|  |  | ||||||
|             // TODO: Does actual hardware indeed keep an intermediate buffer or does |             // TODO: Does actual hardware indeed keep an intermediate buffer or does | ||||||
|             //       it directly write the values? |             //       it directly write the values? | ||||||
| @@ -255,7 +254,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|                 (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { |                 (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { | ||||||
|                 float_regs_counter = 0; |                 float_regs_counter = 0; | ||||||
|  |  | ||||||
|                 auto& uniform = VertexShader::GetFloatUniform(uniform_setup.index); |                 auto& uniform = g_state.vs.uniforms.f[uniform_setup.index]; | ||||||
|  |  | ||||||
|                 if (uniform_setup.index > 95) { |                 if (uniform_setup.index > 95) { | ||||||
|                     LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); |                     LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); | ||||||
| @@ -299,14 +298,14 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|             if (default_attr_counter >= 3) { |             if (default_attr_counter >= 3) { | ||||||
|                 default_attr_counter = 0; |                 default_attr_counter = 0; | ||||||
|  |  | ||||||
|                 auto& setup = registers.vs_default_attributes_setup; |                 auto& setup = regs.vs_default_attributes_setup; | ||||||
|  |  | ||||||
|                 if (setup.index >= 16) { |                 if (setup.index >= 16) { | ||||||
|                     LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); |                     LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); | ||||||
|                     break; |                     break; | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 Math::Vec4<float24>& attribute = VertexShader::GetDefaultAttribute(setup.index); |                 Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; | ||||||
|                  |                  | ||||||
|                 // NOTE: The destination component order indeed is "backwards" |                 // NOTE: The destination component order indeed is "backwards" | ||||||
|                 attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); |                 attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); | ||||||
| @@ -334,8 +333,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): | ||||||
|         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): | ||||||
|         { |         { | ||||||
|             VertexShader::SubmitShaderMemoryChange(registers.vs_program.offset, value); |             g_state.vs.program_code[regs.vs_program.offset] = value; | ||||||
|             registers.vs_program.offset++; |             regs.vs_program.offset++; | ||||||
|             break; |             break; | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -349,8 +348,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): | ||||||
|         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): | ||||||
|         { |         { | ||||||
|             VertexShader::SubmitSwizzleDataChange(registers.vs_swizzle_patterns.offset, value); |             g_state.vs.swizzle_data[regs.vs_swizzle_patterns.offset] = value; | ||||||
|             registers.vs_swizzle_patterns.offset++; |             regs.vs_swizzle_patterns.offset++; | ||||||
|             break; |             break; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -632,7 +632,7 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { | |||||||
|             info.width = texture_config.width; |             info.width = texture_config.width; | ||||||
|             info.height = texture_config.height; |             info.height = texture_config.height; | ||||||
|             info.stride = row_stride; |             info.stride = row_stride; | ||||||
|             info.format = registers.texture0_format; |             info.format = g_state.regs.texture0_format; | ||||||
|             Math::Vec4<u8> texture_color = LookupTexture(data, x, y, info); |             Math::Vec4<u8> texture_color = LookupTexture(data, x, y, info); | ||||||
|             buf[3 * x + y * row_stride    ] = texture_color.r(); |             buf[3 * x + y * row_stride    ] = texture_color.r(); | ||||||
|             buf[3 * x + y * row_stride + 1] = texture_color.g(); |             buf[3 * x + y * row_stride + 1] = texture_color.g(); | ||||||
|   | |||||||
							
								
								
									
										20
									
								
								src/video_core/pica.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								src/video_core/pica.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,20 @@ | |||||||
|  | // Copyright 2015 Citra Emulator Project | ||||||
|  | // Licensed under GPLv2 or any later version | ||||||
|  | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #include <string.h> | ||||||
|  |  | ||||||
|  | #include "pica.h" | ||||||
|  |  | ||||||
|  | namespace Pica { | ||||||
|  |  | ||||||
|  | /// The one global instance describing the current Pica state. | ||||||
|  | State g_state; | ||||||
|  |  | ||||||
|  | /// Initialize Pica state. There is currently nothing to set up, so this is intentionally empty. | ||||||
|  | void Init() {} | ||||||
|  |  | ||||||
|  | /// Shut down Pica state by overwriting every byte of g_state with zero. | ||||||
|  | void Shutdown() { | ||||||
|  |     // g_state is a State object, so sizeof(g_state) covers the whole structure. | ||||||
|  |     memset(&g_state, 0, sizeof(g_state)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } // namespace Pica | ||||||
| @@ -16,6 +16,8 @@ | |||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
|  |  | ||||||
|  | #include "math.h"  | ||||||
|  |  | ||||||
| namespace Pica { | namespace Pica { | ||||||
|  |  | ||||||
| // Returns index corresponding to the Regs member labeled by field_name | // Returns index corresponding to the Regs member labeled by field_name | ||||||
| @@ -356,50 +358,50 @@ struct Regs { | |||||||
|                  tev_stage4, tev_stage5 }; |                  tev_stage4, tev_stage5 }; | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     struct { |     enum class BlendEquation : u32 { | ||||||
|         enum CompareFunc : u32 { |         Add             = 0, | ||||||
|             Never               = 0, |         Subtract        = 1, | ||||||
|             Always              = 1, |         ReverseSubtract = 2, | ||||||
|             Equal               = 2, |         Min             = 3, | ||||||
|             NotEqual            = 3, |         Max             = 4, | ||||||
|             LessThan            = 4, |     }; | ||||||
|             LessThanOrEqual     = 5, |  | ||||||
|             GreaterThan         = 6, |  | ||||||
|             GreaterThanOrEqual  = 7, |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|  |     enum class BlendFactor : u32 { | ||||||
|  |         Zero                    = 0, | ||||||
|  |         One                     = 1, | ||||||
|  |         SourceColor             = 2, | ||||||
|  |         OneMinusSourceColor     = 3, | ||||||
|  |         DestColor               = 4, | ||||||
|  |         OneMinusDestColor       = 5, | ||||||
|  |         SourceAlpha             = 6, | ||||||
|  |         OneMinusSourceAlpha     = 7, | ||||||
|  |         DestAlpha               = 8, | ||||||
|  |         OneMinusDestAlpha       = 9, | ||||||
|  |         ConstantColor           = 10, | ||||||
|  |         OneMinusConstantColor   = 11, | ||||||
|  |         ConstantAlpha           = 12, | ||||||
|  |         OneMinusConstantAlpha   = 13, | ||||||
|  |         SourceAlphaSaturate     = 14, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     enum class CompareFunc : u32 { | ||||||
|  |         Never              = 0, | ||||||
|  |         Always             = 1, | ||||||
|  |         Equal              = 2, | ||||||
|  |         NotEqual           = 3, | ||||||
|  |         LessThan           = 4, | ||||||
|  |         LessThanOrEqual    = 5, | ||||||
|  |         GreaterThan        = 6, | ||||||
|  |         GreaterThanOrEqual = 7, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     struct { | ||||||
|         union { |         union { | ||||||
|             // If false, logic blending is used |             // If false, logic blending is used | ||||||
|             BitField<8, 1, u32> alphablend_enable; |             BitField<8, 1, u32> alphablend_enable; | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         union { |         union { | ||||||
|             enum class BlendEquation : u32 { |  | ||||||
|                 Add             = 0, |  | ||||||
|                 Subtract        = 1, |  | ||||||
|                 ReverseSubtract = 2, |  | ||||||
|                 Min             = 3, |  | ||||||
|                 Max             = 4 |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             enum BlendFactor : u32 { |  | ||||||
|                 Zero                    = 0, |  | ||||||
|                 One                     = 1, |  | ||||||
|                 SourceColor             = 2, |  | ||||||
|                 OneMinusSourceColor     = 3, |  | ||||||
|                 DestColor               = 4, |  | ||||||
|                 OneMinusDestColor       = 5, |  | ||||||
|                 SourceAlpha             = 6, |  | ||||||
|                 OneMinusSourceAlpha     = 7, |  | ||||||
|                 DestAlpha               = 8, |  | ||||||
|                 OneMinusDestAlpha       = 9, |  | ||||||
|                 ConstantColor           = 10, |  | ||||||
|                 OneMinusConstantColor   = 11, |  | ||||||
|                 ConstantAlpha           = 12, |  | ||||||
|                 OneMinusConstantAlpha   = 13, |  | ||||||
|                 SourceAlphaSaturate     = 14 |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             BitField< 0, 8, BlendEquation> blend_equation_rgb; |             BitField< 0, 8, BlendEquation> blend_equation_rgb; | ||||||
|             BitField< 8, 8, BlendEquation> blend_equation_a; |             BitField< 8, 8, BlendEquation> blend_equation_a; | ||||||
|  |  | ||||||
| @@ -454,49 +456,19 @@ struct Regs { | |||||||
|         INSERT_PADDING_WORDS(0x8); |         INSERT_PADDING_WORDS(0x8); | ||||||
|     } output_merger; |     } output_merger; | ||||||
|  |  | ||||||
|     enum DepthFormat : u32 { |     // Components are laid out in reverse byte order, most significant bits first. | ||||||
|         D16    = 0, |     enum class ColorFormat : u32 { | ||||||
|  |         RGBA8  = 0, | ||||||
|         D24    = 2, |         RGB8   = 1, | ||||||
|         D24S8  = 3 |         RGB5A1 = 2, | ||||||
|  |         RGB565 = 3, | ||||||
|  |         RGBA4  = 4, | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     // Returns the number of bytes in the specified depth format |     enum class DepthFormat : u32 { | ||||||
|     static u32 BytesPerDepthPixel(DepthFormat format) { |         D16   = 0, | ||||||
|         switch (format) { |         D24   = 2, | ||||||
|         case DepthFormat::D16: |         D24S8 = 3, | ||||||
|             return 2; |  | ||||||
|         case DepthFormat::D24: |  | ||||||
|             return 3; |  | ||||||
|         case DepthFormat::D24S8: |  | ||||||
|             return 4; |  | ||||||
|         default: |  | ||||||
|             LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); |  | ||||||
|             UNIMPLEMENTED(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Returns the number of bits per depth component of the specified depth format |  | ||||||
|     static u32 DepthBitsPerPixel(DepthFormat format) { |  | ||||||
|         switch (format) { |  | ||||||
|         case DepthFormat::D16: |  | ||||||
|             return 16; |  | ||||||
|         case DepthFormat::D24: |  | ||||||
|         case DepthFormat::D24S8: |  | ||||||
|             return 24; |  | ||||||
|         default: |  | ||||||
|             LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); |  | ||||||
|             UNIMPLEMENTED(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Components are laid out in reverse byte order, most significant bits first. |  | ||||||
|     enum ColorFormat : u32 { |  | ||||||
|         RGBA8    = 0, |  | ||||||
|         RGB8     = 1, |  | ||||||
|         RGB5A1   = 2, |  | ||||||
|         RGB565   = 3, |  | ||||||
|         RGBA4    = 4, |  | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     // Returns the number of bytes in the specified color format |     // Returns the number of bytes in the specified color format | ||||||
| @@ -554,6 +526,35 @@ struct Regs { | |||||||
|         } |         } | ||||||
|     } framebuffer; |     } framebuffer; | ||||||
|  |  | ||||||
|  |     /// Returns the number of bytes occupied by one pixel of the specified depth format: | ||||||
|  |     /// D16 -> 2, D24 -> 3, D24S8 -> 4. Any other value is logged as critical and reported | ||||||
|  |     /// through UNIMPLEMENTED(); 0 is returned if that macro lets execution continue. | ||||||
|  |     static u32 BytesPerDepthPixel(DepthFormat format) { | ||||||
|  |         switch (format) { | ||||||
|  |         case DepthFormat::D16: | ||||||
|  |             return 2; | ||||||
|  |         case DepthFormat::D24: | ||||||
|  |             return 3; | ||||||
|  |         case DepthFormat::D24S8: | ||||||
|  |             return 4; | ||||||
|  |         default: | ||||||
|  |             // DepthFormat is a scoped enum, so it is never implicitly converted to an integer; | ||||||
|  |             // cast explicitly so the argument really is the unsigned value that %u expects. | ||||||
|  |             LOG_CRITICAL(HW_GPU, "Unknown depth format %u", static_cast<u32>(format)); | ||||||
|  |             UNIMPLEMENTED(); | ||||||
|  |             // NOTE(review): assumes UNIMPLEMENTED() can return in some build configurations - confirm. | ||||||
|  |             // Returning a value keeps this non-void function from flowing off its end. | ||||||
|  |             return 0; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the number of bits used by the depth component of the specified depth format: | ||||||
|  |     /// D16 -> 16, D24 and D24S8 -> 24. Any other value is logged as critical and reported | ||||||
|  |     /// through UNIMPLEMENTED(); 0 is returned if that macro lets execution continue. | ||||||
|  |     static u32 DepthBitsPerPixel(DepthFormat format) { | ||||||
|  |         switch (format) { | ||||||
|  |         case DepthFormat::D16: | ||||||
|  |             return 16; | ||||||
|  |         case DepthFormat::D24: | ||||||
|  |         case DepthFormat::D24S8: | ||||||
|  |             return 24; | ||||||
|  |         default: | ||||||
|  |             // DepthFormat is a scoped enum, so it is never implicitly converted to an integer; | ||||||
|  |             // cast explicitly so the argument really is the unsigned value that %u expects. | ||||||
|  |             LOG_CRITICAL(HW_GPU, "Unknown depth format %u", static_cast<u32>(format)); | ||||||
|  |             UNIMPLEMENTED(); | ||||||
|  |             // NOTE(review): assumes UNIMPLEMENTED() can return in some build configurations - confirm. | ||||||
|  |             // Returning a value keeps this non-void function from flowing off its end. | ||||||
|  |             return 0; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     INSERT_PADDING_WORDS(0xe0); |     INSERT_PADDING_WORDS(0xe0); | ||||||
|  |  | ||||||
|     enum class VertexAttributeFormat : u64 { |     enum class VertexAttributeFormat : u64 { | ||||||
| @@ -953,9 +954,6 @@ ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5); | |||||||
| static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); | static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); | ||||||
| static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); | static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); | ||||||
|  |  | ||||||
| extern Regs registers; // TODO: Not sure if we want to have one global instance for this |  | ||||||
|  |  | ||||||
|  |  | ||||||
| struct float24 { | struct float24 { | ||||||
|     static float24 FromFloat32(float val) { |     static float24 FromFloat32(float val) { | ||||||
|         float24 ret; |         float24 ret; | ||||||
| @@ -1066,4 +1064,30 @@ union CommandHeader { | |||||||
|     BitField<31,  1, u32> group_commands; |     BitField<31,  1, u32> group_commands; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | /// Struct used to describe current Pica state: the register set plus the uniform, default-attribute, program-code and swizzle storage grouped under `vs` | ||||||
|  | struct State { | ||||||
|  |     Regs regs; /**< Register set (the Regs structure whose size is checked by the static_asserts above) */ | ||||||
|  |  | ||||||
|  |     struct { | ||||||
|  |         struct { | ||||||
|  |             Math::Vec4<float24> f[96]; /**< 96 four-component float24 uniforms */ | ||||||
|  |             std::array<bool, 16> b; /**< 16 boolean uniforms */ | ||||||
|  |             std::array<Math::Vec4<u8>, 4> i; /**< 4 four-component u8 uniforms */ | ||||||
|  |         } uniforms; | ||||||
|  |  | ||||||
|  |         Math::Vec4<float24> default_attributes[16]; /**< 16 four-component float24 default attribute values */ | ||||||
|  |  | ||||||
|  |         std::array<u32, 1024> program_code; /**< 1024 u32 words of program code */ | ||||||
|  |         std::array<u32, 1024> swizzle_data; /**< 1024 u32 words of swizzle data */ | ||||||
|  |     } vs; /**< NOTE(review): presumably the vertex shader state - confirm against the users of g_state.vs */ | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /// Initialize Pica state | ||||||
|  | void Init(); | ||||||
|  |  | ||||||
|  | /// Shutdown Pica state | ||||||
|  | void Shutdown(); | ||||||
|  |  | ||||||
|  | extern State g_state; ///< Current Pica state | ||||||
|  |  | ||||||
| } // namespace | } // namespace | ||||||
|   | |||||||
| @@ -24,72 +24,74 @@ namespace Pica { | |||||||
| namespace Rasterizer { | namespace Rasterizer { | ||||||
|  |  | ||||||
| static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | ||||||
|     const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); |     const auto& framebuffer = g_state.regs.framebuffer; | ||||||
|  |     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); | ||||||
|  |  | ||||||
|     // Similarly to textures, the render framebuffer is laid out from bottom to top, too. |     // Similarly to textures, the render framebuffer is laid out from bottom to top, too. | ||||||
|     // NOTE: The framebuffer height register contains the actual FB height minus one. |     // NOTE: The framebuffer height register contains the actual FB height minus one. | ||||||
|     y = (registers.framebuffer.height - y); |     y = framebuffer.height - y; | ||||||
|  |  | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|     u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); |     u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | ||||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel; |     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; | ||||||
|     u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; |     u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; | ||||||
|  |  | ||||||
|     switch (registers.framebuffer.color_format) { |     switch (framebuffer.color_format) { | ||||||
|     case Pica::Regs::ColorFormat::RGBA8: |     case Regs::ColorFormat::RGBA8: | ||||||
|         Color::EncodeRGBA8(color, dst_pixel); |         Color::EncodeRGBA8(color, dst_pixel); | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|     case Pica::Regs::ColorFormat::RGB8: |     case Regs::ColorFormat::RGB8: | ||||||
|         Color::EncodeRGB8(color, dst_pixel); |         Color::EncodeRGB8(color, dst_pixel); | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|     case Pica::Regs::ColorFormat::RGB5A1: |     case Regs::ColorFormat::RGB5A1: | ||||||
|         Color::EncodeRGB5A1(color, dst_pixel); |         Color::EncodeRGB5A1(color, dst_pixel); | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|     case Pica::Regs::ColorFormat::RGB565: |     case Regs::ColorFormat::RGB565: | ||||||
|         Color::EncodeRGB565(color, dst_pixel); |         Color::EncodeRGB565(color, dst_pixel); | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|     case Pica::Regs::ColorFormat::RGBA4: |     case Regs::ColorFormat::RGBA4: | ||||||
|         Color::EncodeRGBA4(color, dst_pixel); |         Color::EncodeRGBA4(color, dst_pixel); | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|     default: |     default: | ||||||
|         LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value()); |         LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); | ||||||
|         UNIMPLEMENTED(); |         UNIMPLEMENTED(); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| static const Math::Vec4<u8> GetPixel(int x, int y) { | static const Math::Vec4<u8> GetPixel(int x, int y) { | ||||||
|     const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); |     const auto& framebuffer = g_state.regs.framebuffer; | ||||||
|  |     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); | ||||||
|  |  | ||||||
|     y = (registers.framebuffer.height - y); |     y = framebuffer.height - y; | ||||||
|  |  | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|     u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); |     u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | ||||||
|     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel; |     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; | ||||||
|     u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; |     u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; | ||||||
|  |  | ||||||
|     switch (registers.framebuffer.color_format) { |     switch (framebuffer.color_format) { | ||||||
|     case Pica::Regs::ColorFormat::RGBA8: |     case Regs::ColorFormat::RGBA8: | ||||||
|         return Color::DecodeRGBA8(src_pixel); |         return Color::DecodeRGBA8(src_pixel); | ||||||
|  |  | ||||||
|     case Pica::Regs::ColorFormat::RGB8: |     case Regs::ColorFormat::RGB8: | ||||||
|         return Color::DecodeRGB8(src_pixel); |         return Color::DecodeRGB8(src_pixel); | ||||||
|  |  | ||||||
|     case Pica::Regs::ColorFormat::RGB5A1: |     case Regs::ColorFormat::RGB5A1: | ||||||
|         return Color::DecodeRGB5A1(src_pixel); |         return Color::DecodeRGB5A1(src_pixel); | ||||||
|  |  | ||||||
|     case Pica::Regs::ColorFormat::RGB565: |     case Regs::ColorFormat::RGB565: | ||||||
|         return Color::DecodeRGB565(src_pixel); |         return Color::DecodeRGB565(src_pixel); | ||||||
|  |  | ||||||
|     case Pica::Regs::ColorFormat::RGBA4: |     case Regs::ColorFormat::RGBA4: | ||||||
|         return Color::DecodeRGBA4(src_pixel); |         return Color::DecodeRGBA4(src_pixel); | ||||||
|  |  | ||||||
|     default: |     default: | ||||||
|         LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value()); |         LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); | ||||||
|         UNIMPLEMENTED(); |         UNIMPLEMENTED(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -97,58 +99,60 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||||||
| } | } | ||||||
|  |  | ||||||
| static u32 GetDepth(int x, int y) { | static u32 GetDepth(int x, int y) { | ||||||
|     const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); |     const auto& framebuffer = g_state.regs.framebuffer; | ||||||
|  |     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(addr); |     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||||
|  |  | ||||||
|     y = (registers.framebuffer.height - y); |     y = framebuffer.height - y; | ||||||
|      |      | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|     u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format); |     u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format); | ||||||
|     u32 stride = registers.framebuffer.width * bytes_per_pixel; |     u32 stride = framebuffer.width * bytes_per_pixel; | ||||||
|  |  | ||||||
|     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||||
|     u8* src_pixel = depth_buffer + src_offset; |     u8* src_pixel = depth_buffer + src_offset; | ||||||
|  |  | ||||||
|     switch (registers.framebuffer.depth_format) { |     switch (framebuffer.depth_format) { | ||||||
|         case Pica::Regs::DepthFormat::D16: |         case Regs::DepthFormat::D16: | ||||||
|             return Color::DecodeD16(src_pixel); |             return Color::DecodeD16(src_pixel); | ||||||
|         case Pica::Regs::DepthFormat::D24: |         case Regs::DepthFormat::D24: | ||||||
|             return Color::DecodeD24(src_pixel); |             return Color::DecodeD24(src_pixel); | ||||||
|         case Pica::Regs::DepthFormat::D24S8: |         case Regs::DepthFormat::D24S8: | ||||||
|             return Color::DecodeD24S8(src_pixel).x; |             return Color::DecodeD24S8(src_pixel).x; | ||||||
|         default: |         default: | ||||||
|             LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format); |             LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | ||||||
|             UNIMPLEMENTED(); |             UNIMPLEMENTED(); | ||||||
|             return 0; |             return 0; | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| static void SetDepth(int x, int y, u32 value) { | static void SetDepth(int x, int y, u32 value) { | ||||||
|     const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); |     const auto& framebuffer = g_state.regs.framebuffer; | ||||||
|  |     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(addr); |     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||||
|  |  | ||||||
|     y = (registers.framebuffer.height - y); |     y = framebuffer.height - y; | ||||||
|  |  | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|     u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format); |     u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format); | ||||||
|     u32 stride = registers.framebuffer.width * bytes_per_pixel; |     u32 stride = framebuffer.width * bytes_per_pixel; | ||||||
|  |  | ||||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||||
|     u8* dst_pixel = depth_buffer + dst_offset; |     u8* dst_pixel = depth_buffer + dst_offset; | ||||||
|  |  | ||||||
|     switch (registers.framebuffer.depth_format) { |     switch (framebuffer.depth_format) { | ||||||
|         case Pica::Regs::DepthFormat::D16: |         case Regs::DepthFormat::D16: | ||||||
|             Color::EncodeD16(value, dst_pixel); |             Color::EncodeD16(value, dst_pixel); | ||||||
|             break; |             break; | ||||||
|         case Pica::Regs::DepthFormat::D24: |         case Regs::DepthFormat::D24: | ||||||
|             Color::EncodeD24(value, dst_pixel); |             Color::EncodeD24(value, dst_pixel); | ||||||
|             break; |             break; | ||||||
|         case Pica::Regs::DepthFormat::D24S8: |         case Regs::DepthFormat::D24S8: | ||||||
|             // TODO(Subv): Implement the stencil buffer |             // TODO(Subv): Implement the stencil buffer | ||||||
|             Color::EncodeD24S8(value, 0, dst_pixel); |             Color::EncodeD24S8(value, 0, dst_pixel); | ||||||
|             break; |             break; | ||||||
|         default: |         default: | ||||||
|             LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format); |             LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | ||||||
|             UNIMPLEMENTED(); |             UNIMPLEMENTED(); | ||||||
|             break; |             break; | ||||||
|     } |     } | ||||||
| @@ -200,6 +204,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||||||
|                                     const VertexShader::OutputVertex& v2, |                                     const VertexShader::OutputVertex& v2, | ||||||
|                                     bool reversed = false) |                                     bool reversed = false) | ||||||
| { | { | ||||||
|  |     const auto& regs = g_state.regs; | ||||||
|     Common::Profiling::ScopeTimer timer(rasterization_category); |     Common::Profiling::ScopeTimer timer(rasterization_category); | ||||||
|  |  | ||||||
|     // vertex positions in rasterizer coordinates |     // vertex positions in rasterizer coordinates | ||||||
| @@ -216,14 +221,14 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||||||
|                                    ScreenToRasterizerCoordinates(v1.screenpos), |                                    ScreenToRasterizerCoordinates(v1.screenpos), | ||||||
|                                    ScreenToRasterizerCoordinates(v2.screenpos) }; |                                    ScreenToRasterizerCoordinates(v2.screenpos) }; | ||||||
|  |  | ||||||
|     if (registers.cull_mode == Regs::CullMode::KeepAll) { |     if (regs.cull_mode == Regs::CullMode::KeepAll) { | ||||||
|         // Make sure we always end up with a triangle wound counter-clockwise |         // Make sure we always end up with a triangle wound counter-clockwise | ||||||
|         if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) { |         if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) { | ||||||
|             ProcessTriangleInternal(v0, v2, v1, true); |             ProcessTriangleInternal(v0, v2, v1, true); | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|     } else { |     } else { | ||||||
|         if (!reversed && registers.cull_mode == Regs::CullMode::KeepClockWise) { |         if (!reversed && regs.cull_mode == Regs::CullMode::KeepClockWise) { | ||||||
|             // Reverse vertex order and use the CCW code path. |             // Reverse vertex order and use the CCW code path. | ||||||
|             ProcessTriangleInternal(v0, v2, v1, true); |             ProcessTriangleInternal(v0, v2, v1, true); | ||||||
|             return; |             return; | ||||||
| @@ -268,8 +273,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||||||
|  |  | ||||||
|     auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); |     auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); | ||||||
|  |  | ||||||
|     auto textures = registers.GetTextures(); |     auto textures = regs.GetTextures(); | ||||||
|     auto tev_stages = registers.GetTevStages(); |     auto tev_stages = regs.GetTevStages(); | ||||||
|  |  | ||||||
|     // Enter rasterization loop, starting at the center of the topleft bounding box corner. |     // Enter rasterization loop, starting at the center of the topleft bounding box corner. | ||||||
|     // TODO: Not sure if looping through x first might be faster |     // TODO: Not sure if looping through x first might be faster | ||||||
| @@ -384,8 +389,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||||||
|             // analogously. |             // analogously. | ||||||
|             Math::Vec4<u8> combiner_output; |             Math::Vec4<u8> combiner_output; | ||||||
|             Math::Vec4<u8> combiner_buffer = { |             Math::Vec4<u8> combiner_buffer = { | ||||||
|                 registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g, |                 regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, | ||||||
|                 registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a |                 regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a | ||||||
|             }; |             }; | ||||||
|  |  | ||||||
|             for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { |             for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { | ||||||
| @@ -609,51 +614,52 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||||||
|                 combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); |                 combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); | ||||||
|                 combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); |                 combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); | ||||||
|  |  | ||||||
|                 if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { |                 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { | ||||||
|                     combiner_buffer.r() = combiner_output.r(); |                     combiner_buffer.r() = combiner_output.r(); | ||||||
|                     combiner_buffer.g() = combiner_output.g(); |                     combiner_buffer.g() = combiner_output.g(); | ||||||
|                     combiner_buffer.b() = combiner_output.b(); |                     combiner_buffer.b() = combiner_output.b(); | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { |                 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { | ||||||
|                     combiner_buffer.a() = combiner_output.a(); |                     combiner_buffer.a() = combiner_output.a(); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             if (registers.output_merger.alpha_test.enable) { |             const auto& output_merger = regs.output_merger; | ||||||
|  |             if (output_merger.alpha_test.enable) { | ||||||
|                 bool pass = false; |                 bool pass = false; | ||||||
|  |  | ||||||
|                 switch (registers.output_merger.alpha_test.func) { |                 switch (output_merger.alpha_test.func) { | ||||||
|                 case registers.output_merger.Never: |                 case Regs::CompareFunc::Never: | ||||||
|                     pass = false; |                     pass = false; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.Always: |                 case Regs::CompareFunc::Always: | ||||||
|                     pass = true; |                     pass = true; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.Equal: |                 case Regs::CompareFunc::Equal: | ||||||
|                     pass = combiner_output.a() == registers.output_merger.alpha_test.ref; |                     pass = combiner_output.a() == output_merger.alpha_test.ref; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.NotEqual: |                 case Regs::CompareFunc::NotEqual: | ||||||
|                     pass = combiner_output.a() != registers.output_merger.alpha_test.ref; |                     pass = combiner_output.a() != output_merger.alpha_test.ref; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.LessThan: |                 case Regs::CompareFunc::LessThan: | ||||||
|                     pass = combiner_output.a() < registers.output_merger.alpha_test.ref; |                     pass = combiner_output.a() < output_merger.alpha_test.ref; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.LessThanOrEqual: |                 case Regs::CompareFunc::LessThanOrEqual: | ||||||
|                     pass = combiner_output.a() <= registers.output_merger.alpha_test.ref; |                     pass = combiner_output.a() <= output_merger.alpha_test.ref; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.GreaterThan: |                 case Regs::CompareFunc::GreaterThan: | ||||||
|                     pass = combiner_output.a() > registers.output_merger.alpha_test.ref; |                     pass = combiner_output.a() > output_merger.alpha_test.ref; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.GreaterThanOrEqual: |                 case Regs::CompareFunc::GreaterThanOrEqual: | ||||||
|                     pass = combiner_output.a() >= registers.output_merger.alpha_test.ref; |                     pass = combiner_output.a() >= output_merger.alpha_test.ref; | ||||||
|                     break; |                     break; | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
| @@ -662,8 +668,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||||||
|             } |             } | ||||||
|  |  | ||||||
|             // TODO: Does depth indeed only get written even if depth testing is enabled? |             // TODO: Does depth indeed only get written even if depth testing is enabled? | ||||||
|             if (registers.output_merger.depth_test_enable) { |             if (output_merger.depth_test_enable) { | ||||||
|                 unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format); |                 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); | ||||||
|                 u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + |                 u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + | ||||||
|                                v1.screenpos[2].ToFloat32() * w1 + |                                v1.screenpos[2].ToFloat32() * w1 + | ||||||
|                                v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); |                                v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); | ||||||
| @@ -671,36 +677,36 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||||||
|  |  | ||||||
|                 bool pass = false; |                 bool pass = false; | ||||||
|  |  | ||||||
|                 switch (registers.output_merger.depth_test_func) { |                 switch (output_merger.depth_test_func) { | ||||||
|                 case registers.output_merger.Never: |                 case Regs::CompareFunc::Never: | ||||||
|                     pass = false; |                     pass = false; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.Always: |                 case Regs::CompareFunc::Always: | ||||||
|                     pass = true; |                     pass = true; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.Equal: |                 case Regs::CompareFunc::Equal: | ||||||
|                     pass = z == ref_z; |                     pass = z == ref_z; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.NotEqual: |                 case Regs::CompareFunc::NotEqual: | ||||||
|                     pass = z != ref_z; |                     pass = z != ref_z; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.LessThan: |                 case Regs::CompareFunc::LessThan: | ||||||
|                     pass = z < ref_z; |                     pass = z < ref_z; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.LessThanOrEqual: |                 case Regs::CompareFunc::LessThanOrEqual: | ||||||
|                     pass = z <= ref_z; |                     pass = z <= ref_z; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.GreaterThan: |                 case Regs::CompareFunc::GreaterThan: | ||||||
|                     pass = z > ref_z; |                     pass = z > ref_z; | ||||||
|                     break; |                     break; | ||||||
|  |  | ||||||
|                 case registers.output_merger.GreaterThanOrEqual: |                 case Regs::CompareFunc::GreaterThanOrEqual: | ||||||
|                     pass = z >= ref_z; |                     pass = z >= ref_z; | ||||||
|                     break; |                     break; | ||||||
|                 } |                 } | ||||||
| @@ -708,59 +714,59 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||||||
|                 if (!pass) |                 if (!pass) | ||||||
|                     continue; |                     continue; | ||||||
|  |  | ||||||
|                 if (registers.output_merger.depth_write_enable) |                 if (output_merger.depth_write_enable) | ||||||
|                     SetDepth(x >> 4, y >> 4, z); |                     SetDepth(x >> 4, y >> 4, z); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             auto dest = GetPixel(x >> 4, y >> 4); |             auto dest = GetPixel(x >> 4, y >> 4); | ||||||
|             Math::Vec4<u8> blend_output = combiner_output; |             Math::Vec4<u8> blend_output = combiner_output; | ||||||
|  |  | ||||||
|             if (registers.output_merger.alphablend_enable) { |             if (output_merger.alphablend_enable) { | ||||||
|                 auto params = registers.output_merger.alpha_blending; |                 auto params = output_merger.alpha_blending; | ||||||
|  |  | ||||||
|                 auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> { |                 auto LookupFactorRGB = [&](Regs::BlendFactor factor) -> Math::Vec3<u8> { | ||||||
|                     switch (factor) { |                     switch (factor) { | ||||||
|                     case params.Zero: |                     case Regs::BlendFactor::Zero : | ||||||
|                         return Math::Vec3<u8>(0, 0, 0); |                         return Math::Vec3<u8>(0, 0, 0); | ||||||
|  |  | ||||||
|                     case params.One: |                     case Regs::BlendFactor::One : | ||||||
|                         return Math::Vec3<u8>(255, 255, 255); |                         return Math::Vec3<u8>(255, 255, 255); | ||||||
|  |  | ||||||
|                     case params.SourceColor: |                     case Regs::BlendFactor::SourceColor: | ||||||
|                         return combiner_output.rgb(); |                         return combiner_output.rgb(); | ||||||
|  |  | ||||||
|                     case params.OneMinusSourceColor: |                     case Regs::BlendFactor::OneMinusSourceColor: | ||||||
|                         return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b()); |                         return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b()); | ||||||
|  |  | ||||||
|                     case params.DestColor: |                     case Regs::BlendFactor::DestColor: | ||||||
|                         return dest.rgb(); |                         return dest.rgb(); | ||||||
|  |  | ||||||
|                     case params.OneMinusDestColor: |                     case Regs::BlendFactor::OneMinusDestColor: | ||||||
|                         return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b()); |                         return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b()); | ||||||
|  |  | ||||||
|                     case params.SourceAlpha: |                     case Regs::BlendFactor::SourceAlpha: | ||||||
|                         return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a()); |                         return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a()); | ||||||
|  |  | ||||||
|                     case params.OneMinusSourceAlpha: |                     case Regs::BlendFactor::OneMinusSourceAlpha: | ||||||
|                         return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a()); |                         return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a()); | ||||||
|  |  | ||||||
|                     case params.DestAlpha: |                     case Regs::BlendFactor::DestAlpha: | ||||||
|                         return Math::Vec3<u8>(dest.a(), dest.a(), dest.a()); |                         return Math::Vec3<u8>(dest.a(), dest.a(), dest.a()); | ||||||
|  |  | ||||||
|                     case params.OneMinusDestAlpha: |                     case Regs::BlendFactor::OneMinusDestAlpha: | ||||||
|                         return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a()); |                         return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a()); | ||||||
|  |  | ||||||
|                     case params.ConstantColor: |                     case Regs::BlendFactor::ConstantColor: | ||||||
|                         return Math::Vec3<u8>(registers.output_merger.blend_const.r, registers.output_merger.blend_const.g, registers.output_merger.blend_const.b); |                         return Math::Vec3<u8>(output_merger.blend_const.r, output_merger.blend_const.g, output_merger.blend_const.b); | ||||||
|  |  | ||||||
|                     case params.OneMinusConstantColor: |                     case Regs::BlendFactor::OneMinusConstantColor: | ||||||
|                         return Math::Vec3<u8>(255 - registers.output_merger.blend_const.r, 255 - registers.output_merger.blend_const.g, 255 - registers.output_merger.blend_const.b); |                         return Math::Vec3<u8>(255 - output_merger.blend_const.r, 255 - output_merger.blend_const.g, 255 - output_merger.blend_const.b); | ||||||
|  |  | ||||||
|                     case params.ConstantAlpha: |                     case Regs::BlendFactor::ConstantAlpha: | ||||||
|                         return Math::Vec3<u8>(registers.output_merger.blend_const.a, registers.output_merger.blend_const.a, registers.output_merger.blend_const.a); |                         return Math::Vec3<u8>(output_merger.blend_const.a, output_merger.blend_const.a, output_merger.blend_const.a); | ||||||
|  |  | ||||||
|                     case params.OneMinusConstantAlpha: |                     case Regs::BlendFactor::OneMinusConstantAlpha: | ||||||
|                         return Math::Vec3<u8>(255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a); |                         return Math::Vec3<u8>(255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a); | ||||||
|  |  | ||||||
|                     default: |                     default: | ||||||
|                         LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); |                         LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); | ||||||
| @@ -769,31 +775,31 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||||||
|                     } |                     } | ||||||
|                 }; |                 }; | ||||||
|  |  | ||||||
|                 auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 { |                 auto LookupFactorA = [&](Regs::BlendFactor factor) -> u8 { | ||||||
|                     switch (factor) { |                     switch (factor) { | ||||||
|                     case params.Zero: |                     case Regs::BlendFactor::Zero: | ||||||
|                         return 0; |                         return 0; | ||||||
|  |  | ||||||
|                     case params.One: |                     case Regs::BlendFactor::One: | ||||||
|                         return 255; |                         return 255; | ||||||
|  |  | ||||||
|                     case params.SourceAlpha: |                     case Regs::BlendFactor::SourceAlpha: | ||||||
|                         return combiner_output.a(); |                         return combiner_output.a(); | ||||||
|  |  | ||||||
|                     case params.OneMinusSourceAlpha: |                     case Regs::BlendFactor::OneMinusSourceAlpha: | ||||||
|                         return 255 - combiner_output.a(); |                         return 255 - combiner_output.a(); | ||||||
|  |  | ||||||
|                     case params.DestAlpha: |                     case Regs::BlendFactor::DestAlpha: | ||||||
|                         return dest.a(); |                         return dest.a(); | ||||||
|  |  | ||||||
|                     case params.OneMinusDestAlpha: |                     case Regs::BlendFactor::OneMinusDestAlpha: | ||||||
|                         return 255 - dest.a(); |                         return 255 - dest.a(); | ||||||
|  |  | ||||||
|                     case params.ConstantAlpha: |                     case Regs::BlendFactor::ConstantAlpha: | ||||||
|                         return registers.output_merger.blend_const.a; |                         return output_merger.blend_const.a; | ||||||
|  |  | ||||||
|                     case params.OneMinusConstantAlpha: |                     case Regs::BlendFactor::OneMinusConstantAlpha: | ||||||
|                         return 255 - registers.output_merger.blend_const.a; |                         return 255 - output_merger.blend_const.a; | ||||||
|  |  | ||||||
|                     default: |                     default: | ||||||
|                         LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); |                         LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); | ||||||
| @@ -802,7 +808,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||||||
|                     } |                     } | ||||||
|                 }; |                 }; | ||||||
|  |  | ||||||
|                 using BlendEquation = decltype(params)::BlendEquation; |                 using BlendEquation = Regs::BlendEquation; | ||||||
|                 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, |                 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | ||||||
|                                                        const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, |                                                        const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | ||||||
|                                                        BlendEquation equation) { |                                                        BlendEquation equation) { | ||||||
| @@ -812,29 +818,29 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||||||
|                     auto dst_result = (dest * destfactor).Cast<int>(); |                     auto dst_result = (dest * destfactor).Cast<int>(); | ||||||
|  |  | ||||||
|                     switch (equation) { |                     switch (equation) { | ||||||
|                     case BlendEquation::Add: |                     case Regs::BlendEquation::Add: | ||||||
|                         result = (src_result + dst_result) / 255; |                         result = (src_result + dst_result) / 255; | ||||||
|                         break; |                         break; | ||||||
|  |  | ||||||
|                     case BlendEquation::Subtract: |                     case Regs::BlendEquation::Subtract: | ||||||
|                         result = (src_result - dst_result) / 255; |                         result = (src_result - dst_result) / 255; | ||||||
|                         break; |                         break; | ||||||
|  |  | ||||||
|                     case BlendEquation::ReverseSubtract: |                     case Regs::BlendEquation::ReverseSubtract: | ||||||
|                         result = (dst_result - src_result) / 255; |                         result = (dst_result - src_result) / 255; | ||||||
|                         break; |                         break; | ||||||
|  |  | ||||||
|                     // TODO: How do these two actually work? |                     // TODO: How do these two actually work? | ||||||
|                     //       OpenGL doesn't include the blend factors in the min/max computations, |                     //       OpenGL doesn't include the blend factors in the min/max computations, | ||||||
|                     //       but is this what the 3DS actually does? |                     //       but is this what the 3DS actually does? | ||||||
|                     case BlendEquation::Min: |                     case Regs::BlendEquation::Min: | ||||||
|                         result.r() = std::min(src.r(), dest.r()); |                         result.r() = std::min(src.r(), dest.r()); | ||||||
|                         result.g() = std::min(src.g(), dest.g()); |                         result.g() = std::min(src.g(), dest.g()); | ||||||
|                         result.b() = std::min(src.b(), dest.b()); |                         result.b() = std::min(src.b(), dest.b()); | ||||||
|                         result.a() = std::min(src.a(), dest.a()); |                         result.a() = std::min(src.a(), dest.a()); | ||||||
|                         break; |                         break; | ||||||
|  |  | ||||||
|                     case BlendEquation::Max: |                     case Regs::BlendEquation::Max: | ||||||
|                         result.r() = std::max(src.r(), dest.r()); |                         result.r() = std::max(src.r(), dest.r()); | ||||||
|                         result.g() = std::max(src.g(), dest.g()); |                         result.g() = std::max(src.g(), dest.g()); | ||||||
|                         result.b() = std::max(src.b(), dest.b()); |                         result.b() = std::max(src.b(), dest.b()); | ||||||
| @@ -860,15 +866,15 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||||||
|                 blend_output     = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); |                 blend_output     = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); | ||||||
|                 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); |                 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); | ||||||
|             } else { |             } else { | ||||||
|                 LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op); |                 LOG_CRITICAL(HW_GPU, "logic op: %x", output_merger.logic_op); | ||||||
|                 UNIMPLEMENTED(); |                 UNIMPLEMENTED(); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             const Math::Vec4<u8> result = { |             const Math::Vec4<u8> result = { | ||||||
|                 registers.output_merger.red_enable   ? blend_output.r() : dest.r(), |                 output_merger.red_enable   ? blend_output.r() : dest.r(), | ||||||
|                 registers.output_merger.green_enable ? blend_output.g() : dest.g(), |                 output_merger.green_enable ? blend_output.g() : dest.g(), | ||||||
|                 registers.output_merger.blue_enable  ? blend_output.b() : dest.b(), |                 output_merger.blue_enable  ? blend_output.b() : dest.b(), | ||||||
|                 registers.output_merger.alpha_enable ? blend_output.a() : dest.a() |                 output_merger.alpha_enable ? blend_output.a() : dest.a() | ||||||
|             }; |             }; | ||||||
|  |  | ||||||
|             DrawPixel(x >> 4, y >> 4, result); |             DrawPixel(x >> 4, y >> 4, result); | ||||||
|   | |||||||
| @@ -46,7 +46,7 @@ void RasterizerOpenGL::InitObjects() { | |||||||
|  |  | ||||||
|     uniform_tev_combiner_buffer_color = glGetUniformLocation(shader.handle, "tev_combiner_buffer_color"); |     uniform_tev_combiner_buffer_color = glGetUniformLocation(shader.handle, "tev_combiner_buffer_color"); | ||||||
|  |  | ||||||
|     const auto tev_stages = Pica::registers.GetTevStages(); |     const auto tev_stages = Pica::g_state.regs.GetTevStages(); | ||||||
|     for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { |     for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { | ||||||
|         auto& uniform_tev_cfg = uniform_tev_cfgs[tev_stage_index]; |         auto& uniform_tev_cfg = uniform_tev_cfgs[tev_stage_index]; | ||||||
|  |  | ||||||
| @@ -128,6 +128,8 @@ void RasterizerOpenGL::InitObjects() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::Reset() { | void RasterizerOpenGL::Reset() { | ||||||
|  |     const auto& regs = Pica::g_state.regs; | ||||||
|  |  | ||||||
|     SyncCullMode(); |     SyncCullMode(); | ||||||
|     SyncBlendEnabled(); |     SyncBlendEnabled(); | ||||||
|     SyncBlendFuncs(); |     SyncBlendFuncs(); | ||||||
| @@ -137,46 +139,46 @@ void RasterizerOpenGL::Reset() { | |||||||
|     SyncDepthTest(); |     SyncDepthTest(); | ||||||
|  |  | ||||||
|     // TEV stage 0 |     // TEV stage 0 | ||||||
|     SyncTevSources(0, Pica::registers.tev_stage0); |     SyncTevSources(0, regs.tev_stage0); | ||||||
|     SyncTevModifiers(0, Pica::registers.tev_stage0); |     SyncTevModifiers(0, regs.tev_stage0); | ||||||
|     SyncTevOps(0, Pica::registers.tev_stage0); |     SyncTevOps(0, regs.tev_stage0); | ||||||
|     SyncTevColor(0, Pica::registers.tev_stage0); |     SyncTevColor(0, regs.tev_stage0); | ||||||
|     SyncTevMultipliers(0, Pica::registers.tev_stage0); |     SyncTevMultipliers(0, regs.tev_stage0); | ||||||
|  |  | ||||||
|     // TEV stage 1 |     // TEV stage 1 | ||||||
|     SyncTevSources(1, Pica::registers.tev_stage1); |     SyncTevSources(1, regs.tev_stage1); | ||||||
|     SyncTevModifiers(1, Pica::registers.tev_stage1); |     SyncTevModifiers(1, regs.tev_stage1); | ||||||
|     SyncTevOps(1, Pica::registers.tev_stage1); |     SyncTevOps(1, regs.tev_stage1); | ||||||
|     SyncTevColor(1, Pica::registers.tev_stage1); |     SyncTevColor(1, regs.tev_stage1); | ||||||
|     SyncTevMultipliers(1, Pica::registers.tev_stage1); |     SyncTevMultipliers(1, regs.tev_stage1); | ||||||
|  |  | ||||||
|     // TEV stage 2 |     // TEV stage 2 | ||||||
|     SyncTevSources(2, Pica::registers.tev_stage2); |     SyncTevSources(2, regs.tev_stage2); | ||||||
|     SyncTevModifiers(2, Pica::registers.tev_stage2); |     SyncTevModifiers(2, regs.tev_stage2); | ||||||
|     SyncTevOps(2, Pica::registers.tev_stage2); |     SyncTevOps(2, regs.tev_stage2); | ||||||
|     SyncTevColor(2, Pica::registers.tev_stage2); |     SyncTevColor(2, regs.tev_stage2); | ||||||
|     SyncTevMultipliers(2, Pica::registers.tev_stage2); |     SyncTevMultipliers(2, regs.tev_stage2); | ||||||
|  |  | ||||||
|     // TEV stage 3 |     // TEV stage 3 | ||||||
|     SyncTevSources(3, Pica::registers.tev_stage3); |     SyncTevSources(3, regs.tev_stage3); | ||||||
|     SyncTevModifiers(3, Pica::registers.tev_stage3); |     SyncTevModifiers(3, regs.tev_stage3); | ||||||
|     SyncTevOps(3, Pica::registers.tev_stage3); |     SyncTevOps(3, regs.tev_stage3); | ||||||
|     SyncTevColor(3, Pica::registers.tev_stage3); |     SyncTevColor(3, regs.tev_stage3); | ||||||
|     SyncTevMultipliers(3, Pica::registers.tev_stage3); |     SyncTevMultipliers(3, regs.tev_stage3); | ||||||
|  |  | ||||||
|     // TEV stage 4 |     // TEV stage 4 | ||||||
|     SyncTevSources(4, Pica::registers.tev_stage4); |     SyncTevSources(4, regs.tev_stage4); | ||||||
|     SyncTevModifiers(4, Pica::registers.tev_stage4); |     SyncTevModifiers(4, regs.tev_stage4); | ||||||
|     SyncTevOps(4, Pica::registers.tev_stage4); |     SyncTevOps(4, regs.tev_stage4); | ||||||
|     SyncTevColor(4, Pica::registers.tev_stage4); |     SyncTevColor(4, regs.tev_stage4); | ||||||
|     SyncTevMultipliers(4, Pica::registers.tev_stage4); |     SyncTevMultipliers(4, regs.tev_stage4); | ||||||
|  |  | ||||||
|     // TEV stage 5 |     // TEV stage 5 | ||||||
|     SyncTevSources(5, Pica::registers.tev_stage5); |     SyncTevSources(5, regs.tev_stage5); | ||||||
|     SyncTevModifiers(5, Pica::registers.tev_stage5); |     SyncTevModifiers(5, regs.tev_stage5); | ||||||
|     SyncTevOps(5, Pica::registers.tev_stage5); |     SyncTevOps(5, regs.tev_stage5); | ||||||
|     SyncTevColor(5, Pica::registers.tev_stage5); |     SyncTevColor(5, regs.tev_stage5); | ||||||
|     SyncTevMultipliers(5, Pica::registers.tev_stage5); |     SyncTevMultipliers(5, regs.tev_stage5); | ||||||
|  |  | ||||||
|     SyncCombinerColor(); |     SyncCombinerColor(); | ||||||
|     SyncCombinerWriteFlags(); |     SyncCombinerWriteFlags(); | ||||||
| @@ -210,6 +212,8 @@ void RasterizerOpenGL::CommitFramebuffer() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | ||||||
|  |     const auto& regs = Pica::g_state.regs; | ||||||
|  |  | ||||||
|     if (!Settings::values.use_hw_renderer) |     if (!Settings::values.use_hw_renderer) | ||||||
|         return; |         return; | ||||||
|  |  | ||||||
| @@ -247,104 +251,104 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||||||
|  |  | ||||||
|     // TEV stage 0 |     // TEV stage 0 | ||||||
|     case PICA_REG_INDEX(tev_stage0.color_source1): |     case PICA_REG_INDEX(tev_stage0.color_source1): | ||||||
|         SyncTevSources(0, Pica::registers.tev_stage0); |         SyncTevSources(0, regs.tev_stage0); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage0.color_modifier1): |     case PICA_REG_INDEX(tev_stage0.color_modifier1): | ||||||
|         SyncTevModifiers(0, Pica::registers.tev_stage0); |         SyncTevModifiers(0, regs.tev_stage0); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage0.color_op): |     case PICA_REG_INDEX(tev_stage0.color_op): | ||||||
|         SyncTevOps(0, Pica::registers.tev_stage0); |         SyncTevOps(0, regs.tev_stage0); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage0.const_r): |     case PICA_REG_INDEX(tev_stage0.const_r): | ||||||
|         SyncTevColor(0, Pica::registers.tev_stage0); |         SyncTevColor(0, regs.tev_stage0); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage0.color_scale): |     case PICA_REG_INDEX(tev_stage0.color_scale): | ||||||
|         SyncTevMultipliers(0, Pica::registers.tev_stage0); |         SyncTevMultipliers(0, regs.tev_stage0); | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|     // TEV stage 1 |     // TEV stage 1 | ||||||
|     case PICA_REG_INDEX(tev_stage1.color_source1): |     case PICA_REG_INDEX(tev_stage1.color_source1): | ||||||
|         SyncTevSources(1, Pica::registers.tev_stage1); |         SyncTevSources(1, regs.tev_stage1); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage1.color_modifier1): |     case PICA_REG_INDEX(tev_stage1.color_modifier1): | ||||||
|         SyncTevModifiers(1, Pica::registers.tev_stage1); |         SyncTevModifiers(1, regs.tev_stage1); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage1.color_op): |     case PICA_REG_INDEX(tev_stage1.color_op): | ||||||
|         SyncTevOps(1, Pica::registers.tev_stage1); |         SyncTevOps(1, regs.tev_stage1); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage1.const_r): |     case PICA_REG_INDEX(tev_stage1.const_r): | ||||||
|         SyncTevColor(1, Pica::registers.tev_stage1); |         SyncTevColor(1, regs.tev_stage1); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage1.color_scale): |     case PICA_REG_INDEX(tev_stage1.color_scale): | ||||||
|         SyncTevMultipliers(1, Pica::registers.tev_stage1); |         SyncTevMultipliers(1, regs.tev_stage1); | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|     // TEV stage 2 |     // TEV stage 2 | ||||||
|     case PICA_REG_INDEX(tev_stage2.color_source1): |     case PICA_REG_INDEX(tev_stage2.color_source1): | ||||||
|         SyncTevSources(2, Pica::registers.tev_stage2); |         SyncTevSources(2, regs.tev_stage2); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage2.color_modifier1): |     case PICA_REG_INDEX(tev_stage2.color_modifier1): | ||||||
|         SyncTevModifiers(2, Pica::registers.tev_stage2); |         SyncTevModifiers(2, regs.tev_stage2); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage2.color_op): |     case PICA_REG_INDEX(tev_stage2.color_op): | ||||||
|         SyncTevOps(2, Pica::registers.tev_stage2); |         SyncTevOps(2, regs.tev_stage2); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage2.const_r): |     case PICA_REG_INDEX(tev_stage2.const_r): | ||||||
|         SyncTevColor(2, Pica::registers.tev_stage2); |         SyncTevColor(2, regs.tev_stage2); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage2.color_scale): |     case PICA_REG_INDEX(tev_stage2.color_scale): | ||||||
|         SyncTevMultipliers(2, Pica::registers.tev_stage2); |         SyncTevMultipliers(2, regs.tev_stage2); | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|     // TEV stage 3 |     // TEV stage 3 | ||||||
|     case PICA_REG_INDEX(tev_stage3.color_source1): |     case PICA_REG_INDEX(tev_stage3.color_source1): | ||||||
|         SyncTevSources(3, Pica::registers.tev_stage3); |         SyncTevSources(3, regs.tev_stage3); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage3.color_modifier1): |     case PICA_REG_INDEX(tev_stage3.color_modifier1): | ||||||
|         SyncTevModifiers(3, Pica::registers.tev_stage3); |         SyncTevModifiers(3, regs.tev_stage3); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage3.color_op): |     case PICA_REG_INDEX(tev_stage3.color_op): | ||||||
|         SyncTevOps(3, Pica::registers.tev_stage3); |         SyncTevOps(3, regs.tev_stage3); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage3.const_r): |     case PICA_REG_INDEX(tev_stage3.const_r): | ||||||
|         SyncTevColor(3, Pica::registers.tev_stage3); |         SyncTevColor(3, regs.tev_stage3); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage3.color_scale): |     case PICA_REG_INDEX(tev_stage3.color_scale): | ||||||
|         SyncTevMultipliers(3, Pica::registers.tev_stage3); |         SyncTevMultipliers(3, regs.tev_stage3); | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|     // TEV stage 4 |     // TEV stage 4 | ||||||
|     case PICA_REG_INDEX(tev_stage4.color_source1): |     case PICA_REG_INDEX(tev_stage4.color_source1): | ||||||
|         SyncTevSources(4, Pica::registers.tev_stage4); |         SyncTevSources(4, regs.tev_stage4); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage4.color_modifier1): |     case PICA_REG_INDEX(tev_stage4.color_modifier1): | ||||||
|         SyncTevModifiers(4, Pica::registers.tev_stage4); |         SyncTevModifiers(4, regs.tev_stage4); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage4.color_op): |     case PICA_REG_INDEX(tev_stage4.color_op): | ||||||
|         SyncTevOps(4, Pica::registers.tev_stage4); |         SyncTevOps(4, regs.tev_stage4); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage4.const_r): |     case PICA_REG_INDEX(tev_stage4.const_r): | ||||||
|         SyncTevColor(4, Pica::registers.tev_stage4); |         SyncTevColor(4, regs.tev_stage4); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage4.color_scale): |     case PICA_REG_INDEX(tev_stage4.color_scale): | ||||||
|         SyncTevMultipliers(4, Pica::registers.tev_stage4); |         SyncTevMultipliers(4, regs.tev_stage4); | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|     // TEV stage 5 |     // TEV stage 5 | ||||||
|     case PICA_REG_INDEX(tev_stage5.color_source1): |     case PICA_REG_INDEX(tev_stage5.color_source1): | ||||||
|         SyncTevSources(5, Pica::registers.tev_stage5); |         SyncTevSources(5, regs.tev_stage5); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage5.color_modifier1): |     case PICA_REG_INDEX(tev_stage5.color_modifier1): | ||||||
|         SyncTevModifiers(5, Pica::registers.tev_stage5); |         SyncTevModifiers(5, regs.tev_stage5); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage5.color_op): |     case PICA_REG_INDEX(tev_stage5.color_op): | ||||||
|         SyncTevOps(5, Pica::registers.tev_stage5); |         SyncTevOps(5, regs.tev_stage5); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage5.const_r): |     case PICA_REG_INDEX(tev_stage5.const_r): | ||||||
|         SyncTevColor(5, Pica::registers.tev_stage5); |         SyncTevColor(5, regs.tev_stage5); | ||||||
|         break; |         break; | ||||||
|     case PICA_REG_INDEX(tev_stage5.color_scale): |     case PICA_REG_INDEX(tev_stage5.color_scale): | ||||||
|         SyncTevMultipliers(5, Pica::registers.tev_stage5); |         SyncTevMultipliers(5, regs.tev_stage5); | ||||||
|         break; |         break; | ||||||
|      |      | ||||||
|     // TEV combiner buffer color |     // TEV combiner buffer color | ||||||
| @@ -360,16 +364,18 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::NotifyPreRead(PAddr addr, u32 size) { | void RasterizerOpenGL::NotifyPreRead(PAddr addr, u32 size) { | ||||||
|  |     const auto& regs = Pica::g_state.regs; | ||||||
|  |  | ||||||
|     if (!Settings::values.use_hw_renderer) |     if (!Settings::values.use_hw_renderer) | ||||||
|         return; |         return; | ||||||
|  |  | ||||||
|     PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress(); |     PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); | ||||||
|     u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(Pica::registers.framebuffer.color_format) |     u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) | ||||||
|                             * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); |                             * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); | ||||||
|  |  | ||||||
|     PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress(); |     PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|     u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(Pica::registers.framebuffer.depth_format) |     u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) | ||||||
|                             * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); |                             * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); | ||||||
|  |  | ||||||
|     // If source memory region overlaps 3DS framebuffers, commit them before the copy happens |     // If source memory region overlaps 3DS framebuffers, commit them before the copy happens | ||||||
|     if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) |     if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) | ||||||
| @@ -380,16 +386,18 @@ void RasterizerOpenGL::NotifyPreRead(PAddr addr, u32 size) { | |||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::NotifyFlush(PAddr addr, u32 size) { | void RasterizerOpenGL::NotifyFlush(PAddr addr, u32 size) { | ||||||
|  |     const auto& regs = Pica::g_state.regs; | ||||||
|  |  | ||||||
|     if (!Settings::values.use_hw_renderer) |     if (!Settings::values.use_hw_renderer) | ||||||
|         return; |         return; | ||||||
|  |  | ||||||
|     PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress(); |     PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); | ||||||
|     u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(Pica::registers.framebuffer.color_format) |     u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) | ||||||
|                             * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); |                             * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); | ||||||
|  |  | ||||||
|     PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress(); |     PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|     u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(Pica::registers.framebuffer.depth_format) |     u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) | ||||||
|                             * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); |                             * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); | ||||||
|  |  | ||||||
|     // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL |     // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL | ||||||
|     if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) |     if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) | ||||||
| @@ -501,14 +509,16 @@ void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica:: | |||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::SyncFramebuffer() { | void RasterizerOpenGL::SyncFramebuffer() { | ||||||
|     PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress(); |     const auto& regs = Pica::g_state.regs; | ||||||
|     Pica::Regs::ColorFormat new_fb_color_format = Pica::registers.framebuffer.color_format; |  | ||||||
|  |  | ||||||
|     PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress(); |     PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); | ||||||
|     Pica::Regs::DepthFormat new_fb_depth_format = Pica::registers.framebuffer.depth_format; |     Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format; | ||||||
|  |  | ||||||
|     bool fb_size_changed = fb_color_texture.width != Pica::registers.framebuffer.GetWidth() || |     PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|                            fb_color_texture.height != Pica::registers.framebuffer.GetHeight(); |     Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format; | ||||||
|  |  | ||||||
|  |     bool fb_size_changed = fb_color_texture.width != regs.framebuffer.GetWidth() || | ||||||
|  |                            fb_color_texture.height != regs.framebuffer.GetHeight(); | ||||||
|  |  | ||||||
|     bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format || |     bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format || | ||||||
|                                  fb_size_changed; |                                  fb_size_changed; | ||||||
| @@ -532,12 +542,12 @@ void RasterizerOpenGL::SyncFramebuffer() { | |||||||
|     // Reconfigure framebuffer textures if any property has changed |     // Reconfigure framebuffer textures if any property has changed | ||||||
|     if (color_fb_prop_changed) { |     if (color_fb_prop_changed) { | ||||||
|         ReconfigureColorTexture(fb_color_texture, new_fb_color_format, |         ReconfigureColorTexture(fb_color_texture, new_fb_color_format, | ||||||
|                                 Pica::registers.framebuffer.GetWidth(), Pica::registers.framebuffer.GetHeight()); |                                 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (depth_fb_prop_changed) { |     if (depth_fb_prop_changed) { | ||||||
|         ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format, |         ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format, | ||||||
|                                 Pica::registers.framebuffer.GetWidth(), Pica::registers.framebuffer.GetHeight()); |                                 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight()); | ||||||
|  |  | ||||||
|         // Only attach depth buffer as stencil if it supports stencil |         // Only attach depth buffer as stencil if it supports stencil | ||||||
|         switch (new_fb_depth_format) { |         switch (new_fb_depth_format) { | ||||||
| @@ -572,7 +582,9 @@ void RasterizerOpenGL::SyncFramebuffer() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::SyncCullMode() { | void RasterizerOpenGL::SyncCullMode() { | ||||||
|     switch (Pica::registers.cull_mode) { |     const auto& regs = Pica::g_state.regs; | ||||||
|  |  | ||||||
|  |     switch (regs.cull_mode) { | ||||||
|     case Pica::Regs::CullMode::KeepAll: |     case Pica::Regs::CullMode::KeepAll: | ||||||
|         state.cull.enabled = false; |         state.cull.enabled = false; | ||||||
|         break; |         break; | ||||||
| @@ -588,25 +600,26 @@ void RasterizerOpenGL::SyncCullMode() { | |||||||
|         break; |         break; | ||||||
|  |  | ||||||
|     default: |     default: | ||||||
|         LOG_CRITICAL(Render_OpenGL, "Unknown cull mode %d", Pica::registers.cull_mode.Value()); |         LOG_CRITICAL(Render_OpenGL, "Unknown cull mode %d", regs.cull_mode.Value()); | ||||||
|         UNIMPLEMENTED(); |         UNIMPLEMENTED(); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::SyncBlendEnabled() { | void RasterizerOpenGL::SyncBlendEnabled() { | ||||||
|     state.blend.enabled = Pica::registers.output_merger.alphablend_enable; |     state.blend.enabled = (Pica::g_state.regs.output_merger.alphablend_enable == 1); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::SyncBlendFuncs() { | void RasterizerOpenGL::SyncBlendFuncs() { | ||||||
|     state.blend.src_rgb_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_source_rgb); |     const auto& regs = Pica::g_state.regs; | ||||||
|     state.blend.dst_rgb_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_dest_rgb); |     state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); | ||||||
|     state.blend.src_a_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_source_a); |     state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); | ||||||
|     state.blend.dst_a_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_dest_a); |     state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); | ||||||
|  |     state.blend.dst_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_a); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::SyncBlendColor() { | void RasterizerOpenGL::SyncBlendColor() { | ||||||
|     auto blend_color = PicaToGL::ColorRGBA8((u8*)&Pica::registers.output_merger.blend_const.r); |     auto blend_color = PicaToGL::ColorRGBA8((u8*)&Pica::g_state.regs.output_merger.blend_const.r); | ||||||
|     state.blend.color.red = blend_color[0]; |     state.blend.color.red = blend_color[0]; | ||||||
|     state.blend.color.green = blend_color[1]; |     state.blend.color.green = blend_color[1]; | ||||||
|     state.blend.color.blue = blend_color[2]; |     state.blend.color.blue = blend_color[2]; | ||||||
| @@ -614,9 +627,10 @@ void RasterizerOpenGL::SyncBlendColor() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::SyncAlphaTest() { | void RasterizerOpenGL::SyncAlphaTest() { | ||||||
|     glUniform1i(uniform_alphatest_enabled, Pica::registers.output_merger.alpha_test.enable); |     const auto& regs = Pica::g_state.regs; | ||||||
|     glUniform1i(uniform_alphatest_func, Pica::registers.output_merger.alpha_test.func); |     glUniform1i(uniform_alphatest_enabled, regs.output_merger.alpha_test.enable); | ||||||
|     glUniform1f(uniform_alphatest_ref, Pica::registers.output_merger.alpha_test.ref / 255.0f); |     glUniform1i(uniform_alphatest_func, (GLint)regs.output_merger.alpha_test.func.Value()); | ||||||
|  |     glUniform1f(uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::SyncStencilTest() { | void RasterizerOpenGL::SyncStencilTest() { | ||||||
| @@ -624,9 +638,10 @@ void RasterizerOpenGL::SyncStencilTest() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::SyncDepthTest() { | void RasterizerOpenGL::SyncDepthTest() { | ||||||
|     state.depth.test_enabled = Pica::registers.output_merger.depth_test_enable; |     const auto& regs = Pica::g_state.regs; | ||||||
|     state.depth.test_func = PicaToGL::CompareFunc(Pica::registers.output_merger.depth_test_func); |     state.depth.test_enabled = (regs.output_merger.depth_test_enable == 1); | ||||||
|     state.depth.write_mask = Pica::registers.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE; |     state.depth.test_func = PicaToGL::CompareFunc(regs.output_merger.depth_test_func); | ||||||
|  |     state.depth.write_mask = regs.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE; | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::SyncTevSources(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { | void RasterizerOpenGL::SyncTevSources(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { | ||||||
| @@ -667,34 +682,37 @@ void RasterizerOpenGL::SyncTevMultipliers(unsigned stage_index, const Pica::Regs | |||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::SyncCombinerColor() { | void RasterizerOpenGL::SyncCombinerColor() { | ||||||
|     auto combiner_color = PicaToGL::ColorRGBA8((u8*)&Pica::registers.tev_combiner_buffer_color.r); |     auto combiner_color = PicaToGL::ColorRGBA8((u8*)&Pica::g_state.regs.tev_combiner_buffer_color.r); | ||||||
|     glUniform4fv(uniform_tev_combiner_buffer_color, 1, combiner_color.data()); |     glUniform4fv(uniform_tev_combiner_buffer_color, 1, combiner_color.data()); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::SyncCombinerWriteFlags() { | void RasterizerOpenGL::SyncCombinerWriteFlags() { | ||||||
|     const auto tev_stages = Pica::registers.GetTevStages(); |     const auto& regs = Pica::g_state.regs; | ||||||
|  |     const auto tev_stages = regs.GetTevStages(); | ||||||
|     for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { |     for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { | ||||||
|         glUniform2i(uniform_tev_cfgs[tev_stage_index].updates_combiner_buffer_color_alpha, |         glUniform2i(uniform_tev_cfgs[tev_stage_index].updates_combiner_buffer_color_alpha, | ||||||
|                     Pica::registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index), |                     regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index), | ||||||
|                     Pica::registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)); |                     regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::SyncDrawState() { | void RasterizerOpenGL::SyncDrawState() { | ||||||
|  |     const auto& regs = Pica::g_state.regs; | ||||||
|  |  | ||||||
|     // Sync the viewport |     // Sync the viewport | ||||||
|     GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(Pica::registers.viewport_size_x).ToFloat32() * 2; |     GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_x).ToFloat32() * 2; | ||||||
|     GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(Pica::registers.viewport_size_y).ToFloat32() * 2; |     GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_y).ToFloat32() * 2; | ||||||
|  |  | ||||||
|     // OpenGL uses different y coordinates, so negate corner offset and flip origin |     // OpenGL uses different y coordinates, so negate corner offset and flip origin | ||||||
|     // TODO: Ensure viewport_corner.x should not be negated or origin flipped |     // TODO: Ensure viewport_corner.x should not be negated or origin flipped | ||||||
|     // TODO: Use floating-point viewports for accuracy if supported |     // TODO: Use floating-point viewports for accuracy if supported | ||||||
|     glViewport((GLsizei)static_cast<float>(Pica::registers.viewport_corner.x), |     glViewport((GLsizei)static_cast<float>(regs.viewport_corner.x), | ||||||
|                 -(GLsizei)static_cast<float>(Pica::registers.viewport_corner.y) |                 -(GLsizei)static_cast<float>(regs.viewport_corner.y) | ||||||
|                     + Pica::registers.framebuffer.GetHeight() - viewport_height, |                     + regs.framebuffer.GetHeight() - viewport_height, | ||||||
|                 viewport_width, viewport_height); |                 viewport_width, viewport_height); | ||||||
|  |  | ||||||
|     // Sync bound texture(s), upload if not cached |     // Sync bound texture(s), upload if not cached | ||||||
|     const auto pica_textures = Pica::registers.GetTextures(); |     const auto pica_textures = regs.GetTextures(); | ||||||
|     for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { |     for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { | ||||||
|         const auto& texture = pica_textures[texture_index]; |         const auto& texture = pica_textures[texture_index]; | ||||||
|  |  | ||||||
| @@ -707,7 +725,7 @@ void RasterizerOpenGL::SyncDrawState() { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     // Skip processing TEV stages that simply pass the previous stage results through |     // Skip processing TEV stages that simply pass the previous stage results through | ||||||
|     const auto tev_stages = Pica::registers.GetTevStages(); |     const auto tev_stages = regs.GetTevStages(); | ||||||
|     for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { |     for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { | ||||||
|         glUniform1i(uniform_tev_cfgs[tev_stage_index].enabled, !IsPassThroughTevStage(tev_stages[tev_stage_index])); |         glUniform1i(uniform_tev_cfgs[tev_stage_index].enabled, !IsPassThroughTevStage(tev_stages[tev_stage_index])); | ||||||
|     } |     } | ||||||
| @@ -716,7 +734,7 @@ void RasterizerOpenGL::SyncDrawState() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::ReloadColorBuffer() { | void RasterizerOpenGL::ReloadColorBuffer() { | ||||||
|     u8* color_buffer = Memory::GetPhysicalPointer(Pica::registers.framebuffer.GetColorBufferPhysicalAddress()); |     u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress()); | ||||||
|  |  | ||||||
|     if (color_buffer == nullptr) |     if (color_buffer == nullptr) | ||||||
|         return; |         return; | ||||||
| @@ -748,7 +766,7 @@ void RasterizerOpenGL::ReloadColorBuffer() { | |||||||
|  |  | ||||||
| void RasterizerOpenGL::ReloadDepthBuffer() { | void RasterizerOpenGL::ReloadDepthBuffer() { | ||||||
|     // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil |     // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil | ||||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(Pica::registers.framebuffer.GetDepthBufferPhysicalAddress()); |     u8* depth_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetDepthBufferPhysicalAddress()); | ||||||
|  |  | ||||||
|     if (depth_buffer == nullptr) { |     if (depth_buffer == nullptr) { | ||||||
|         return; |         return; | ||||||
|   | |||||||
| @@ -41,7 +41,7 @@ inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) { | |||||||
|     return gl_mode; |     return gl_mode; | ||||||
| } | } | ||||||
|  |  | ||||||
| inline GLenum BlendFunc(u32 factor) { | inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) { | ||||||
|     static const GLenum blend_func_table[] = { |     static const GLenum blend_func_table[] = { | ||||||
|         GL_ZERO,                     // BlendFactor::Zero |         GL_ZERO,                     // BlendFactor::Zero | ||||||
|         GL_ONE,                      // BlendFactor::One |         GL_ONE,                      // BlendFactor::One | ||||||
| @@ -61,17 +61,17 @@ inline GLenum BlendFunc(u32 factor) { | |||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     // Range check table for input |     // Range check table for input | ||||||
|     if (factor >= ARRAY_SIZE(blend_func_table)) { |     if ((unsigned)factor >= ARRAY_SIZE(blend_func_table)) { | ||||||
|         LOG_CRITICAL(Render_OpenGL, "Unknown blend factor %d", factor); |         LOG_CRITICAL(Render_OpenGL, "Unknown blend factor %d", factor); | ||||||
|         UNREACHABLE(); |         UNREACHABLE(); | ||||||
|  |  | ||||||
|         return GL_ONE; |         return GL_ONE; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return blend_func_table[factor]; |     return blend_func_table[(unsigned)factor]; | ||||||
| } | } | ||||||
|  |  | ||||||
| inline GLenum CompareFunc(u32 func) { | inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { | ||||||
|     static const GLenum compare_func_table[] = { |     static const GLenum compare_func_table[] = { | ||||||
|         GL_NEVER,    // CompareFunc::Never |         GL_NEVER,    // CompareFunc::Never | ||||||
|         GL_ALWAYS,   // CompareFunc::Always |         GL_ALWAYS,   // CompareFunc::Always | ||||||
| @@ -84,14 +84,14 @@ inline GLenum CompareFunc(u32 func) { | |||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     // Range check table for input |     // Range check table for input | ||||||
|     if (func >= ARRAY_SIZE(compare_func_table)) { |     if ((unsigned)func >= ARRAY_SIZE(compare_func_table)) { | ||||||
|         LOG_CRITICAL(Render_OpenGL, "Unknown compare function %d", func); |         LOG_CRITICAL(Render_OpenGL, "Unknown compare function %d", func); | ||||||
|         UNREACHABLE(); |         UNREACHABLE(); | ||||||
|  |  | ||||||
|         return GL_ALWAYS; |         return GL_ALWAYS; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return compare_func_table[func]; |     return compare_func_table[(unsigned)func]; | ||||||
| } | } | ||||||
|  |  | ||||||
| inline std::array<GLfloat, 4> ColorRGBA8(const u8* bytes) { | inline std::array<GLfloat, 4> ColorRGBA8(const u8* bytes) { | ||||||
|   | |||||||
| @@ -26,55 +26,8 @@ namespace Pica { | |||||||
|  |  | ||||||
| namespace VertexShader { | namespace VertexShader { | ||||||
|  |  | ||||||
| static struct { |  | ||||||
|     Math::Vec4<float24> f[96]; |  | ||||||
|  |  | ||||||
|     std::array<bool,16> b; |  | ||||||
|  |  | ||||||
|     std::array<Math::Vec4<u8>,4> i; |  | ||||||
| } shader_uniforms; |  | ||||||
|  |  | ||||||
| static Math::Vec4<float24> vs_default_attributes[16]; |  | ||||||
|  |  | ||||||
| // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! |  | ||||||
| // For now, we just keep these local arrays around. |  | ||||||
| static std::array<u32, 1024> shader_memory; |  | ||||||
| static std::array<u32, 1024> swizzle_data; |  | ||||||
|  |  | ||||||
| void SubmitShaderMemoryChange(u32 addr, u32 value) { |  | ||||||
|     shader_memory[addr] = value; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void SubmitSwizzleDataChange(u32 addr, u32 value) { |  | ||||||
|     swizzle_data[addr] = value; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| Math::Vec4<float24>& GetFloatUniform(u32 index) { |  | ||||||
|     return shader_uniforms.f[index]; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool& GetBoolUniform(u32 index) { |  | ||||||
|     return shader_uniforms.b[index]; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| Math::Vec4<u8>& GetIntUniform(u32 index) { |  | ||||||
|     return shader_uniforms.i[index]; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| Math::Vec4<float24>& GetDefaultAttribute(u32 index) { |  | ||||||
|     return vs_default_attributes[index]; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| const std::array<u32, 1024>& GetShaderBinary() { |  | ||||||
|     return shader_memory; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| const std::array<u32, 1024>& GetSwizzlePatterns() { |  | ||||||
|     return swizzle_data; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| struct VertexShaderState { | struct VertexShaderState { | ||||||
|     u32* program_counter; |     const u32* program_counter; | ||||||
|  |  | ||||||
|     const float24* input_register_table[16]; |     const float24* input_register_table[16]; | ||||||
|     Math::Vec4<float24> output_registers[16]; |     Math::Vec4<float24> output_registers[16]; | ||||||
| @@ -109,6 +62,9 @@ struct VertexShaderState { | |||||||
| }; | }; | ||||||
|  |  | ||||||
| static void ProcessShaderCode(VertexShaderState& state) { | static void ProcessShaderCode(VertexShaderState& state) { | ||||||
|  |     const auto& uniforms = g_state.vs.uniforms; | ||||||
|  |     const auto& swizzle_data = g_state.vs.swizzle_data; | ||||||
|  |     const auto& program_code = g_state.vs.program_code; | ||||||
|  |  | ||||||
|     // Placeholder for invalid inputs |     // Placeholder for invalid inputs | ||||||
|     static float24 dummy_vec4_float24[4]; |     static float24 dummy_vec4_float24[4]; | ||||||
| @@ -116,14 +72,14 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|     while (true) { |     while (true) { | ||||||
|         if (!state.call_stack.empty()) { |         if (!state.call_stack.empty()) { | ||||||
|             auto& top = state.call_stack.top(); |             auto& top = state.call_stack.top(); | ||||||
|             if (state.program_counter - shader_memory.data() == top.final_address) { |             if (state.program_counter - program_code.data() == top.final_address) { | ||||||
|                 state.address_registers[2] += top.loop_increment; |                 state.address_registers[2] += top.loop_increment; | ||||||
|  |  | ||||||
|                 if (top.repeat_counter-- == 0) { |                 if (top.repeat_counter-- == 0) { | ||||||
|                     state.program_counter = &shader_memory[top.return_address]; |                     state.program_counter = &program_code[top.return_address]; | ||||||
|                     state.call_stack.pop(); |                     state.call_stack.pop(); | ||||||
|                 } else { |                 } else { | ||||||
|                     state.program_counter = &shader_memory[top.loop_address]; |                     state.program_counter = &program_code[top.loop_address]; | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 // TODO: Is "trying again" accurate to hardware? |                 // TODO: Is "trying again" accurate to hardware? | ||||||
| @@ -135,12 +91,12 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|         const Instruction& instr = *(const Instruction*)state.program_counter; |         const Instruction& instr = *(const Instruction*)state.program_counter; | ||||||
|         const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; |         const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | ||||||
|  |  | ||||||
|         static auto call = [](VertexShaderState& state, u32 offset, u32 num_instructions, |         static auto call = [&program_code](VertexShaderState& state, u32 offset, u32 num_instructions, | ||||||
|                               u32 return_offset, u8 repeat_count, u8 loop_increment) { |                               u32 return_offset, u8 repeat_count, u8 loop_increment) { | ||||||
|             state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset |             state.program_counter = &program_code[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | ||||||
|             state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); |             state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | ||||||
|         }; |         }; | ||||||
|         u32 binary_offset = state.program_counter - shader_memory.data(); |         u32 binary_offset = state.program_counter - program_code.data(); | ||||||
|  |  | ||||||
|         state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + binary_offset); |         state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + binary_offset); | ||||||
|  |  | ||||||
| @@ -153,7 +109,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|                 return &state.temporary_registers[source_reg.GetIndex()].x; |                 return &state.temporary_registers[source_reg.GetIndex()].x; | ||||||
|  |  | ||||||
|             case RegisterType::FloatUniform: |             case RegisterType::FloatUniform: | ||||||
|                 return &shader_uniforms.f[source_reg.GetIndex()].x; |                 return &uniforms.f[source_reg.GetIndex()].x; | ||||||
|  |  | ||||||
|             default: |             default: | ||||||
|                 return dummy_vec4_float24; |                 return dummy_vec4_float24; | ||||||
| @@ -471,13 +427,13 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|  |  | ||||||
|             case OpCode::Id::JMPC: |             case OpCode::Id::JMPC: | ||||||
|                 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |                 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | ||||||
|                     state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; |                     state.program_counter = &program_code[instr.flow_control.dest_offset] - 1; | ||||||
|                 } |                 } | ||||||
|                 break; |                 break; | ||||||
|  |  | ||||||
|             case OpCode::Id::JMPU: |             case OpCode::Id::JMPU: | ||||||
|                 if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { |                 if (uniforms.b[instr.flow_control.bool_uniform_id]) { | ||||||
|                     state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; |                     state.program_counter = &program_code[instr.flow_control.dest_offset] - 1; | ||||||
|                 } |                 } | ||||||
|                 break; |                 break; | ||||||
|  |  | ||||||
| @@ -489,7 +445,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|                 break; |                 break; | ||||||
|  |  | ||||||
|             case OpCode::Id::CALLU: |             case OpCode::Id::CALLU: | ||||||
|                 if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { |                 if (uniforms.b[instr.flow_control.bool_uniform_id]) { | ||||||
|                     call(state, |                     call(state, | ||||||
|                         instr.flow_control.dest_offset, |                         instr.flow_control.dest_offset, | ||||||
|                         instr.flow_control.num_instructions, |                         instr.flow_control.num_instructions, | ||||||
| @@ -510,7 +466,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|                 break; |                 break; | ||||||
|  |  | ||||||
|             case OpCode::Id::IFU: |             case OpCode::Id::IFU: | ||||||
|                 if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { |                 if (uniforms.b[instr.flow_control.bool_uniform_id]) { | ||||||
|                     call(state, |                     call(state, | ||||||
|                          binary_offset + 1, |                          binary_offset + 1, | ||||||
|                          instr.flow_control.dest_offset - binary_offset - 1, |                          instr.flow_control.dest_offset - binary_offset - 1, | ||||||
| @@ -545,14 +501,14 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|  |  | ||||||
|             case OpCode::Id::LOOP: |             case OpCode::Id::LOOP: | ||||||
|             { |             { | ||||||
|                 state.address_registers[2] = shader_uniforms.i[instr.flow_control.int_uniform_id].y; |                 state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; | ||||||
|  |  | ||||||
|                 call(state, |                 call(state, | ||||||
|                      binary_offset + 1, |                      binary_offset + 1, | ||||||
|                      instr.flow_control.dest_offset - binary_offset + 1, |                      instr.flow_control.dest_offset - binary_offset + 1, | ||||||
|                      instr.flow_control.dest_offset + 1, |                      instr.flow_control.dest_offset + 1, | ||||||
|                      shader_uniforms.i[instr.flow_control.int_uniform_id].x, |                      uniforms.i[instr.flow_control.int_uniform_id].x, | ||||||
|                      shader_uniforms.i[instr.flow_control.int_uniform_id].z); |                      uniforms.i[instr.flow_control.int_uniform_id].z); | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
|  |  | ||||||
| @@ -578,15 +534,17 @@ static Common::Profiling::TimingCategory shader_category("Vertex Shader"); | |||||||
| OutputVertex RunShader(const InputVertex& input, int num_attributes) { | OutputVertex RunShader(const InputVertex& input, int num_attributes) { | ||||||
|     Common::Profiling::ScopeTimer timer(shader_category); |     Common::Profiling::ScopeTimer timer(shader_category); | ||||||
|  |  | ||||||
|  |     const auto& regs = g_state.regs; | ||||||
|  |     const auto& vs = g_state.vs; | ||||||
|     VertexShaderState state; |     VertexShaderState state; | ||||||
|  |  | ||||||
|     const u32* main = &shader_memory[registers.vs_main_offset]; |     const u32* main = &vs.program_code[regs.vs_main_offset]; | ||||||
|     state.program_counter = (u32*)main; |     state.program_counter = (u32*)main; | ||||||
|     state.debug.max_offset = 0; |     state.debug.max_offset = 0; | ||||||
|     state.debug.max_opdesc_id = 0; |     state.debug.max_opdesc_id = 0; | ||||||
|  |  | ||||||
|     // Setup input register table |     // Setup input register table | ||||||
|     const auto& attribute_register_map = registers.vs_input_register_map; |     const auto& attribute_register_map = regs.vs_input_register_map; | ||||||
|     float24 dummy_register; |     float24 dummy_register; | ||||||
|     boost::fill(state.input_register_table, &dummy_register); |     boost::fill(state.input_register_table, &dummy_register); | ||||||
|      |      | ||||||
| @@ -611,16 +569,16 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) { | |||||||
|     state.conditional_code[1] = false; |     state.conditional_code[1] = false; | ||||||
|  |  | ||||||
|     ProcessShaderCode(state); |     ProcessShaderCode(state); | ||||||
|     DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), |     DebugUtils::DumpShader(vs.program_code.data(), state.debug.max_offset, vs.swizzle_data.data(), | ||||||
|                            state.debug.max_opdesc_id, registers.vs_main_offset, |                            state.debug.max_opdesc_id, regs.vs_main_offset, | ||||||
|                            registers.vs_output_attributes); |                            regs.vs_output_attributes); | ||||||
|  |  | ||||||
|     // Setup output data |     // Setup output data | ||||||
|     OutputVertex ret; |     OutputVertex ret; | ||||||
|     // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to |     // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | ||||||
|     // figure out what those circumstances are and enable the remaining outputs then. |     // figure out what those circumstances are and enable the remaining outputs then. | ||||||
|     for (int i = 0; i < 7; ++i) { |     for (int i = 0; i < 7; ++i) { | ||||||
|         const auto& output_register_map = registers.vs_output_attributes[i]; |         const auto& output_register_map = regs.vs_output_attributes[i]; | ||||||
|  |  | ||||||
|         u32 semantics[4] = { |         u32 semantics[4] = { | ||||||
|             output_register_map.map_x, output_register_map.map_y, |             output_register_map.map_x, output_register_map.map_y, | ||||||
|   | |||||||
| @@ -66,19 +66,8 @@ struct OutputVertex { | |||||||
| static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | ||||||
| static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | ||||||
|  |  | ||||||
| void SubmitShaderMemoryChange(u32 addr, u32 value); |  | ||||||
| void SubmitSwizzleDataChange(u32 addr, u32 value); |  | ||||||
|  |  | ||||||
| OutputVertex RunShader(const InputVertex& input, int num_attributes); | OutputVertex RunShader(const InputVertex& input, int num_attributes); | ||||||
|  |  | ||||||
| Math::Vec4<float24>& GetFloatUniform(u32 index); |  | ||||||
| bool& GetBoolUniform(u32 index); |  | ||||||
| Math::Vec4<u8>& GetIntUniform(u32 index); |  | ||||||
| Math::Vec4<float24>& GetDefaultAttribute(u32 index); |  | ||||||
|  |  | ||||||
| const std::array<u32, 1024>& GetShaderBinary(); |  | ||||||
| const std::array<u32, 1024>& GetSwizzlePatterns(); |  | ||||||
|  |  | ||||||
| } // namespace | } // namespace | ||||||
|  |  | ||||||
| } // namespace | } // namespace | ||||||
|   | |||||||
| @@ -8,9 +8,11 @@ | |||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/settings.h" | #include "core/settings.h" | ||||||
|  |  | ||||||
| #include "video_core/video_core.h" | #include "video_core.h" | ||||||
| #include "video_core/renderer_base.h" | #include "renderer_base.h" | ||||||
| #include "video_core/renderer_opengl/renderer_opengl.h" | #include "renderer_opengl/renderer_opengl.h" | ||||||
|  |  | ||||||
|  | #include "pica.h" | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Video Core namespace | // Video Core namespace | ||||||
| @@ -24,6 +26,8 @@ std::atomic<bool> g_hw_renderer_enabled; | |||||||
|  |  | ||||||
| /// Initialize the video core | /// Initialize the video core | ||||||
| void Init(EmuWindow* emu_window) { | void Init(EmuWindow* emu_window) { | ||||||
|  |     Pica::Init(); | ||||||
|  |  | ||||||
|     g_emu_window = emu_window; |     g_emu_window = emu_window; | ||||||
|     g_renderer = new RendererOpenGL(); |     g_renderer = new RendererOpenGL(); | ||||||
|     g_renderer->SetWindow(g_emu_window); |     g_renderer->SetWindow(g_emu_window); | ||||||
| @@ -34,7 +38,10 @@ void Init(EmuWindow* emu_window) { | |||||||
|  |  | ||||||
| /// Shutdown the video core | /// Shutdown the video core | ||||||
| void Shutdown() { | void Shutdown() { | ||||||
|  |     Pica::Shutdown(); | ||||||
|  |  | ||||||
|     delete g_renderer; |     delete g_renderer; | ||||||
|  |  | ||||||
|     LOG_DEBUG(Render, "shutdown OK"); |     LOG_DEBUG(Render, "shutdown OK"); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user