Pica: Add debug utilities for dumping shaders.
This commit is contained in:
		| @@ -2,6 +2,7 @@ | |||||||
| // Licensed under GPLv2 | // Licensed under GPLv2 | ||||||
| // Refer to the license.txt file included. | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #include <algorithm> | ||||||
| #include <fstream> | #include <fstream> | ||||||
| #include <string> | #include <string> | ||||||
|  |  | ||||||
| @@ -55,6 +56,210 @@ void GeometryDumper::Dump() { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #pragma pack(1) | ||||||
|  | struct DVLBHeader { | ||||||
|  |     enum : u32 { | ||||||
|  |         MAGIC_WORD = 0x424C5644, // "DVLB" | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     u32 magic_word; | ||||||
|  |     u32 num_programs; | ||||||
|  | //    u32 dvle_offset_table[]; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size"); | ||||||
|  |  | ||||||
|  | struct DVLPHeader { | ||||||
|  |     enum : u32 { | ||||||
|  |         MAGIC_WORD = 0x504C5644, // "DVLP" | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     u32 magic_word; | ||||||
|  |     u32 version; | ||||||
|  |     u32 binary_offset;  // relative to DVLP start | ||||||
|  |     u32 binary_size_words; | ||||||
|  |     u32 swizzle_patterns_offset; | ||||||
|  |     u32 swizzle_patterns_num_entries; | ||||||
|  |     u32 unk2; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size"); | ||||||
|  |  | ||||||
|  | struct DVLEHeader { | ||||||
|  |     enum : u32 { | ||||||
|  |         MAGIC_WORD = 0x454c5644, // "DVLE" | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     enum class ShaderType : u8 { | ||||||
|  |         VERTEX = 0, | ||||||
|  |         GEOMETRY = 1, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     u32 magic_word; | ||||||
|  |     u16 pad1; | ||||||
|  |     ShaderType type; | ||||||
|  |     u8 pad2; | ||||||
|  |     u32 main_offset_words; // offset within binary blob | ||||||
|  |     u32 endmain_offset_words; | ||||||
|  |     u32 pad3; | ||||||
|  |     u32 pad4; | ||||||
|  |     u32 constant_table_offset; | ||||||
|  |     u32 constant_table_size; // number of entries | ||||||
|  |     u32 label_table_offset; | ||||||
|  |     u32 label_table_size; | ||||||
|  |     u32 output_register_table_offset; | ||||||
|  |     u32 output_register_table_size; | ||||||
|  |     u32 uniform_table_offset; | ||||||
|  |     u32 uniform_table_size; | ||||||
|  |     u32 symbol_table_offset; | ||||||
|  |     u32 symbol_table_size; | ||||||
|  |  | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size"); | ||||||
|  | #pragma pack() | ||||||
|  |  | ||||||
|  | void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, | ||||||
|  |                 u32 main_offset, const Regs::VSOutputAttributes* output_attributes) | ||||||
|  | { | ||||||
|  |     // NOTE: Permanently enabling this just trashes hard disks for no reason. | ||||||
|  |     //       Hence, this is currently disabled. | ||||||
|  |     return; | ||||||
|  |  | ||||||
|  |     struct StuffToWrite { | ||||||
|  |         u8* pointer; | ||||||
|  |         u32 size; | ||||||
|  |     }; | ||||||
|  |     std::vector<StuffToWrite> writing_queue; | ||||||
|  |     u32 write_offset = 0; | ||||||
|  |  | ||||||
|  |     auto QueueForWriting = [&writing_queue,&write_offset](u8* pointer, u32 size) { | ||||||
|  |         writing_queue.push_back({pointer, size}); | ||||||
|  |         u32 old_write_offset = write_offset; | ||||||
|  |         write_offset += size; | ||||||
|  |         return old_write_offset; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // First off, try to translate Pica state (one enum for output attribute type and component) | ||||||
|  |     // into shbin format (separate type and component mask). | ||||||
|  |     union OutputRegisterInfo { | ||||||
|  |         enum Type : u64 { | ||||||
|  |             POSITION = 0, | ||||||
|  |             COLOR = 2, | ||||||
|  |             TEXCOORD0 = 3, | ||||||
|  |             TEXCOORD1 = 5, | ||||||
|  |             TEXCOORD2 = 6, | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         BitField< 0, 64, u64> hex; | ||||||
|  |  | ||||||
|  |         BitField< 0, 16, Type> type; | ||||||
|  |         BitField<16, 16, u64> id; | ||||||
|  |         BitField<32,  4, u64> component_mask; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // This is put into a try-catch block to make sure we notice unknown configurations. | ||||||
|  |     std::vector<OutputRegisterInfo> output_info_table; | ||||||
|  |         for (int i = 0; i < 7; ++i) { | ||||||
|  |             using OutputAttributes = Pica::Regs::VSOutputAttributes; | ||||||
|  |  | ||||||
|  |             // TODO: It's still unclear how the attribute components map to the register! | ||||||
|  |             //       Once we know that, this code probably will not make much sense anymore. | ||||||
|  |             std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32> > map = { | ||||||
|  |                 { OutputAttributes::POSITION_X, { OutputRegisterInfo::POSITION, 1} }, | ||||||
|  |                 { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, | ||||||
|  |                 { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, | ||||||
|  |                 { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, | ||||||
|  |                 { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, | ||||||
|  |                 { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, | ||||||
|  |                 { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, | ||||||
|  |                 { OutputAttributes::COLOR_A, { OutputRegisterInfo::COLOR, 8} }, | ||||||
|  |                 { OutputAttributes::TEXCOORD0_U, { OutputRegisterInfo::TEXCOORD0, 1} }, | ||||||
|  |                 { OutputAttributes::TEXCOORD0_V, { OutputRegisterInfo::TEXCOORD0, 2} }, | ||||||
|  |                 { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, | ||||||
|  |                 { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, | ||||||
|  |                 { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, | ||||||
|  |                 { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} } | ||||||
|  |             }; | ||||||
|  |  | ||||||
|  |             for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ | ||||||
|  |                                                 output_attributes[i].map_x, | ||||||
|  |                                                 output_attributes[i].map_y, | ||||||
|  |                                                 output_attributes[i].map_z, | ||||||
|  |                                                 output_attributes[i].map_w     }) { | ||||||
|  |                 if (semantic == OutputAttributes::INVALID) | ||||||
|  |                     continue; | ||||||
|  |  | ||||||
|  |                 try { | ||||||
|  |                     OutputRegisterInfo::Type type = map.at(semantic).first; | ||||||
|  |                     u32 component_mask = map.at(semantic).second; | ||||||
|  |  | ||||||
|  |                     auto it = std::find_if(output_info_table.begin(), output_info_table.end(), | ||||||
|  |                                         [&i, &type](const OutputRegisterInfo& info) { | ||||||
|  |                                             return info.id == i && info.type == type; | ||||||
|  |                                         } | ||||||
|  |                                         ); | ||||||
|  |  | ||||||
|  |                     if (it == output_info_table.end()) { | ||||||
|  |                         output_info_table.push_back({}); | ||||||
|  |                         output_info_table.back().type = type; | ||||||
|  |                         output_info_table.back().component_mask = component_mask; | ||||||
|  |                         output_info_table.back().id = i; | ||||||
|  |                     } else { | ||||||
|  |                         it->component_mask = it->component_mask | component_mask; | ||||||
|  |                     } | ||||||
|  |                 } catch (const std::out_of_range& oor) { | ||||||
|  |                     _dbg_assert_msg_(GPU, 0, "Unknown output attribute mapping"); | ||||||
|  |                     ERROR_LOG(GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x", | ||||||
|  |                               (int)output_attributes[i].map_x.Value(), | ||||||
|  |                               (int)output_attributes[i].map_y.Value(), | ||||||
|  |                               (int)output_attributes[i].map_z.Value(), | ||||||
|  |                               (int)output_attributes[i].map_w.Value()); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     struct { | ||||||
|  |         DVLBHeader header; | ||||||
|  |         u32 dvle_offset; | ||||||
|  |     } dvlb{ {DVLBHeader::MAGIC_WORD, 1 } }; // 1 DVLE | ||||||
|  |  | ||||||
|  |     DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD }; | ||||||
|  |     DVLEHeader dvle{ DVLEHeader::MAGIC_WORD }; | ||||||
|  |  | ||||||
|  |     QueueForWriting((u8*)&dvlb, sizeof(dvlb)); | ||||||
|  |     u32 dvlp_offset = QueueForWriting((u8*)&dvlp, sizeof(dvlp)); | ||||||
|  |     dvlb.dvle_offset = QueueForWriting((u8*)&dvle, sizeof(dvle)); | ||||||
|  |  | ||||||
|  |     // TODO: Reduce the amount of binary code written to relevant portions | ||||||
|  |     dvlp.binary_offset = write_offset - dvlp_offset; | ||||||
|  |     dvlp.binary_size_words = binary_size; | ||||||
|  |     QueueForWriting((u8*)binary_data, binary_size * sizeof(u32)); | ||||||
|  |  | ||||||
|  |     dvlp.swizzle_patterns_offset = write_offset - dvlp_offset; | ||||||
|  |     dvlp.swizzle_patterns_num_entries = swizzle_size; | ||||||
|  |     u32 dummy = 0; | ||||||
|  |     for (int i = 0; i < swizzle_size; ++i) { | ||||||
|  |         QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i])); | ||||||
|  |         QueueForWriting((u8*)&dummy, sizeof(dummy)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     dvle.main_offset_words = main_offset; | ||||||
|  |     dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; | ||||||
|  |     dvle.output_register_table_size = output_info_table.size(); | ||||||
|  |     QueueForWriting((u8*)output_info_table.data(), output_info_table.size() * sizeof(OutputRegisterInfo)); | ||||||
|  |  | ||||||
|  |     // TODO: Create a label table for "main" | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     // Write data to file | ||||||
|  |     static int dump_index = 0; | ||||||
|  |     std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin"); | ||||||
|  |     std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); | ||||||
|  |  | ||||||
|  |     for (auto& chunk : writing_queue) { | ||||||
|  |         file.write((char*)chunk.pointer, chunk.size); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| } // namespace | } // namespace | ||||||
|  |  | ||||||
| } // namespace | } // namespace | ||||||
|   | |||||||
| @@ -35,6 +35,9 @@ private: | |||||||
|     std::vector<Face> faces; |     std::vector<Face> faces; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, | ||||||
|  |                 u32 main_offset, const Regs::VSOutputAttributes* output_attributes); | ||||||
|  |  | ||||||
| } // namespace | } // namespace | ||||||
|  |  | ||||||
| } // namespace | } // namespace | ||||||
|   | |||||||
| @@ -57,7 +57,7 @@ struct Regs { | |||||||
|  |  | ||||||
|     INSERT_PADDING_WORDS(0x1); |     INSERT_PADDING_WORDS(0x1); | ||||||
|  |  | ||||||
|     union { |     union VSOutputAttributes { | ||||||
|         // Maps components of output vertex attributes to semantics |         // Maps components of output vertex attributes to semantics | ||||||
|         enum Semantic : u32 |         enum Semantic : u32 | ||||||
|         { |         { | ||||||
|   | |||||||
| @@ -4,6 +4,7 @@ | |||||||
|  |  | ||||||
| #include "pica.h" | #include "pica.h" | ||||||
| #include "vertex_shader.h" | #include "vertex_shader.h" | ||||||
|  | #include "debug_utils/debug_utils.h" | ||||||
| #include <core/mem_map.h> | #include <core/mem_map.h> | ||||||
| #include <common/file_util.h> | #include <common/file_util.h> | ||||||
|  |  | ||||||
| @@ -50,6 +51,11 @@ struct VertexShaderState { | |||||||
|     }; |     }; | ||||||
|     u32 call_stack[8]; // TODO: What is the maximal call stack depth? |     u32 call_stack[8]; // TODO: What is the maximal call stack depth? | ||||||
|     u32* call_stack_pointer; |     u32* call_stack_pointer; | ||||||
|  |  | ||||||
|  |     struct { | ||||||
|  |         u32 max_offset; // maximum program counter ever reached | ||||||
|  |         u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||||||
|  |     } debug; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| static void ProcessShaderCode(VertexShaderState& state) { | static void ProcessShaderCode(VertexShaderState& state) { | ||||||
| @@ -57,6 +63,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|         bool increment_pc = true; |         bool increment_pc = true; | ||||||
|         bool exit_loop = false; |         bool exit_loop = false; | ||||||
|         const Instruction& instr = *(const Instruction*)state.program_counter; |         const Instruction& instr = *(const Instruction*)state.program_counter; | ||||||
|  |         state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); | ||||||
|  |  | ||||||
|         const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] |         const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] | ||||||
|                              : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x |                              : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x | ||||||
| @@ -88,6 +95,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|         switch (instr.opcode) { |         switch (instr.opcode) { | ||||||
|             case Instruction::OpCode::ADD: |             case Instruction::OpCode::ADD: | ||||||
|             { |             { | ||||||
|  |                 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| @@ -100,6 +108,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|  |  | ||||||
|             case Instruction::OpCode::MUL: |             case Instruction::OpCode::MUL: | ||||||
|             { |             { | ||||||
|  |                 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| @@ -113,6 +122,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|             case Instruction::OpCode::DP3: |             case Instruction::OpCode::DP3: | ||||||
|             case Instruction::OpCode::DP4: |             case Instruction::OpCode::DP4: | ||||||
|             { |             { | ||||||
|  |                 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||||||
|                 float24 dot = float24::FromFloat32(0.f); |                 float24 dot = float24::FromFloat32(0.f); | ||||||
|                 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; |                 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; | ||||||
|                 for (int i = 0; i < num_components; ++i) |                 for (int i = 0; i < num_components; ++i) | ||||||
| @@ -130,6 +140,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|             // Reciprocal |             // Reciprocal | ||||||
|             case Instruction::OpCode::RCP: |             case Instruction::OpCode::RCP: | ||||||
|             { |             { | ||||||
|  |                 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| @@ -145,6 +156,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|             // Reciprocal Square Root |             // Reciprocal Square Root | ||||||
|             case Instruction::OpCode::RSQ: |             case Instruction::OpCode::RSQ: | ||||||
|             { |             { | ||||||
|  |                 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| @@ -159,6 +171,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||||||
|  |  | ||||||
|             case Instruction::OpCode::MOV: |             case Instruction::OpCode::MOV: | ||||||
|             { |             { | ||||||
|  |                 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| @@ -212,6 +225,8 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) | |||||||
|  |  | ||||||
|     const u32* main = &shader_memory[registers.vs_main_offset]; |     const u32* main = &shader_memory[registers.vs_main_offset]; | ||||||
|     state.program_counter = (u32*)main; |     state.program_counter = (u32*)main; | ||||||
|  |     state.debug.max_offset = 0; | ||||||
|  |     state.debug.max_opdesc_id = 0; | ||||||
|  |  | ||||||
|     // Setup input register table |     // Setup input register table | ||||||
|     const auto& attribute_register_map = registers.vs_input_register_map; |     const auto& attribute_register_map = registers.vs_input_register_map; | ||||||
| @@ -255,6 +270,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) | |||||||
|     state.call_stack_pointer = &state.call_stack[0]; |     state.call_stack_pointer = &state.call_stack[0]; | ||||||
|  |  | ||||||
|     ProcessShaderCode(state); |     ProcessShaderCode(state); | ||||||
|  |     DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, | ||||||
|  |                            state.debug.max_opdesc_id, registers.vs_main_offset, | ||||||
|  |                            registers.vs_output_attributes); | ||||||
|  |  | ||||||
|     DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", |     DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | ||||||
|         ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), |         ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user