shader: Fix tracking
This commit is contained in:
		| @@ -142,6 +142,58 @@ void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | struct LowAddrInfo { | ||||||
|  |     IR::U32 value; | ||||||
|  |     s32 imm_offset; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /// Tries to track the first 32-bits of a global memory instruction | ||||||
|  | std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { | ||||||
|  |     // The first argument is the low level GPU pointer to the global memory instruction | ||||||
|  |     const IR::U64 addr{inst->Arg(0)}; | ||||||
|  |     if (addr.IsImmediate()) { | ||||||
|  |         // Not much we can do if it's an immediate | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     // This address is expected to either be a PackUint2x32 or a IAdd64 | ||||||
|  |     IR::Inst* addr_inst{addr.InstRecursive()}; | ||||||
|  |     s32 imm_offset{0}; | ||||||
|  |     if (addr_inst->Opcode() == IR::Opcode::IAdd64) { | ||||||
|  |         // If it's an IAdd64, get the immediate offset it is applying and grab the address | ||||||
|  |         // instruction. This expects for the instruction to be canonicalized having the address on | ||||||
|  |         // the first argument and the immediate offset on the second one. | ||||||
|  |         const IR::U64 imm_offset_value{addr_inst->Arg(1)}; | ||||||
|  |         if (!imm_offset_value.IsImmediate()) { | ||||||
|  |             return std::nullopt; | ||||||
|  |         } | ||||||
|  |         imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64())); | ||||||
|  |         const IR::U64 iadd_addr{addr_inst->Arg(0)}; | ||||||
|  |         if (iadd_addr.IsImmediate()) { | ||||||
|  |             return std::nullopt; | ||||||
|  |         } | ||||||
|  |         addr_inst = iadd_addr.Inst(); | ||||||
|  |     } | ||||||
|  |     // With IAdd64 handled, now PackUint2x32 is expected without exceptions | ||||||
|  |     if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     // PackUint2x32 is expected to be generated from a vector | ||||||
|  |     const IR::Value vector{addr_inst->Arg(0)}; | ||||||
|  |     if (vector.IsImmediate()) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     // This vector is expected to be a CompositeConstructU32x2 | ||||||
|  |     IR::Inst* const vector_inst{vector.InstRecursive()}; | ||||||
|  |     if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     // Grab the first argument from the CompositeConstructU32x2, this is the low address. | ||||||
|  |     return LowAddrInfo{ | ||||||
|  |         .value{IR::U32{vector_inst->Arg(0)}}, | ||||||
|  |         .imm_offset{imm_offset}, | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  |  | ||||||
| /// Recursively tries to track the storage buffer address used by a global memory instruction | /// Recursively tries to track the storage buffer address used by a global memory instruction | ||||||
| std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { | std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { | ||||||
|     if (value.IsImmediate()) { |     if (value.IsImmediate()) { | ||||||
| @@ -191,13 +243,26 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, | |||||||
|     }; |     }; | ||||||
|     // First try to find storage buffers in the NVN address |     // First try to find storage buffers in the NVN address | ||||||
|     const IR::U64 addr{inst->Arg(0)}; |     const IR::U64 addr{inst->Arg(0)}; | ||||||
|     std::optional<StorageBufferAddr> storage_buffer{Track(addr, &nvn_bias)}; |     if (addr.IsImmediate()) { | ||||||
|  |         // Immediate addresses can't be lowered to a storage buffer | ||||||
|  |         DiscardGlobalMemory(block, inst); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     // Track the low address of the instruction | ||||||
|  |     const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(addr.InstRecursive())}; | ||||||
|  |     if (!low_addr_info) { | ||||||
|  |         DiscardGlobalMemory(block, inst); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     const IR::U32 low_addr{low_addr_info->value}; | ||||||
|  |     std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)}; | ||||||
|     if (!storage_buffer) { |     if (!storage_buffer) { | ||||||
|         // If it fails, track without a bias |         // If it fails, track without a bias | ||||||
|         storage_buffer = Track(addr, nullptr); |         storage_buffer = Track(low_addr, nullptr); | ||||||
|         if (!storage_buffer) { |         if (!storage_buffer) { | ||||||
|             // If that also failed, drop the global memory usage |             // If that also failed, drop the global memory usage | ||||||
|             DiscardGlobalMemory(block, inst); |             DiscardGlobalMemory(block, inst); | ||||||
|  |             return; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     // Collect storage buffer and the instruction |     // Collect storage buffer and the instruction | ||||||
| @@ -208,58 +273,15 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, | |||||||
|     }); |     }); | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Tries to track the first 32-bits of a global memory instruction |  | ||||||
| std::optional<IR::U32> TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) { |  | ||||||
|     // The first argument is the low level GPU pointer to the global memory instruction |  | ||||||
|     const IR::U64 addr{inst->Arg(0)}; |  | ||||||
|     if (addr.IsImmediate()) { |  | ||||||
|         // Not much we can do if it's an immediate |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     // This address is expected to either be a PackUint2x32 or a IAdd64 |  | ||||||
|     IR::Inst* addr_inst{addr.InstRecursive()}; |  | ||||||
|     s32 imm_offset{0}; |  | ||||||
|     if (addr_inst->Opcode() == IR::Opcode::IAdd64) { |  | ||||||
|         // If it's an IAdd64, get the immediate offset it is applying and grab the address |  | ||||||
|         // instruction. This expects for the instruction to be canonicalized having the address on |  | ||||||
|         // the first argument and the immediate offset on the second one. |  | ||||||
|         const IR::U64 imm_offset_value{addr_inst->Arg(1)}; |  | ||||||
|         if (!imm_offset_value.IsImmediate()) { |  | ||||||
|             return std::nullopt; |  | ||||||
|         } |  | ||||||
|         imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64())); |  | ||||||
|         const IR::U64 iadd_addr{addr_inst->Arg(0)}; |  | ||||||
|         if (iadd_addr.IsImmediate()) { |  | ||||||
|             return std::nullopt; |  | ||||||
|         } |  | ||||||
|         addr_inst = iadd_addr.Inst(); |  | ||||||
|     } |  | ||||||
|     // With IAdd64 handled, now PackUint2x32 is expected without exceptions |  | ||||||
|     if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     // PackUint2x32 is expected to be generated from a vector |  | ||||||
|     const IR::Value vector{addr_inst->Arg(0)}; |  | ||||||
|     if (vector.IsImmediate()) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     // This vector is expected to be a CompositeConstructU32x2 |  | ||||||
|     IR::Inst* const vector_inst{vector.InstRecursive()}; |  | ||||||
|     if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     // Grab the first argument from the CompositeConstructU32x2, this is the low address. |  | ||||||
|     // Re-apply the offset in case we found one. |  | ||||||
|     const IR::U32 low_addr{vector_inst->Arg(0)}; |  | ||||||
|     return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// Returns the offset in indices (not bytes) for an equivalent storage instruction | /// Returns the offset in indices (not bytes) for an equivalent storage instruction | ||||||
| IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { | IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { | ||||||
|     IR::IREmitter ir{block, inst}; |     IR::IREmitter ir{block, inst}; | ||||||
|     IR::U32 offset; |     IR::U32 offset; | ||||||
|     if (const std::optional<IR::U32> low_addr{TrackLowAddress(ir, &*inst)}) { |     if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&*inst)}) { | ||||||
|         offset = *low_addr; |         offset = low_addr->value; | ||||||
|  |         if (low_addr->imm_offset != 0) { | ||||||
|  |             offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); | ||||||
|  |         } | ||||||
|     } else { |     } else { | ||||||
|         offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); |         offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); | ||||||
|     } |     } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user