morton_swizzle: Move out-of-bounds texture check out of the decode loop
* Running relatively expensive checks like this on a hot path causes a small but measurable performance loss. Tested SMD with this and it doesn't crash.
This commit is contained in:
@@ -398,6 +398,33 @@ bool MemorySystem::IsValidPhysicalAddress(const PAddr paddr) const {
|
|||||||
return GetPhysicalPointer(paddr) != nullptr;
|
return GetPhysicalPointer(paddr) != nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Clamps a physical address to the bounds of the memory region selected by `base`.
 *
 * @param base    Physical address used to pick the memory region to clamp against.
 * @param address Physical address to clamp.
 * @returns `address` limited to the range [region start, region start + region size].
 *          If `base` does not fall inside any of the known regions, an error is logged
 *          and `address` is returned unchanged.
 */
PAddr MemorySystem::ClampPhysicalAddress(PAddr base, PAddr address) const {
    struct MemoryArea {
        PAddr paddr_base;
        u32 size;
    };

    constexpr std::array memory_areas = {
        MemoryArea{VRAM_PADDR, VRAM_SIZE},
        MemoryArea{DSP_RAM_PADDR, DSP_RAM_SIZE},
        MemoryArea{FCRAM_PADDR, FCRAM_N3DS_SIZE},
        MemoryArea{N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE},
    };

    // The parameter is deliberately not named `area` so it does not shadow the iterator below.
    const auto area = std::ranges::find_if(memory_areas, [&](const MemoryArea& candidate) {
        // The end of the region is treated as inclusive, so a base equal to the
        // one-past-the-end address still selects the region.
        return base >= candidate.paddr_base && base <= candidate.paddr_base + candidate.size;
    });

    if (area == memory_areas.end()) {
        // "0x{:08X}" always prints eight hex digits; with "{:#08X}" the "0X" prefix is
        // counted as part of the field width, leaving only six padded digits.
        LOG_ERROR(HW_Memory, "Unknown base address used for clamping 0x{:08X} at PC 0x{:08X}",
                  base, Core::GetRunningCore().GetPC());
        return address;
    }

    // The upper bound is the one-past-the-end address of the region, so an address used
    // as an exclusive end bound may be clamped exactly to the end of the region.
    return std::clamp(address, area->paddr_base, area->paddr_base + area->size);
}
|
||||||
|
|
||||||
u8* MemorySystem::GetPointer(const VAddr vaddr) {
|
u8* MemorySystem::GetPointer(const VAddr vaddr) {
|
||||||
u8* page_pointer = impl->current_page_table->pointers[vaddr >> PAGE_BITS];
|
u8* page_pointer = impl->current_page_table->pointers[vaddr >> PAGE_BITS];
|
||||||
if (page_pointer) {
|
if (page_pointer) {
|
||||||
@@ -457,22 +484,22 @@ MemoryRef MemorySystem::GetPhysicalRef(PAddr address) const {
|
|||||||
u32 size;
|
u32 size;
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr MemoryArea memory_areas[] = {
|
constexpr std::array memory_areas = {
|
||||||
{VRAM_PADDR, VRAM_SIZE},
|
MemoryArea{VRAM_PADDR, VRAM_SIZE},
|
||||||
{DSP_RAM_PADDR, DSP_RAM_SIZE},
|
MemoryArea{DSP_RAM_PADDR, DSP_RAM_SIZE},
|
||||||
{FCRAM_PADDR, FCRAM_N3DS_SIZE},
|
MemoryArea{FCRAM_PADDR, FCRAM_N3DS_SIZE},
|
||||||
{N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE},
|
MemoryArea{N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE},
|
||||||
};
|
};
|
||||||
|
|
||||||
const auto area =
|
const auto area =
|
||||||
std::find_if(std::begin(memory_areas), std::end(memory_areas), [&](const auto& area) {
|
std::ranges::find_if(memory_areas, [&](const MemoryArea& area) {
|
||||||
// Note: the region end check is inclusive because the user can pass in an address that
|
// Note: the region end check is inclusive because the user can pass in an address that
|
||||||
// represents an open right bound
|
// represents an open right bound
|
||||||
return address >= area.paddr_base && address <= area.paddr_base + area.size;
|
return address >= area.paddr_base && address <= area.paddr_base + area.size;
|
||||||
});
|
});
|
||||||
|
|
||||||
if (area == std::end(memory_areas)) {
|
if (area == memory_areas.end()) {
|
||||||
LOG_ERROR(HW_Memory, "unknown GetPhysicalPointer @ 0x{:08X} at PC 0x{:08X}", address,
|
LOG_ERROR(HW_Memory, "Unknown GetPhysicalPointer @ {:#08X} at PC {:#08X}", address,
|
||||||
Core::GetRunningCore().GetPC());
|
Core::GetRunningCore().GetPC());
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
@@ -350,8 +350,12 @@ public:
|
|||||||
u8* GetPointer(VAddr vaddr);
|
u8* GetPointer(VAddr vaddr);
|
||||||
const u8* GetPointer(VAddr vaddr) const;
|
const u8* GetPointer(VAddr vaddr) const;
|
||||||
|
|
||||||
|
/// Returns true if the address refers to a valid memory region
|
||||||
bool IsValidPhysicalAddress(PAddr paddr) const;
|
bool IsValidPhysicalAddress(PAddr paddr) const;
|
||||||
|
|
||||||
|
/// Clamps the address to the boundaries of the memory region pointed by base
|
||||||
|
PAddr ClampPhysicalAddress(PAddr base, PAddr address) const;
|
||||||
|
|
||||||
/// Gets offset in FCRAM from a pointer inside FCRAM range
|
/// Gets offset in FCRAM from a pointer inside FCRAM range
|
||||||
u32 GetFCRAMOffset(const u8* pointer) const;
|
u32 GetFCRAMOffset(const u8* pointer) const;
|
||||||
|
|
||||||
|
@@ -73,7 +73,7 @@ static void MortonCopy(u32 stride, u32 height, u8* linear_buffer, PAddr base, PA
|
|||||||
|
|
||||||
const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
|
const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
|
||||||
const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
|
const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
|
||||||
const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size);
|
PAddr aligned_end = base + Common::AlignDown(end - base, tile_size);
|
||||||
|
|
||||||
ASSERT(!morton_to_linear || (aligned_start == start && aligned_end == end));
|
ASSERT(!morton_to_linear || (aligned_start == start && aligned_end == end));
|
||||||
|
|
||||||
@@ -81,9 +81,12 @@ static void MortonCopy(u32 stride, u32 height, u8* linear_buffer, PAddr base, PA
|
|||||||
u32 x = (begin_pixel_index % (stride * 8)) / 8;
|
u32 x = (begin_pixel_index % (stride * 8)) / 8;
|
||||||
u32 y = (begin_pixel_index / (stride * 8)) * 8;
|
u32 y = (begin_pixel_index / (stride * 8)) * 8;
|
||||||
|
|
||||||
|
// In OpenGL the texture origin is in the bottom left corner as opposed to other
|
||||||
|
// APIs that have it at the top left. To avoid flipping texture coordinates in
|
||||||
|
// the shader we read/write the linear buffer backwards
|
||||||
linear_buffer += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel;
|
linear_buffer += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel;
|
||||||
|
|
||||||
auto linearbuf_next_tile = [&] {
|
auto linear_next_tile = [&] {
|
||||||
x = (x + 8) % stride;
|
x = (x + 8) % stride;
|
||||||
linear_buffer += 8 * aligned_bytes_per_pixel;
|
linear_buffer += 8 * aligned_bytes_per_pixel;
|
||||||
if (!x) {
|
if (!x) {
|
||||||
@@ -94,36 +97,38 @@ static void MortonCopy(u32 stride, u32 height, u8* linear_buffer, PAddr base, PA
|
|||||||
|
|
||||||
u8* tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start);
|
u8* tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start);
|
||||||
|
|
||||||
|
// If during a texture download the start coordinate is inside a tile, swizzle
|
||||||
|
// the tile to a temporary buffer and copy the part we are interested in
|
||||||
if (start < aligned_start && !morton_to_linear) {
|
if (start < aligned_start && !morton_to_linear) {
|
||||||
std::array<u8, tile_size> tmp_buf;
|
std::array<u8, tile_size> tmp_buf;
|
||||||
MortonCopyTile<morton_to_linear, format>(stride, &tmp_buf[0], linear_buffer);
|
MortonCopyTile<morton_to_linear, format>(stride, tmp_buf.data(), linear_buffer);
|
||||||
std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start],
|
std::memcpy(tile_buffer, tmp_buf.data() + start - aligned_down_start,
|
||||||
std::min(aligned_start, end) - start);
|
std::min(aligned_start, end) - start);
|
||||||
|
|
||||||
tile_buffer += aligned_start - start;
|
tile_buffer += aligned_start - start;
|
||||||
linearbuf_next_tile();
|
linear_next_tile();
|
||||||
}
|
}
|
||||||
|
|
||||||
const u8* const buffer_end = tile_buffer + aligned_end - aligned_start;
|
// Pokemon Super Mystery Dungeon will try to use textures that go beyond
|
||||||
PAddr current_paddr = aligned_start;
|
// the end address of VRAM. Clamp the address to the end of VRAM if that happens
|
||||||
|
// TODO: Move this to the rasterizer cache
|
||||||
|
if (const u32 clamped_end = VideoCore::g_memory->ClampPhysicalAddress(aligned_start, aligned_end);
|
||||||
|
clamped_end != aligned_end) {
|
||||||
|
LOG_ERROR(Render_OpenGL, "Out of bound texture read address {:#x}, clamping to {:#x}", aligned_end, clamped_end);
|
||||||
|
aligned_end = clamped_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
const u8* buffer_end = tile_buffer + aligned_end - aligned_start;
|
||||||
while (tile_buffer < buffer_end) {
|
while (tile_buffer < buffer_end) {
|
||||||
// Pokemon Super Mystery Dungeon will try to use textures that go beyond
|
|
||||||
// the end address of VRAM. Stop reading if reaches invalid address
|
|
||||||
if (!VideoCore::g_memory->IsValidPhysicalAddress(current_paddr) ||
|
|
||||||
!VideoCore::g_memory->IsValidPhysicalAddress(current_paddr + tile_size)) {
|
|
||||||
LOG_ERROR(Render_OpenGL, "Out of bound texture");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
MortonCopyTile<morton_to_linear, format>(stride, tile_buffer, linear_buffer);
|
MortonCopyTile<morton_to_linear, format>(stride, tile_buffer, linear_buffer);
|
||||||
tile_buffer += tile_size;
|
tile_buffer += tile_size;
|
||||||
current_paddr += tile_size;
|
linear_next_tile();
|
||||||
linearbuf_next_tile();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (end > std::max(aligned_start, aligned_end) && !morton_to_linear) {
|
if (end > std::max(aligned_start, aligned_end) && !morton_to_linear) {
|
||||||
std::array<u8, tile_size> tmp_buf;
|
std::array<u8, tile_size> tmp_buf;
|
||||||
MortonCopyTile<morton_to_linear, format>(stride, &tmp_buf[0], linear_buffer);
|
MortonCopyTile<morton_to_linear, format>(stride, tmp_buf.data(), linear_buffer);
|
||||||
std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end);
|
std::memcpy(tile_buffer, tmp_buf.data(), end - aligned_end);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user