Merge pull request #3281 from jroweboy/texcache-pt2
Texture Cache Rework
This commit is contained in:
@ -16,8 +16,8 @@ static const float TOP_SCREEN_ASPECT_RATIO =
|
||||
static const float BOT_SCREEN_ASPECT_RATIO =
|
||||
static_cast<float>(Core::kScreenBottomHeight) / Core::kScreenBottomWidth;
|
||||
|
||||
float FramebufferLayout::GetScalingRatio() const {
|
||||
return static_cast<float>(top_screen.GetWidth()) / Core::kScreenTopWidth;
|
||||
u16 FramebufferLayout::GetScalingRatio() const {
|
||||
return static_cast<u16>(((top_screen.GetWidth() - 1) / Core::kScreenTopWidth) + 1);
|
||||
}
|
||||
|
||||
// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
|
||||
|
@ -21,7 +21,7 @@ struct FramebufferLayout {
|
||||
* Returns the ration of pixel size of the top screen, compared to the native size of the 3DS
|
||||
* screen.
|
||||
*/
|
||||
float GetScalingRatio() const;
|
||||
u16 GetScalingRatio() const;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -58,7 +58,6 @@ void VMManager::Reset() {
|
||||
|
||||
page_table.pointers.fill(nullptr);
|
||||
page_table.attributes.fill(Memory::PageType::Unmapped);
|
||||
page_table.cached_res_count.fill(0);
|
||||
|
||||
UpdatePageTableForVMA(initial_vma);
|
||||
}
|
||||
|
@ -465,7 +465,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
|
||||
command.dma_request.size, Memory::FlushMode::Flush);
|
||||
Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address,
|
||||
command.dma_request.size,
|
||||
Memory::FlushMode::FlushAndInvalidate);
|
||||
Memory::FlushMode::Invalidate);
|
||||
|
||||
// TODO(Subv): These memory accesses should not go through the application's memory mapping.
|
||||
// They should go through the GSP module's memory mapping.
|
||||
|
@ -96,20 +96,11 @@ static void MemoryFill(const Regs::MemoryFillConfig& config) {
|
||||
u8* start = Memory::GetPhysicalPointer(start_addr);
|
||||
u8* end = Memory::GetPhysicalPointer(end_addr);
|
||||
|
||||
// TODO: Consider always accelerating and returning vector of
|
||||
// regions that the accelerated fill did not cover to
|
||||
// reduce/eliminate the fill that the cpu has to do.
|
||||
// This would also mean that the flush below is not needed.
|
||||
// Fill should first flush all surfaces that touch but are
|
||||
// not completely within the fill range.
|
||||
// Then fill all completely covered surfaces, and return the
|
||||
// regions that were between surfaces or within the touching
|
||||
// ones for cpu to manually fill here.
|
||||
if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config))
|
||||
return;
|
||||
|
||||
Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(),
|
||||
config.GetEndAddress() - config.GetStartAddress());
|
||||
Memory::RasterizerInvalidateRegion(config.GetStartAddress(),
|
||||
config.GetEndAddress() - config.GetStartAddress());
|
||||
|
||||
if (config.fill_24bit) {
|
||||
// fill with 24-bit values
|
||||
@ -199,7 +190,7 @@ static void DisplayTransfer(const Regs::DisplayTransferConfig& config) {
|
||||
u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
|
||||
|
||||
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
|
||||
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
|
||||
Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
|
||||
|
||||
for (u32 y = 0; y < output_height; ++y) {
|
||||
for (u32 x = 0; x < output_width; ++x) {
|
||||
@ -363,8 +354,10 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) {
|
||||
|
||||
size_t contiguous_output_size =
|
||||
config.texture_copy.size / output_width * (output_width + output_gap);
|
||||
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
|
||||
static_cast<u32>(contiguous_output_size));
|
||||
// Only need to flush output if it has a gap
|
||||
const auto FlushInvalidate_fn = (output_gap != 0) ? Memory::RasterizerFlushAndInvalidateRegion
|
||||
: Memory::RasterizerInvalidateRegion;
|
||||
FlushInvalidate_fn(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
|
||||
|
||||
u32 remaining_input = input_width;
|
||||
u32 remaining_output = output_width;
|
||||
@ -570,4 +563,4 @@ void Shutdown() {
|
||||
LOG_DEBUG(HW_GPU, "shutdown OK");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace GPU
|
||||
|
@ -50,7 +50,6 @@ static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, Page
|
||||
|
||||
page_table.attributes[base] = type;
|
||||
page_table.pointers[base] = memory;
|
||||
page_table.cached_res_count[base] = 0;
|
||||
|
||||
base += 1;
|
||||
if (memory != nullptr)
|
||||
@ -200,7 +199,7 @@ void Write(const VAddr vaddr, const T data) {
|
||||
ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
|
||||
break;
|
||||
case PageType::RasterizerCachedMemory: {
|
||||
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate);
|
||||
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
|
||||
std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
|
||||
break;
|
||||
}
|
||||
@ -208,7 +207,7 @@ void Write(const VAddr vaddr, const T data) {
|
||||
WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
|
||||
break;
|
||||
case PageType::RasterizerCachedSpecial: {
|
||||
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate);
|
||||
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
|
||||
WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
|
||||
break;
|
||||
}
|
||||
@ -334,7 +333,7 @@ u8* GetPhysicalPointer(PAddr address) {
|
||||
return target_pointer;
|
||||
}
|
||||
|
||||
void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
|
||||
void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached) {
|
||||
if (start == 0) {
|
||||
return;
|
||||
}
|
||||
@ -355,14 +354,10 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
|
||||
}
|
||||
VAddr vaddr = *maybe_vaddr;
|
||||
|
||||
u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS];
|
||||
ASSERT_MSG(count_delta <= UINT8_MAX - res_count,
|
||||
"Rasterizer resource cache counter overflow!");
|
||||
ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!");
|
||||
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
|
||||
|
||||
// Switch page type to cached if now cached
|
||||
if (res_count == 0) {
|
||||
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
|
||||
if (cached) {
|
||||
// Switch page type to cached if now cached
|
||||
switch (page_type) {
|
||||
case PageType::Unmapped:
|
||||
// It is not necessary for a process to have this region mapped into its address
|
||||
@ -378,13 +373,8 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
res_count += count_delta;
|
||||
|
||||
// Switch page type to uncached if now uncached
|
||||
if (res_count == 0) {
|
||||
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
|
||||
} else {
|
||||
// Switch page type to uncached if now uncached
|
||||
switch (page_type) {
|
||||
case PageType::Unmapped:
|
||||
// It is not necessary for a process to have this region mapped into its address
|
||||
@ -414,52 +404,69 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
|
||||
}
|
||||
|
||||
void RasterizerFlushRegion(PAddr start, u32 size) {
|
||||
if (VideoCore::g_renderer != nullptr) {
|
||||
VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size);
|
||||
if (VideoCore::g_renderer == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size);
|
||||
}
|
||||
|
||||
void RasterizerInvalidateRegion(PAddr start, u32 size) {
|
||||
if (VideoCore::g_renderer == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size);
|
||||
}
|
||||
|
||||
void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
|
||||
// Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
|
||||
// null here
|
||||
if (VideoCore::g_renderer != nullptr) {
|
||||
VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size);
|
||||
if (VideoCore::g_renderer == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size);
|
||||
}
|
||||
|
||||
void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) {
|
||||
// Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
|
||||
// null here
|
||||
if (VideoCore::g_renderer != nullptr) {
|
||||
VAddr end = start + size;
|
||||
|
||||
auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
|
||||
if (start >= region_end || end <= region_start) {
|
||||
// No overlap with region
|
||||
return;
|
||||
}
|
||||
|
||||
VAddr overlap_start = std::max(start, region_start);
|
||||
VAddr overlap_end = std::min(end, region_end);
|
||||
|
||||
PAddr physical_start = TryVirtualToPhysicalAddress(overlap_start).value();
|
||||
u32 overlap_size = overlap_end - overlap_start;
|
||||
|
||||
auto* rasterizer = VideoCore::g_renderer->Rasterizer();
|
||||
switch (mode) {
|
||||
case FlushMode::Flush:
|
||||
rasterizer->FlushRegion(physical_start, overlap_size);
|
||||
break;
|
||||
case FlushMode::FlushAndInvalidate:
|
||||
rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
CheckRegion(LINEAR_HEAP_VADDR, LINEAR_HEAP_VADDR_END);
|
||||
CheckRegion(NEW_LINEAR_HEAP_VADDR, NEW_LINEAR_HEAP_VADDR_END);
|
||||
CheckRegion(VRAM_VADDR, VRAM_VADDR_END);
|
||||
if (VideoCore::g_renderer == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
VAddr end = start + size;
|
||||
|
||||
auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
|
||||
if (start >= region_end || end <= region_start) {
|
||||
// No overlap with region
|
||||
return;
|
||||
}
|
||||
|
||||
VAddr overlap_start = std::max(start, region_start);
|
||||
VAddr overlap_end = std::min(end, region_end);
|
||||
|
||||
PAddr physical_start = TryVirtualToPhysicalAddress(overlap_start).value();
|
||||
u32 overlap_size = overlap_end - overlap_start;
|
||||
|
||||
auto* rasterizer = VideoCore::g_renderer->Rasterizer();
|
||||
switch (mode) {
|
||||
case FlushMode::Flush:
|
||||
rasterizer->FlushRegion(physical_start, overlap_size);
|
||||
break;
|
||||
case FlushMode::Invalidate:
|
||||
rasterizer->InvalidateRegion(physical_start, overlap_size);
|
||||
break;
|
||||
case FlushMode::FlushAndInvalidate:
|
||||
rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
CheckRegion(LINEAR_HEAP_VADDR, LINEAR_HEAP_VADDR_END);
|
||||
CheckRegion(NEW_LINEAR_HEAP_VADDR, NEW_LINEAR_HEAP_VADDR_END);
|
||||
CheckRegion(VRAM_VADDR, VRAM_VADDR_END);
|
||||
}
|
||||
|
||||
u8 Read8(const VAddr addr) {
|
||||
@ -588,7 +595,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
|
||||
}
|
||||
case PageType::RasterizerCachedMemory: {
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::FlushAndInvalidate);
|
||||
FlushMode::Invalidate);
|
||||
std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
|
||||
break;
|
||||
}
|
||||
@ -596,7 +603,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
|
||||
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
|
||||
DEBUG_ASSERT(handler);
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::FlushAndInvalidate);
|
||||
FlushMode::Invalidate);
|
||||
handler->WriteBlock(current_vaddr, src_buffer, copy_amount);
|
||||
break;
|
||||
}
|
||||
@ -648,7 +655,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size
|
||||
}
|
||||
case PageType::RasterizerCachedMemory: {
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::FlushAndInvalidate);
|
||||
FlushMode::Invalidate);
|
||||
std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount);
|
||||
break;
|
||||
}
|
||||
@ -656,7 +663,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size
|
||||
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
|
||||
DEBUG_ASSERT(handler);
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::FlushAndInvalidate);
|
||||
FlushMode::Invalidate);
|
||||
handler->WriteBlock(current_vaddr, zeros.data(), copy_amount);
|
||||
break;
|
||||
}
|
||||
|
@ -72,12 +72,6 @@ struct PageTable {
|
||||
* the corresponding entry in `pointers` MUST be set to null.
|
||||
*/
|
||||
std::array<PageType, PAGE_TABLE_NUM_ENTRIES> attributes;
|
||||
|
||||
/**
|
||||
* Indicates the number of externally cached resources touching a page that should be
|
||||
* flushed before the memory is accessed
|
||||
*/
|
||||
std::array<u8, PAGE_TABLE_NUM_ENTRIES> cached_res_count;
|
||||
};
|
||||
|
||||
/// Physical memory regions as seen from the ARM11
|
||||
@ -245,16 +239,20 @@ boost::optional<VAddr> PhysicalToVirtualAddress(PAddr addr);
|
||||
u8* GetPhysicalPointer(PAddr address);
|
||||
|
||||
/**
|
||||
* Adds the supplied value to the rasterizer resource cache counter of each
|
||||
* page touching the region.
|
||||
* Mark each page touching the region as cached.
|
||||
*/
|
||||
void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta);
|
||||
void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached);
|
||||
|
||||
/**
|
||||
* Flushes any externally cached rasterizer resources touching the given region.
|
||||
*/
|
||||
void RasterizerFlushRegion(PAddr start, u32 size);
|
||||
|
||||
/**
|
||||
* Invalidates any externally cached rasterizer resources touching the given region.
|
||||
*/
|
||||
void RasterizerInvalidateRegion(PAddr start, u32 size);
|
||||
|
||||
/**
|
||||
* Flushes and invalidates any externally cached rasterizer resources touching the given region.
|
||||
*/
|
||||
@ -263,6 +261,8 @@ void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size);
|
||||
enum class FlushMode {
|
||||
/// Write back modified surfaces to RAM
|
||||
Flush,
|
||||
/// Remove region from the cache
|
||||
Invalidate,
|
||||
/// Write back modified surfaces to RAM, and also remove them from the cache
|
||||
FlushAndInvalidate,
|
||||
};
|
||||
|
@ -95,7 +95,7 @@ struct Values {
|
||||
// Renderer
|
||||
bool use_hw_renderer;
|
||||
bool use_shader_jit;
|
||||
float resolution_factor;
|
||||
u16 resolution_factor;
|
||||
bool use_vsync;
|
||||
bool use_frame_limit;
|
||||
u16 frame_limit;
|
||||
|
Reference in New Issue
Block a user