renderer_vulkan: Improve StreamBuffer API and use it in TextureRuntime
* Also use separate upload and download buffers optimized for write and readback respectively. This gives a huge 20+ FPS boost in most games which were bottlenecked by slow reads
This commit is contained in:
@ -23,7 +23,7 @@ inline T MakeInt(const std::byte* bytes) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <PixelFormat format, bool converted>
|
template <PixelFormat format, bool converted>
|
||||||
inline void DecodePixel(const std::byte* source, std::byte* dest) {
|
constexpr void DecodePixel(const std::byte* source, std::byte* dest) {
|
||||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||||
|
|
||||||
if constexpr (format == PixelFormat::D24S8) {
|
if constexpr (format == PixelFormat::D24S8) {
|
||||||
@ -69,7 +69,7 @@ inline void DecodePixel(const std::byte* source, std::byte* dest) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <PixelFormat format>
|
template <PixelFormat format>
|
||||||
inline void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
|
constexpr void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
|
||||||
const u32 morton_offset = VideoCore::MortonInterleave(x, y);
|
const u32 morton_offset = VideoCore::MortonInterleave(x, y);
|
||||||
const u8 value = static_cast<const u8>(source_tile[morton_offset >> 1]);
|
const u8 value = static_cast<const u8>(source_tile[morton_offset >> 1]);
|
||||||
const u8 pixel = Color::Convert4To8((morton_offset % 2) ? (value >> 4) : (value & 0xF));
|
const u8 pixel = Color::Convert4To8((morton_offset % 2) ? (value >> 4) : (value & 0xF));
|
||||||
@ -84,7 +84,7 @@ inline void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte*
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <PixelFormat format>
|
template <PixelFormat format>
|
||||||
inline void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
|
constexpr void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
|
||||||
constexpr u32 subtile_width = 4;
|
constexpr u32 subtile_width = 4;
|
||||||
constexpr u32 subtile_height = 4;
|
constexpr u32 subtile_height = 4;
|
||||||
constexpr bool has_alpha = format == PixelFormat::ETC1A4;
|
constexpr bool has_alpha = format == PixelFormat::ETC1A4;
|
||||||
@ -114,7 +114,7 @@ inline void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byt
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <PixelFormat format, bool converted>
|
template <PixelFormat format, bool converted>
|
||||||
inline void EncodePixel(const std::byte* source, std::byte* dest) {
|
constexpr void EncodePixel(const std::byte* source, std::byte* dest) {
|
||||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||||
|
|
||||||
if constexpr (format == PixelFormat::D24S8) {
|
if constexpr (format == PixelFormat::D24S8) {
|
||||||
@ -146,8 +146,8 @@ inline void EncodePixel(const std::byte* source, std::byte* dest) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <bool morton_to_linear, PixelFormat format, bool converted>
|
template <bool morton_to_linear, PixelFormat format, bool converted>
|
||||||
inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
|
constexpr void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
|
||||||
std::span<std::byte> linear_buffer) {
|
std::span<std::byte> linear_buffer) {
|
||||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||||
constexpr u32 linear_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
|
constexpr u32 linear_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
|
||||||
constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4;
|
constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4;
|
||||||
@ -200,8 +200,9 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
|
|||||||
* in the linear_buffer.
|
* in the linear_buffer.
|
||||||
*/
|
*/
|
||||||
template <bool morton_to_linear, PixelFormat format, bool converted = false>
|
template <bool morton_to_linear, PixelFormat format, bool converted = false>
|
||||||
static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
|
static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
|
||||||
std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer) {
|
std::span<std::byte> linear_buffer,
|
||||||
|
std::span<std::byte> tiled_buffer) {
|
||||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||||
constexpr u32 aligned_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
|
constexpr u32 aligned_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
|
||||||
constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
|
constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
|
||||||
|
@ -948,22 +948,22 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
|
|||||||
|
|
||||||
surface->Download(download, staging);
|
surface->Download(download, staging);
|
||||||
|
|
||||||
download_queue.push_back(
|
MemoryRef dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start);
|
||||||
[this, surface, flush_start, flush_end, flush_info, mapped = staging.mapped]() {
|
if (!dest_ptr) [[unlikely]] {
|
||||||
MemoryRef dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start);
|
return;
|
||||||
if (!dest_ptr) [[unlikely]] {
|
}
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
|
const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
|
||||||
|
|
||||||
if (surface->is_tiled) {
|
download_queue.push_back([this, surface, flush_start, flush_end, flush_info,
|
||||||
SwizzleTexture(flush_info, flush_start, flush_end, mapped, download_dest,
|
mapped = staging.mapped, download_dest]() {
|
||||||
runtime.NeedsConvertion(surface->pixel_format));
|
if (surface->is_tiled) {
|
||||||
} else {
|
SwizzleTexture(flush_info, flush_start, flush_end, mapped, download_dest,
|
||||||
runtime.FormatConvert(*surface, false, mapped, download_dest);
|
runtime.NeedsConvertion(surface->pixel_format));
|
||||||
}
|
} else {
|
||||||
});
|
runtime.FormatConvert(*surface, false, mapped, download_dest);
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
@ -962,11 +962,10 @@ void RendererVulkan::SwapBuffers() {
|
|||||||
void RendererVulkan::FlushBuffers() {
|
void RendererVulkan::FlushBuffers() {
|
||||||
vertex_buffer.Flush();
|
vertex_buffer.Flush();
|
||||||
rasterizer->FlushBuffers();
|
rasterizer->FlushBuffers();
|
||||||
renderpass_cache.ExitRenderpass();
|
runtime.FlushBuffers();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RendererVulkan::OnSlotSwitch() {
|
void RendererVulkan::OnSlotSwitch() {
|
||||||
runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex());
|
|
||||||
renderpass_cache.OnSlotSwitch();
|
renderpass_cache.OnSlotSwitch();
|
||||||
rasterizer->pipeline_cache.MarkDirty();
|
rasterizer->pipeline_cache.MarkDirty();
|
||||||
}
|
}
|
||||||
|
@ -40,14 +40,18 @@ inline auto ToVkAccessStageFlags(vk::BufferUsageFlagBits usage) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
StagingBuffer::StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage)
|
StagingBuffer::StagingBuffer(const Instance& instance, u32 size, bool readback)
|
||||||
: instance{instance} {
|
: instance{instance} {
|
||||||
|
const vk::BufferUsageFlags usage =
|
||||||
|
readback ? vk::BufferUsageFlagBits::eTransferDst : vk::BufferUsageFlagBits::eTransferSrc;
|
||||||
const vk::BufferCreateInfo buffer_info = {.size = size, .usage = usage};
|
const vk::BufferCreateInfo buffer_info = {.size = size, .usage = usage};
|
||||||
|
|
||||||
const VmaAllocationCreateInfo alloc_create_info = {
|
const VmaAllocationCreateFlags flags =
|
||||||
.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
|
readback ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
|
||||||
VMA_ALLOCATION_CREATE_MAPPED_BIT,
|
: VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
|
||||||
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST};
|
const VmaAllocationCreateInfo alloc_create_info = {.flags =
|
||||||
|
flags | VMA_ALLOCATION_CREATE_MAPPED_BIT,
|
||||||
|
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST};
|
||||||
|
|
||||||
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
|
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
|
||||||
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
|
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
|
||||||
@ -66,9 +70,15 @@ StagingBuffer::~StagingBuffer() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
|
StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
|
||||||
vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats)
|
bool readback)
|
||||||
: instance{instance}, scheduler{scheduler}, total_size{size * SCHEDULER_COMMAND_COUNT},
|
: instance{instance}, scheduler{scheduler}, total_size{size * SCHEDULER_COMMAND_COUNT},
|
||||||
staging{instance, total_size, vk::BufferUsageFlagBits::eTransferSrc}, usage{usage} {
|
staging{instance, total_size, readback}, bucket_size{size} {}
|
||||||
|
|
||||||
|
StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
|
||||||
|
vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats,
|
||||||
|
bool readback)
|
||||||
|
: instance{instance}, scheduler{scheduler}, total_size{size * SCHEDULER_COMMAND_COUNT},
|
||||||
|
staging{instance, total_size, readback}, usage{usage}, bucket_size{size} {
|
||||||
|
|
||||||
const vk::BufferCreateInfo buffer_info = {
|
const vk::BufferCreateInfo buffer_info = {
|
||||||
.size = total_size, .usage = usage | vk::BufferUsageFlagBits::eTransferDst};
|
.size = total_size, .usage = usage | vk::BufferUsageFlagBits::eTransferDst};
|
||||||
@ -97,7 +107,6 @@ StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u
|
|||||||
}
|
}
|
||||||
|
|
||||||
view_count = view_formats.size();
|
view_count = view_formats.size();
|
||||||
bucket_size = size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
StreamBuffer::~StreamBuffer() {
|
StreamBuffer::~StreamBuffer() {
|
||||||
@ -141,36 +150,60 @@ void StreamBuffer::Commit(u32 size) {
|
|||||||
|
|
||||||
void StreamBuffer::Flush() {
|
void StreamBuffer::Flush() {
|
||||||
const u32 current_bucket = scheduler.GetCurrentSlotIndex();
|
const u32 current_bucket = scheduler.GetCurrentSlotIndex();
|
||||||
|
const u32 flush_start = current_bucket * bucket_size;
|
||||||
const u32 flush_size = buckets[current_bucket].offset;
|
const u32 flush_size = buckets[current_bucket].offset;
|
||||||
ASSERT(flush_size <= bucket_size);
|
ASSERT(flush_size <= bucket_size);
|
||||||
|
|
||||||
if (flush_size > 0) {
|
if (flush_size > 0) [[likely]] {
|
||||||
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
|
// Ensure all staging writes are visible to the host memory domain
|
||||||
VmaAllocator allocator = instance.GetAllocator();
|
VmaAllocator allocator = instance.GetAllocator();
|
||||||
|
vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
|
||||||
|
|
||||||
const u32 flush_start = current_bucket * bucket_size;
|
// Make the data available to the GPU if possible
|
||||||
const vk::BufferCopy copy_region = {
|
if (buffer) {
|
||||||
.srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size};
|
const vk::BufferCopy copy_region = {
|
||||||
|
.srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size};
|
||||||
|
|
||||||
vmaFlushAllocation(allocator, allocation, flush_start, flush_size);
|
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
|
||||||
command_buffer.copyBuffer(staging.buffer, buffer, copy_region);
|
command_buffer.copyBuffer(staging.buffer, buffer, copy_region);
|
||||||
|
|
||||||
// Add pipeline barrier for the flushed region
|
// Add pipeline barrier for the flushed region
|
||||||
auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
|
auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
|
||||||
const vk::BufferMemoryBarrier buffer_barrier = {
|
const vk::BufferMemoryBarrier buffer_barrier = {
|
||||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||||
.dstAccessMask = access_mask,
|
.dstAccessMask = access_mask,
|
||||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
.buffer = buffer,
|
.buffer = buffer,
|
||||||
.offset = flush_start,
|
.offset = flush_start,
|
||||||
.size = flush_size};
|
.size = flush_size};
|
||||||
|
|
||||||
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
|
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
|
||||||
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
|
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier,
|
||||||
|
{});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset the offset of the next bucket
|
SwitchBucket();
|
||||||
|
}
|
||||||
|
|
||||||
|
void StreamBuffer::Invalidate() {
|
||||||
|
const u32 current_bucket = scheduler.GetCurrentSlotIndex();
|
||||||
|
const u32 flush_start = current_bucket * bucket_size;
|
||||||
|
const u32 flush_size = buckets[current_bucket].offset;
|
||||||
|
ASSERT(flush_size <= bucket_size);
|
||||||
|
|
||||||
|
if (flush_size > 0) [[likely]] {
|
||||||
|
// Ensure the staging memory can be read by the host
|
||||||
|
VmaAllocator allocator = instance.GetAllocator();
|
||||||
|
vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
SwitchBucket();
|
||||||
|
}
|
||||||
|
|
||||||
|
void StreamBuffer::SwitchBucket() {
|
||||||
|
const u32 current_bucket = scheduler.GetCurrentSlotIndex();
|
||||||
const u32 next_bucket = (current_bucket + 1) % SCHEDULER_COMMAND_COUNT;
|
const u32 next_bucket = (current_bucket + 1) % SCHEDULER_COMMAND_COUNT;
|
||||||
buckets[next_bucket].offset = 0;
|
buckets[next_bucket].offset = 0;
|
||||||
buckets[next_bucket].invalid = true;
|
buckets[next_bucket].invalid = true;
|
||||||
|
@ -19,13 +19,8 @@ class TaskScheduler;
|
|||||||
|
|
||||||
constexpr u32 MAX_BUFFER_VIEWS = 3;
|
constexpr u32 MAX_BUFFER_VIEWS = 3;
|
||||||
|
|
||||||
struct LockedRegion {
|
|
||||||
u32 size = 0;
|
|
||||||
u64 fence_counter = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct StagingBuffer {
|
struct StagingBuffer {
|
||||||
StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage);
|
StagingBuffer(const Instance& instance, u32 size, bool readback);
|
||||||
~StagingBuffer();
|
~StagingBuffer();
|
||||||
|
|
||||||
const Instance& instance;
|
const Instance& instance;
|
||||||
@ -36,10 +31,19 @@ struct StagingBuffer {
|
|||||||
|
|
||||||
class StreamBuffer {
|
class StreamBuffer {
|
||||||
public:
|
public:
|
||||||
|
/// Staging only constructor
|
||||||
StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
|
StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
|
||||||
vk::BufferUsageFlagBits usage, std::span<const vk::Format> views);
|
bool readback = false);
|
||||||
|
/// Staging + GPU streaming constructor
|
||||||
|
StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
|
||||||
|
vk::BufferUsageFlagBits usage, std::span<const vk::Format> views,
|
||||||
|
bool readback = false);
|
||||||
~StreamBuffer();
|
~StreamBuffer();
|
||||||
|
|
||||||
|
StreamBuffer(const StreamBuffer&) = delete;
|
||||||
|
StreamBuffer& operator=(const StreamBuffer&) = delete;
|
||||||
|
|
||||||
|
/// Maps aligned staging memory of size bytes
|
||||||
std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);
|
std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);
|
||||||
|
|
||||||
/// Commits size bytes from the currently mapped staging memory
|
/// Commits size bytes from the currently mapped staging memory
|
||||||
@ -48,24 +52,28 @@ public:
|
|||||||
/// Flushes staging memory to the GPU buffer
|
/// Flushes staging memory to the GPU buffer
|
||||||
void Flush();
|
void Flush();
|
||||||
|
|
||||||
/// Returns the Vulkan buffer handle
|
/// Invalidates staging memory for reading
|
||||||
|
void Invalidate();
|
||||||
|
|
||||||
|
/// Switches to the next available bucket
|
||||||
|
void SwitchBucket();
|
||||||
|
|
||||||
|
/// Returns the GPU buffer handle
|
||||||
vk::Buffer GetHandle() const {
|
vk::Buffer GetHandle() const {
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the staging buffer handle
|
||||||
|
vk::Buffer GetStagingHandle() const {
|
||||||
|
return staging.buffer;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns an immutable reference to the requested buffer view
|
/// Returns an immutable reference to the requested buffer view
|
||||||
const vk::BufferView& GetView(u32 index = 0) const {
|
const vk::BufferView& GetView(u32 index = 0) const {
|
||||||
ASSERT(index < view_count);
|
ASSERT(index < view_count);
|
||||||
return views[index];
|
return views[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
|
||||||
/// Invalidates the buffer offsets
|
|
||||||
void Invalidate();
|
|
||||||
|
|
||||||
/// Removes the lock on regions whose fence counter has been reached by the GPU
|
|
||||||
bool UnlockFreeRegions(u32 target_size);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct Bucket {
|
struct Bucket {
|
||||||
bool invalid;
|
bool invalid;
|
||||||
|
@ -32,19 +32,14 @@ vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
|
|||||||
return vk::ImageAspectFlagBits::eColor;
|
return vk::ImageAspectFlagBits::eColor;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u32 STAGING_BUFFER_SIZE = 64 * 1024 * 1024;
|
constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||||
|
constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||||
|
|
||||||
TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
|
TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
|
||||||
RenderpassCache& renderpass_cache)
|
RenderpassCache& renderpass_cache)
|
||||||
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, blit_helper{
|
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
|
||||||
instance,
|
blit_helper{instance, scheduler}, upload_buffer{instance, scheduler, UPLOAD_BUFFER_SIZE},
|
||||||
scheduler} {
|
download_buffer{instance, scheduler, DOWNLOAD_BUFFER_SIZE, true} {
|
||||||
|
|
||||||
for (auto& buffer : staging_buffers) {
|
|
||||||
buffer = std::make_unique<StagingBuffer>(instance, STAGING_BUFFER_SIZE,
|
|
||||||
vk::BufferUsageFlagBits::eTransferSrc |
|
|
||||||
vk::BufferUsageFlagBits::eTransferDst);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto Register = [this](VideoCore::PixelFormat dest,
|
auto Register = [this](VideoCore::PixelFormat dest,
|
||||||
std::unique_ptr<FormatReinterpreterBase>&& obj) {
|
std::unique_ptr<FormatReinterpreterBase>&& obj) {
|
||||||
@ -64,7 +59,9 @@ TextureRuntime::~TextureRuntime() {
|
|||||||
for (const auto& [key, alloc] : texture_recycler) {
|
for (const auto& [key, alloc] : texture_recycler) {
|
||||||
vmaDestroyImage(allocator, alloc.image, alloc.allocation);
|
vmaDestroyImage(allocator, alloc.image, alloc.allocation);
|
||||||
device.destroyImageView(alloc.image_view);
|
device.destroyImageView(alloc.image_view);
|
||||||
device.destroyImageView(alloc.base_view);
|
if (alloc.base_view) {
|
||||||
|
device.destroyImageView(alloc.base_view);
|
||||||
|
}
|
||||||
if (alloc.depth_view) {
|
if (alloc.depth_view) {
|
||||||
device.destroyImageView(alloc.depth_view);
|
device.destroyImageView(alloc.depth_view);
|
||||||
device.destroyImageView(alloc.stencil_view);
|
device.destroyImageView(alloc.stencil_view);
|
||||||
@ -82,29 +79,24 @@ TextureRuntime::~TextureRuntime() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
|
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
|
||||||
const u32 current_slot = scheduler.GetCurrentSlotIndex();
|
|
||||||
u32& offset = staging_offsets[current_slot];
|
|
||||||
// Depth uploads require 4 byte alignment, doesn't hurt to do it for everyone
|
// Depth uploads require 4 byte alignment, doesn't hurt to do it for everyone
|
||||||
offset = Common::AlignUp(offset, 4);
|
auto& buffer = upload ? upload_buffer : download_buffer;
|
||||||
|
auto [data, offset, invalidate] = buffer.Map(size, 4);
|
||||||
|
|
||||||
if (offset + size > STAGING_BUFFER_SIZE) {
|
return StagingData{.buffer = buffer.GetStagingHandle(),
|
||||||
LOG_CRITICAL(Render_Vulkan, "Staging buffer size exceeded!");
|
|
||||||
UNREACHABLE();
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto& buffer = staging_buffers[current_slot];
|
|
||||||
return StagingData{.buffer = buffer->buffer,
|
|
||||||
.size = size,
|
.size = size,
|
||||||
.mapped = buffer->mapped.subspan(offset, size),
|
.mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
|
||||||
.buffer_offset = offset};
|
.buffer_offset = offset};
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureRuntime::Finish() {
|
void TextureRuntime::FlushBuffers() {
|
||||||
scheduler.Submit(SubmitMode::Flush);
|
upload_buffer.Flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureRuntime::OnSlotSwitch(u32 new_slot) {
|
void TextureRuntime::Finish() {
|
||||||
staging_offsets[new_slot] = 0;
|
renderpass_cache.ExitRenderpass();
|
||||||
|
scheduler.Submit(SubmitMode::Flush);
|
||||||
|
download_buffer.Invalidate();
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
|
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
|
||||||
@ -112,6 +104,7 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
|
|||||||
const FormatTraits traits = instance.GetTraits(format);
|
const FormatTraits traits = instance.GetTraits(format);
|
||||||
const vk::ImageAspectFlags aspect = ToVkAspect(VideoCore::GetFormatType(format));
|
const vk::ImageAspectFlags aspect = ToVkAspect(VideoCore::GetFormatType(format));
|
||||||
|
|
||||||
|
// Depth buffers are not supposed to support blit by the spec so don't require it.
|
||||||
const bool is_suitable = traits.transfer_support && traits.attachment_support &&
|
const bool is_suitable = traits.transfer_support && traits.attachment_support &&
|
||||||
(traits.blit_support || aspect & vk::ImageAspectFlagBits::eDepth);
|
(traits.blit_support || aspect & vk::ImageAspectFlagBits::eDepth);
|
||||||
const vk::Format vk_format = is_suitable ? traits.native : traits.fallback;
|
const vk::Format vk_format = is_suitable ? traits.native : traits.fallback;
|
||||||
@ -416,12 +409,14 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
|
|||||||
.baseArrayLayer = blit.dst_layer,
|
.baseArrayLayer = blit.dst_layer,
|
||||||
.layerCount = 1},
|
.layerCount = 1},
|
||||||
.dstOffsets = dest_offsets};
|
.dstOffsets = dest_offsets};
|
||||||
|
|
||||||
// Don't use linear filtering on depth attachments
|
// Don't use linear filtering on depth attachments
|
||||||
const vk::Filter filtering =
|
const VideoCore::PixelFormat format = source.pixel_format;
|
||||||
blit_area.srcSubresource.aspectMask & vk::ImageAspectFlagBits::eDepth ||
|
const vk::Filter filtering = format == VideoCore::PixelFormat::D24S8 ||
|
||||||
blit_area.dstSubresource.aspectMask & vk::ImageAspectFlagBits::eDepth
|
format == VideoCore::PixelFormat::D24 ||
|
||||||
? vk::Filter::eNearest
|
format == VideoCore::PixelFormat::D16
|
||||||
: vk::Filter::eLinear;
|
? vk::Filter::eNearest
|
||||||
|
: vk::Filter::eLinear;
|
||||||
|
|
||||||
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
|
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
|
||||||
command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
|
command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
|
||||||
@ -682,8 +677,7 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
|
|||||||
InvalidateAllWatcher();
|
InvalidateAllWatcher();
|
||||||
|
|
||||||
// Lock this data until the next scheduler switch
|
// Lock this data until the next scheduler switch
|
||||||
const u32 current_slot = scheduler.GetCurrentSlotIndex();
|
runtime.upload_buffer.Commit(staging.size);
|
||||||
runtime.staging_offsets[current_slot] += staging.size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64));
|
MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64));
|
||||||
@ -724,8 +718,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Lock this data until the next scheduler switch
|
// Lock this data until the next scheduler switch
|
||||||
const u32 current_slot = scheduler.GetCurrentSlotIndex();
|
runtime.download_buffer.Commit(staging.size);
|
||||||
runtime.staging_offsets[current_slot] += staging.size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 Surface::GetInternalBytesPerPixel() const {
|
u32 Surface::GetInternalBytesPerPixel() const {
|
||||||
@ -811,8 +804,7 @@ void Surface::DepthStencilDownload(const VideoCore::BufferTextureCopy& download,
|
|||||||
|
|
||||||
// For depth downloads create an R32UI surface and use a compute shader for convert.
|
// For depth downloads create an R32UI surface and use a compute shader for convert.
|
||||||
// Then we blit and download that surface.
|
// Then we blit and download that surface.
|
||||||
// NOTE: We don't need to set pixel format here since R32Uint automatically gives us
|
// NOTE: We keep the pixel format to D24S8 to avoid linear filtering during scale
|
||||||
// a storage view. Also the D24S8 creates a unique cache key for it
|
|
||||||
SurfaceParams r32_params = *this;
|
SurfaceParams r32_params = *this;
|
||||||
r32_params.width = scaled_rect.GetWidth();
|
r32_params.width = scaled_rect.GetWidth();
|
||||||
r32_params.stride = scaled_rect.GetWidth();
|
r32_params.stride = scaled_rect.GetWidth();
|
||||||
|
@ -104,6 +104,9 @@ public:
|
|||||||
VideoCore::TextureType type, vk::Format format,
|
VideoCore::TextureType type, vk::Format format,
|
||||||
vk::ImageUsageFlags usage);
|
vk::ImageUsageFlags usage);
|
||||||
|
|
||||||
|
/// Flushes staging buffers
|
||||||
|
void FlushBuffers();
|
||||||
|
|
||||||
/// Causes a GPU command flush
|
/// Causes a GPU command flush
|
||||||
void Finish();
|
void Finish();
|
||||||
|
|
||||||
@ -138,9 +141,6 @@ public:
|
|||||||
/// Returns true if the provided pixel format needs convertion
|
/// Returns true if the provided pixel format needs convertion
|
||||||
[[nodiscard]] bool NeedsConvertion(VideoCore::PixelFormat format) const;
|
[[nodiscard]] bool NeedsConvertion(VideoCore::PixelFormat format) const;
|
||||||
|
|
||||||
/// Performs operations that need to be done on every scheduler slot switch
|
|
||||||
void OnSlotSwitch(u32 new_slot);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Returns the current Vulkan instance
|
/// Returns the current Vulkan instance
|
||||||
const Instance& GetInstance() const {
|
const Instance& GetInstance() const {
|
||||||
@ -157,9 +157,9 @@ private:
|
|||||||
TaskScheduler& scheduler;
|
TaskScheduler& scheduler;
|
||||||
RenderpassCache& renderpass_cache;
|
RenderpassCache& renderpass_cache;
|
||||||
BlitHelper blit_helper;
|
BlitHelper blit_helper;
|
||||||
|
StreamBuffer upload_buffer;
|
||||||
|
StreamBuffer download_buffer;
|
||||||
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
|
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
|
||||||
std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers;
|
|
||||||
std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{};
|
|
||||||
std::unordered_multimap<HostTextureTag, ImageAlloc> texture_recycler;
|
std::unordered_multimap<HostTextureTag, ImageAlloc> texture_recycler;
|
||||||
std::unordered_map<vk::ImageView, vk::Framebuffer> clear_framebuffers;
|
std::unordered_map<vk::ImageView, vk::Framebuffer> clear_framebuffers;
|
||||||
};
|
};
|
||||||
|
Reference in New Issue
Block a user