renderer_vulkan: Improve StreamBuffer API and use it in TextureRuntime
* Also use separate upload and download buffers optimized for write and readback respectively. This gives a huge 20+ FPS boost in most games that were bottlenecked by slow reads.
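The readback win comes from allocating the two staging pools in different memory types: the upload pool in sequential-write (write-combined) memory and the download pool in cached memory the CPU can read quickly, each flushed or invalidated explicitly. A minimal sketch of that allocation choice, assuming VMA 3.x as used in the patch (the helper name MakeStagingAllocInfo is illustrative, not part of the commit):

#include <vk_mem_alloc.h>

// Pick VMA allocation flags for a staging buffer depending on its direction.
// Upload buffers are CPU-write only, download buffers are CPU-read (readback).
VmaAllocationCreateInfo MakeStagingAllocInfo(bool readback) {
    const VmaAllocationCreateFlags access =
        readback ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT            // cached memory, fast CPU reads
                 : VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; // write-combined, fast CPU writes
    return VmaAllocationCreateInfo{
        .flags = access | VMA_ALLOCATION_CREATE_MAPPED_BIT, // keep the buffer persistently mapped
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
    };
}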
@@ -23,7 +23,7 @@ inline T MakeInt(const std::byte* bytes) {
 }
 
 template <PixelFormat format, bool converted>
-inline void DecodePixel(const std::byte* source, std::byte* dest) {
+constexpr void DecodePixel(const std::byte* source, std::byte* dest) {
     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
 
     if constexpr (format == PixelFormat::D24S8) {
@@ -69,7 +69,7 @@ inline void DecodePixel(const std::byte* source, std::byte* dest) {
 }
 
 template <PixelFormat format>
-inline void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
+constexpr void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
     const u32 morton_offset = VideoCore::MortonInterleave(x, y);
     const u8 value = static_cast<const u8>(source_tile[morton_offset >> 1]);
     const u8 pixel = Color::Convert4To8((morton_offset % 2) ? (value >> 4) : (value & 0xF));
@@ -84,7 +84,7 @@ inline void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte*
 }
 
 template <PixelFormat format>
-inline void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
+constexpr void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
     constexpr u32 subtile_width = 4;
     constexpr u32 subtile_height = 4;
     constexpr bool has_alpha = format == PixelFormat::ETC1A4;
@@ -114,7 +114,7 @@ inline void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
 }
 
 template <PixelFormat format, bool converted>
-inline void EncodePixel(const std::byte* source, std::byte* dest) {
+constexpr void EncodePixel(const std::byte* source, std::byte* dest) {
     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
 
     if constexpr (format == PixelFormat::D24S8) {
@@ -146,8 +146,8 @@ inline void EncodePixel(const std::byte* source, std::byte* dest) {
 }
 
 template <bool morton_to_linear, PixelFormat format, bool converted>
-inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
-                           std::span<std::byte> linear_buffer) {
+constexpr void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
+                              std::span<std::byte> linear_buffer) {
     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
     constexpr u32 linear_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
     constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4;
@@ -200,8 +200,9 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
  * in the linear_buffer.
  */
 template <bool morton_to_linear, PixelFormat format, bool converted = false>
-static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
-                       std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer) {
+static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
+                                 std::span<std::byte> linear_buffer,
+                                 std::span<std::byte> tiled_buffer) {
     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
     constexpr u32 aligned_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
     constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
@@ -948,22 +948,22 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
 
     surface->Download(download, staging);
 
-    download_queue.push_back(
-        [this, surface, flush_start, flush_end, flush_info, mapped = staging.mapped]() {
-            MemoryRef dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start);
-            if (!dest_ptr) [[unlikely]] {
-                return;
-            }
+    MemoryRef dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start);
+    if (!dest_ptr) [[unlikely]] {
+        return;
+    }
 
-            const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
+    const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
 
-            if (surface->is_tiled) {
-                SwizzleTexture(flush_info, flush_start, flush_end, mapped, download_dest,
-                               runtime.NeedsConvertion(surface->pixel_format));
-            } else {
-                runtime.FormatConvert(*surface, false, mapped, download_dest);
-            }
-        });
+    download_queue.push_back([this, surface, flush_start, flush_end, flush_info,
+                              mapped = staging.mapped, download_dest]() {
+        if (surface->is_tiled) {
+            SwizzleTexture(flush_info, flush_start, flush_end, mapped, download_dest,
+                           runtime.NeedsConvertion(surface->pixel_format));
+        } else {
+            runtime.FormatConvert(*surface, false, mapped, download_dest);
+        }
+    });
 }
 
 template <class T>
@@ -962,11 +962,10 @@ void RendererVulkan::SwapBuffers() {
 void RendererVulkan::FlushBuffers() {
     vertex_buffer.Flush();
     rasterizer->FlushBuffers();
-    renderpass_cache.ExitRenderpass();
+    runtime.FlushBuffers();
 }
 
 void RendererVulkan::OnSlotSwitch() {
-    runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex());
     renderpass_cache.OnSlotSwitch();
     rasterizer->pipeline_cache.MarkDirty();
 }
@@ -40,14 +40,18 @@ inline auto ToVkAccessStageFlags(vk::BufferUsageFlagBits usage) {
     return result;
 }
 
-StagingBuffer::StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage)
+StagingBuffer::StagingBuffer(const Instance& instance, u32 size, bool readback)
     : instance{instance} {
+    const vk::BufferUsageFlags usage =
+        readback ? vk::BufferUsageFlagBits::eTransferDst : vk::BufferUsageFlagBits::eTransferSrc;
     const vk::BufferCreateInfo buffer_info = {.size = size, .usage = usage};
 
-    const VmaAllocationCreateInfo alloc_create_info = {
-        .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
-                 VMA_ALLOCATION_CREATE_MAPPED_BIT,
-        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST};
+    const VmaAllocationCreateFlags flags =
+        readback ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
+                 : VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+    const VmaAllocationCreateInfo alloc_create_info = {.flags =
+                                                           flags | VMA_ALLOCATION_CREATE_MAPPED_BIT,
+                                                       .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST};
 
     VkBuffer unsafe_buffer = VK_NULL_HANDLE;
     VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
@@ -66,9 +70,15 @@ StagingBuffer::~StagingBuffer() {
 }
 
 StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
-                           vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats)
+                           bool readback)
     : instance{instance}, scheduler{scheduler}, total_size{size * SCHEDULER_COMMAND_COUNT},
-      staging{instance, total_size, vk::BufferUsageFlagBits::eTransferSrc}, usage{usage} {
+      staging{instance, total_size, readback}, bucket_size{size} {}
+
+StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
+                           vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats,
+                           bool readback)
+    : instance{instance}, scheduler{scheduler}, total_size{size * SCHEDULER_COMMAND_COUNT},
+      staging{instance, total_size, readback}, usage{usage}, bucket_size{size} {
 
     const vk::BufferCreateInfo buffer_info = {
        .size = total_size, .usage = usage | vk::BufferUsageFlagBits::eTransferDst};
@@ -97,7 +107,6 @@ StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
     }
 
     view_count = view_formats.size();
-    bucket_size = size;
 }
 
 StreamBuffer::~StreamBuffer() {
@@ -141,36 +150,60 @@ void StreamBuffer::Commit(u32 size) {
 
 void StreamBuffer::Flush() {
     const u32 current_bucket = scheduler.GetCurrentSlotIndex();
+    const u32 flush_start = current_bucket * bucket_size;
     const u32 flush_size = buckets[current_bucket].offset;
     ASSERT(flush_size <= bucket_size);
 
-    if (flush_size > 0) {
-        vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
+    if (flush_size > 0) [[likely]] {
+        // Ensure all staging writes are visible to the host memory domain
         VmaAllocator allocator = instance.GetAllocator();
+        vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
 
-        const u32 flush_start = current_bucket * bucket_size;
-        const vk::BufferCopy copy_region = {
-            .srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size};
+        // Make the data available to the GPU if possible
+        if (buffer) {
+            const vk::BufferCopy copy_region = {
+                .srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size};
 
-        vmaFlushAllocation(allocator, allocation, flush_start, flush_size);
-        command_buffer.copyBuffer(staging.buffer, buffer, copy_region);
+            vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
+            command_buffer.copyBuffer(staging.buffer, buffer, copy_region);
 
-        // Add pipeline barrier for the flushed region
-        auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
-        const vk::BufferMemoryBarrier buffer_barrier = {
-            .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
-            .dstAccessMask = access_mask,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .buffer = buffer,
-            .offset = flush_start,
-            .size = flush_size};
+            // Add pipeline barrier for the flushed region
+            auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
+            const vk::BufferMemoryBarrier buffer_barrier = {
+                .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
+                .dstAccessMask = access_mask,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .buffer = buffer,
+                .offset = flush_start,
+                .size = flush_size};
 
-        command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
-                                       vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
+            command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
+                                           vk::DependencyFlagBits::eByRegion, {}, buffer_barrier,
+                                           {});
+        }
     }
 
     // Reset the offset of the next bucket
     SwitchBucket();
 }
 
+void StreamBuffer::Invalidate() {
+    const u32 current_bucket = scheduler.GetCurrentSlotIndex();
+    const u32 flush_start = current_bucket * bucket_size;
+    const u32 flush_size = buckets[current_bucket].offset;
+    ASSERT(flush_size <= bucket_size);
+
+    if (flush_size > 0) [[likely]] {
+        // Ensure the staging memory can be read by the host
+        VmaAllocator allocator = instance.GetAllocator();
+        vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size);
+    }
+
+    SwitchBucket();
+}
+
 void StreamBuffer::SwitchBucket() {
     const u32 current_bucket = scheduler.GetCurrentSlotIndex();
     const u32 next_bucket = (current_bucket + 1) % SCHEDULER_COMMAND_COUNT;
     buckets[next_bucket].offset = 0;
     buckets[next_bucket].invalid = true;
@@ -19,13 +19,8 @@ class TaskScheduler;
 
 constexpr u32 MAX_BUFFER_VIEWS = 3;
 
-struct LockedRegion {
-    u32 size = 0;
-    u64 fence_counter = 0;
-};
-
 struct StagingBuffer {
-    StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage);
+    StagingBuffer(const Instance& instance, u32 size, bool readback);
     ~StagingBuffer();
 
     const Instance& instance;
@@ -36,10 +31,19 @@ struct StagingBuffer {
 
 class StreamBuffer {
 public:
+    /// Staging only constructor
     StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
-                 vk::BufferUsageFlagBits usage, std::span<const vk::Format> views);
+                 bool readback = false);
+    /// Staging + GPU streaming constructor
+    StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
+                 vk::BufferUsageFlagBits usage, std::span<const vk::Format> views,
+                 bool readback = false);
     ~StreamBuffer();
 
     StreamBuffer(const StreamBuffer&) = delete;
     StreamBuffer& operator=(const StreamBuffer&) = delete;
 
     /// Maps aligned staging memory of size bytes
     std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);
 
     /// Commits size bytes from the currently mapped staging memory
@@ -48,24 +52,28 @@ public:
     /// Flushes staging memory to the GPU buffer
     void Flush();
 
-    /// Returns the Vulkan buffer handle
+    /// Invalidates staging memory for reading
+    void Invalidate();
+
+    /// Switches to the next available bucket
+    void SwitchBucket();
+
+    /// Returns the GPU buffer handle
     vk::Buffer GetHandle() const {
         return buffer;
     }
 
+    /// Returns the staging buffer handle
+    vk::Buffer GetStagingHandle() const {
+        return staging.buffer;
+    }
+
     /// Returns an immutable reference to the requested buffer view
     const vk::BufferView& GetView(u32 index = 0) const {
         ASSERT(index < view_count);
         return views[index];
     }
 
-private:
-    /// Invalidates the buffer offsets
-    void Invalidate();
-
-    /// Removes the lock on regions whose fence counter has been reached by the GPU
-    bool UnlockFreeRegions(u32 target_size);
-
 private:
     struct Bucket {
         bool invalid;
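To make the new API concrete, a hedged usage sketch of the upload cycle (the wrapper function is illustrative; the Map/Commit/Flush/Invalidate signatures come from the declarations above):

#include <cstring>
#include <span>

// Copy `pixels` into the stream buffer and leave it ready for a transfer command.
void WriteToStreamBuffer(StreamBuffer& upload_buffer, std::span<const std::byte> pixels) {
    const u32 size = static_cast<u32>(pixels.size());
    // Map returns a write pointer, the offset of the mapped region inside the
    // staging buffer, and whether previously mapped pointers were invalidated.
    auto [dest, offset, invalidated] = upload_buffer.Map(size, 4);
    std::memcpy(dest, pixels.data(), size);
    upload_buffer.Commit(size);
    // Record the copy from upload_buffer.GetStagingHandle() at `offset`, then call
    // Flush() before submission so the host writes become visible to the GPU.
    // Readback buffers mirror this: Commit(), submit, then Invalidate() before the
    // CPU reads the mapped memory.
}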
@@ -32,19 +32,14 @@ vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
     return vk::ImageAspectFlagBits::eColor;
 }
 
-constexpr u32 STAGING_BUFFER_SIZE = 64 * 1024 * 1024;
+constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
+constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
 
 TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
                                RenderpassCache& renderpass_cache)
-    : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, blit_helper{
-                                                                                        instance,
-                                                                                        scheduler} {
-
-    for (auto& buffer : staging_buffers) {
-        buffer = std::make_unique<StagingBuffer>(instance, STAGING_BUFFER_SIZE,
-                                                 vk::BufferUsageFlagBits::eTransferSrc |
-                                                     vk::BufferUsageFlagBits::eTransferDst);
-    }
+    : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
+      blit_helper{instance, scheduler}, upload_buffer{instance, scheduler, UPLOAD_BUFFER_SIZE},
+      download_buffer{instance, scheduler, DOWNLOAD_BUFFER_SIZE, true} {
 
     auto Register = [this](VideoCore::PixelFormat dest,
                            std::unique_ptr<FormatReinterpreterBase>&& obj) {
@@ -64,7 +59,9 @@ TextureRuntime::~TextureRuntime() {
     for (const auto& [key, alloc] : texture_recycler) {
         vmaDestroyImage(allocator, alloc.image, alloc.allocation);
         device.destroyImageView(alloc.image_view);
-        device.destroyImageView(alloc.base_view);
+        if (alloc.base_view) {
+            device.destroyImageView(alloc.base_view);
+        }
         if (alloc.depth_view) {
             device.destroyImageView(alloc.depth_view);
             device.destroyImageView(alloc.stencil_view);
@@ -82,29 +79,24 @@ TextureRuntime::~TextureRuntime() {
 }
 
 StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
-    const u32 current_slot = scheduler.GetCurrentSlotIndex();
-    u32& offset = staging_offsets[current_slot];
-    // Depth uploads require 4 byte alignment, doesn't hurt to do it for everyone
-    offset = Common::AlignUp(offset, 4);
+    auto& buffer = upload ? upload_buffer : download_buffer;
+    auto [data, offset, invalidate] = buffer.Map(size, 4);
 
-    if (offset + size > STAGING_BUFFER_SIZE) {
-        LOG_CRITICAL(Render_Vulkan, "Staging buffer size exceeded!");
-        UNREACHABLE();
-    }
-
-    const auto& buffer = staging_buffers[current_slot];
-    return StagingData{.buffer = buffer->buffer,
+    return StagingData{.buffer = buffer.GetStagingHandle(),
                        .size = size,
-                       .mapped = buffer->mapped.subspan(offset, size),
+                       .mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
                        .buffer_offset = offset};
 }
 
-void TextureRuntime::Finish() {
-    scheduler.Submit(SubmitMode::Flush);
+void TextureRuntime::FlushBuffers() {
+    upload_buffer.Flush();
 }
 
-void TextureRuntime::OnSlotSwitch(u32 new_slot) {
-    staging_offsets[new_slot] = 0;
+void TextureRuntime::Finish() {
+    renderpass_cache.ExitRenderpass();
+    scheduler.Submit(SubmitMode::Flush);
+    download_buffer.Invalidate();
 }
 
 ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
@@ -112,6 +104,7 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
     const FormatTraits traits = instance.GetTraits(format);
     const vk::ImageAspectFlags aspect = ToVkAspect(VideoCore::GetFormatType(format));
 
+    // Depth buffers are not supposed to support blit by the spec so don't require it.
     const bool is_suitable = traits.transfer_support && traits.attachment_support &&
                              (traits.blit_support || aspect & vk::ImageAspectFlagBits::eDepth);
     const vk::Format vk_format = is_suitable ? traits.native : traits.fallback;
@@ -416,12 +409,14 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
                           .baseArrayLayer = blit.dst_layer,
                           .layerCount = 1},
        .dstOffsets = dest_offsets};
 
     // Don't use linear filtering on depth attachments
-    const vk::Filter filtering =
-        blit_area.srcSubresource.aspectMask & vk::ImageAspectFlagBits::eDepth ||
-                blit_area.dstSubresource.aspectMask & vk::ImageAspectFlagBits::eDepth
-            ? vk::Filter::eNearest
-            : vk::Filter::eLinear;
+    const VideoCore::PixelFormat format = source.pixel_format;
+    const vk::Filter filtering = format == VideoCore::PixelFormat::D24S8 ||
+                                         format == VideoCore::PixelFormat::D24 ||
+                                         format == VideoCore::PixelFormat::D16
+                                     ? vk::Filter::eNearest
+                                     : vk::Filter::eLinear;
 
     vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
     command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
@@ -682,8 +677,7 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
     InvalidateAllWatcher();
 
     // Lock this data until the next scheduler switch
-    const u32 current_slot = scheduler.GetCurrentSlotIndex();
-    runtime.staging_offsets[current_slot] += staging.size;
+    runtime.upload_buffer.Commit(staging.size);
 }
 
 MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64));
@@ -724,8 +718,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) {
     }
 
     // Lock this data until the next scheduler switch
-    const u32 current_slot = scheduler.GetCurrentSlotIndex();
-    runtime.staging_offsets[current_slot] += staging.size;
+    runtime.download_buffer.Commit(staging.size);
 }
 
 u32 Surface::GetInternalBytesPerPixel() const {
@@ -811,8 +804,7 @@ void Surface::DepthStencilDownload(const VideoCore::BufferTextureCopy& download,
 
     // For depth downloads create an R32UI surface and use a compute shader for convert.
     // Then we blit and download that surface.
-    // NOTE: We don't need to set pixel format here since R32Uint automatically gives us
-    // a storage view. Also the D24S8 creates a unique cache key for it
+    // NOTE: We keep the pixel format to D24S8 to avoid linear filtering during scale
     SurfaceParams r32_params = *this;
     r32_params.width = scaled_rect.GetWidth();
     r32_params.stride = scaled_rect.GetWidth();
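For reference, a hedged sketch of how TextureRuntime strings these pieces together after this change (the wrapper function and its parameters are illustrative, not part of the patch; it assumes the project's own headers):

// Upload path: stage pixels in the upload stream buffer and flush before submission.
void UploadSurfacePixels(TextureRuntime& runtime, Surface& surface,
                         const VideoCore::BufferTextureCopy& copy,
                         std::span<const std::byte> pixels) {
    // FindStaging now maps a 4-byte aligned region of the persistently mapped upload buffer
    StagingData staging = runtime.FindStaging(static_cast<u32>(pixels.size()), true);
    std::memcpy(staging.mapped.data(), pixels.data(), pixels.size());
    // Surface::Upload records the buffer-to-image copy and commits the staging region
    surface.Upload(copy, staging);
    // FlushBuffers() calls upload_buffer.Flush() so the writes are visible before submit;
    // downloads mirror this with FindStaging(size, false), Download() and Finish(), which
    // submits and then calls download_buffer.Invalidate() so the CPU sees the results.
    runtime.FlushBuffers();
}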
@@ -104,6 +104,9 @@ public:
                            VideoCore::TextureType type, vk::Format format,
                            vk::ImageUsageFlags usage);
 
+    /// Flushes staging buffers
+    void FlushBuffers();
+
     /// Causes a GPU command flush
     void Finish();
 
@@ -138,9 +141,6 @@ public:
     /// Returns true if the provided pixel format needs convertion
     [[nodiscard]] bool NeedsConvertion(VideoCore::PixelFormat format) const;
 
-    /// Performs operations that need to be done on every scheduler slot switch
-    void OnSlotSwitch(u32 new_slot);
-
 private:
     /// Returns the current Vulkan instance
     const Instance& GetInstance() const {
@@ -157,9 +157,9 @@ private:
     TaskScheduler& scheduler;
     RenderpassCache& renderpass_cache;
     BlitHelper blit_helper;
+    StreamBuffer upload_buffer;
+    StreamBuffer download_buffer;
     std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
-    std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers;
-    std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{};
     std::unordered_multimap<HostTextureTag, ImageAlloc> texture_recycler;
     std::unordered_map<vk::ImageView, vk::Framebuffer> clear_framebuffers;
 };