renderer_vulkan: Rework format handling
* This is a pretty large commit that aims to solve some issues with the current format system
* The instance now builds, at application initialization, an array of format traits for each pixel format that includes information such as blit/attachment/storage support and fallback formats
* The runtime doesn't ask the instance for formats but receives these traits and can decide on its own what to build. For now we do the same as before: we require both blit and attachment support
* Morton swizzling also sees many bug fixes. The previous code was very hacky and didn't work for partial texture updates. It was also inconsistent, as it would take a tiled_buffer and write to the middle of linear
* The functions have now been greatly simplified and adjusted to work better with std::span. This fixes out-of-bounds errors and texture glitches (like the display in Mario Kart 7)
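To make the new flow concrete, here is a minimal sketch of how a runtime can consume the precomputed traits. The `FormatTraits` fields and the blit+attachment requirement match the diff below; `PickFormat` itself is a hypothetical helper for illustration, not part of the commit:

```cpp
// Sketch: decide the allocation format from cached traits instead of
// querying the instance for a format alternative at allocation time.
#include <utility>

struct FormatTraits {
    bool blit_support = false;       // supports blit src/dst + sampling
    bool attachment_support = false; // usable as a color/depth attachment
    bool storage_support = false;    // usable as a storage image
};

// Hypothetical helper: returns {use_native_format, needs_conversion}.
std::pair<bool, bool> PickFormat(const FormatTraits& traits) {
    // Same policy as before the rework: the native format is only
    // suitable when it can be both blitted and rendered to.
    const bool is_suitable = traits.blit_support && traits.attachment_support;
    // When falling back, the staging data must be format-converted.
    return {is_suitable, !is_suitable};
}
```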
@@ -7,6 +7,7 @@
 #include <array>
 #include <memory>
 #include <string>
 #include <string_view>
 #include <utility>
 #include <vector>
 #include "common/common_types.h"
@@ -135,42 +135,53 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer, std::span
     }
 }
 
+/**
+ * @brief Performs morton to/from linear conversions on the provided pixel data
+ * @param width, height The dimensions of the rectangular region of pixels in linear_buffer
+ * @param start_offset The number of bytes from the start of the first tile to the start of tiled_buffer
+ * @param end_offset The number of bytes from the start of the first tile to the end of tiled_buffer
+ * @param linear_buffer The linear pixel data
+ * @param tiled_buffer The tiled pixel data
+ *
+ * MortonCopy is at the heart of the PICA texture implementation, as it's responsible for converting between
+ * linear and morton tiled layouts. The function handles both conversions but there are slightly different
+ * paths and inputs for each:
+ *
+ * Morton to Linear:
+ * During uploads, tiled_buffer is always aligned to the tile or scanline boundary depending on whether the
+ * linear rectangle spans multiple vertical tiles. linear_buffer does not reference the entire texture area,
+ * but rather the specific rectangle affected by the upload.
+ *
+ * Linear to Morton:
+ * This is similar to the other conversion but with some differences. In this case tiled_buffer is not required
+ * to be aligned to any specific boundary, which requires special care. start_offset/end_offset are useful
+ * here as they tell us exactly where the data should be placed in the linear_buffer.
+ */
 template <bool morton_to_linear, PixelFormat format>
-static void MortonCopy(u32 stride, u32 height, u32 start_offset, u32 end_offset,
-                       std::span<std::byte> linear_buffer,
-                       std::span<std::byte> tiled_buffer) {
+static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
+                       std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer) {
     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
     constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format);
-    constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
     static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, "");
 
+    // We could use bytes_per_pixel here but it should be avoided because it
+    // becomes zero for 4-bit textures!
+    constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
-    const u32 linear_tile_size = (7 * stride + 8) * aligned_bytes_per_pixel;
-
-    // Does this line have any significance?
-    //u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel;
-    u32 linear_offset = 0;
-    u32 tiled_offset = 0;
-
-    const PAddr aligned_down_start_offset = Common::AlignDown(start_offset, tile_size);
-    const PAddr aligned_start_offset = Common::AlignUp(start_offset, tile_size);
-    PAddr aligned_end_offset = Common::AlignDown(end_offset, tile_size);
+    const u32 linear_tile_stride = (7 * width + 8) * aligned_bytes_per_pixel;
+    const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size);
+    const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size);
+    const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size);
 
     ASSERT(!morton_to_linear || (aligned_start_offset == start_offset && aligned_end_offset == end_offset));
 
-    const u32 begin_pixel_index = aligned_down_start_offset * 8 / GetFormatBpp(format);
-    u32 x = (begin_pixel_index % (stride * 8)) / 8;
-    u32 y = (begin_pixel_index / (stride * 8)) * 8;
-
     // In OpenGL the texture origin is in the bottom left corner as opposed to other
     // APIs that have it at the top left. To avoid flipping texture coordinates in
-    // the shader we read/write the linear buffer backwards
-    linear_offset += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel;
+    // the shader we read/write the linear buffer from the bottom up
+    u32 linear_offset = ((height - 8) * width) * aligned_bytes_per_pixel;
+    u32 tiled_offset = 0;
+    u32 x = 0;
+    u32 y = 0;
 
-    auto linear_next_tile = [&] {
-        x = (x + 8) % stride;
+    const auto LinearNextTile = [&] {
+        x = (x + 8) % width;
         linear_offset += 8 * aligned_bytes_per_pixel;
         if (!x) {
             y = (y + 8) % height;
@@ -178,7 +189,7 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset, u32 end_offset,
             return;
         }
 
-        linear_offset -= stride * 9 * aligned_bytes_per_pixel;
+        linear_offset -= width * 9 * aligned_bytes_per_pixel;
         }
     };
 
@@ -186,31 +197,31 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset, u32 end_offset,
     // the tile affected to a temporary buffer and copy the part we are interested in
     if (start_offset < aligned_start_offset && !morton_to_linear) {
         std::array<std::byte, tile_size> tmp_buf;
-        auto linear_data = linear_buffer.last(linear_buffer.size_bytes() - linear_offset);
-        MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data);
+        auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
+        MortonCopyTile<morton_to_linear, format>(width, tmp_buf, linear_data);
 
         std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset,
                     std::min(aligned_start_offset, end_offset) - start_offset);
 
         tiled_offset += aligned_start_offset - start_offset;
-        linear_next_tile();
+        LinearNextTile();
     }
 
     const u32 buffer_end = tiled_offset + aligned_end_offset - aligned_start_offset;
     while (tiled_offset < buffer_end) {
-        auto linear_data = linear_buffer.last(linear_buffer.size_bytes() - linear_offset);
+        auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
         auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size);
-        MortonCopyTile<morton_to_linear, format>(stride, tiled_data, linear_data);
+        MortonCopyTile<morton_to_linear, format>(width, tiled_data, linear_data);
         tiled_offset += tile_size;
-        linear_next_tile();
+        LinearNextTile();
    }
 
     // If during a texture download the end coordinate is not tile aligned, swizzle
     // the tile affected to a temporary buffer and copy the part we are interested in
     if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) {
         std::array<std::byte, tile_size> tmp_buf;
-        auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size);
-        MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data);
+        auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
+        MortonCopyTile<morton_to_linear, format>(width, tmp_buf, linear_data);
         std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), end_offset - aligned_end_offset);
     }
 }
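The tile alignment math above is easier to follow with concrete numbers. A worked example, assuming RGBA8 (32 bpp); `AlignDown`/`AlignUp` here are stand-ins mirroring what `Common::AlignDown`/`Common::AlignUp` compute:

```cpp
// Worked example of the alignment math in MortonCopy, assuming RGBA8:
// tile_size = 32 * 64 / 8 = 256 bytes per 8x8 tile.
// For an interval that starts 100 bytes into the first tile and ends 600
// bytes in:
//   aligned_down_start_offset = AlignDown(100, 256) = 0
//   aligned_start_offset      = AlignUp(100, 256)   = 256
//   aligned_end_offset        = AlignDown(600, 256) = 512
// The partially covered head tile [100, 256) and tail tile [512, 600) go
// through the tmp_buf path; only [256, 512) is copied tile-whole.
#include <cstdint>

constexpr std::uint32_t AlignDown(std::uint32_t value, std::uint32_t size) {
    return value - value % size;
}
constexpr std::uint32_t AlignUp(std::uint32_t value, std::uint32_t size) {
    return AlignDown(value + size - 1, size);
}
static_assert(AlignDown(100, 256) == 0 && AlignUp(100, 256) == 256 &&
              AlignDown(600, 256) == 512);
```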
@@ -163,7 +163,8 @@ private:
     SurfaceMap dirty_regions;
     SurfaceSet remove_surfaces;
     u16 resolution_scale_factor;
 
+    std::vector<std::function<void()>> download_queue;
     std::vector<std::byte> staging_buffer;
     std::unordered_map<TextureCubeConfig, Surface> texture_cube_cache;
     std::recursive_mutex mutex;
 };
@@ -269,9 +270,26 @@ bool RasterizerCache<T>::BlitSurfaces(const Surface& src_surface, Common::Rectan
                                       const Surface& dst_surface, Common::Rectangle<u32> dst_rect) {
     MICROPROFILE_SCOPE(RasterizerCache_BlitSurface);
 
-    if (CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) {
-        dst_surface->InvalidateAllWatcher();
+    if (!CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) [[unlikely]] {
+        return false;
+    }
+
+    dst_surface->InvalidateAllWatcher();
+
+    // Prefer texture copy over blit if possible
+    if (src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.bottom < src_rect.top) {
+        const TextureCopy texture_copy = {
+            .src_level = 0,
+            .dst_level = 0,
+            .src_layer = 0,
+            .dst_layer = 0,
+            .src_offset = {src_rect.left, src_rect.bottom},
+            .dst_offset = {dst_rect.left, dst_rect.bottom},
+            .extent = {src_rect.GetWidth(), src_rect.GetHeight()}
+        };
+
+        return runtime.CopyTextures(*src_surface, *dst_surface, texture_copy);
+    } else {
         const TextureBlit texture_blit = {
             .src_level = 0,
             .dst_level = 0,
@@ -283,8 +301,6 @@ bool RasterizerCache<T>::BlitSurfaces(const Surface& src_surface, Common::Rectan
 
         return runtime.BlitTextures(*src_surface, *dst_surface, texture_blit);
     }
-
-    return false;
 }
 
 MICROPROFILE_DECLARE(RasterizerCache_CopySurface);
@@ -888,35 +904,31 @@ void RasterizerCache<T>::ValidateSurface(const Surface& surface, PAddr addr, u32
 MICROPROFILE_DECLARE(RasterizerCache_SurfaceLoad);
 template <class T>
 void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval interval) {
-    const SurfaceParams info = surface->FromInterval(interval);
-    const u32 load_start = info.addr;
-    const u32 load_end = info.end;
-    ASSERT(load_start >= surface->addr && load_end <= surface->end);
+    const SurfaceParams load_info = surface->FromInterval(interval);
+    ASSERT(load_info.addr >= surface->addr && load_info.end <= surface->end);
 
+    MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
 
     const auto& staging = runtime.FindStaging(
-        surface->width * surface->height * 4, true);
-    MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(info.addr);
+        load_info.width * load_info.height * surface->GetInternalBytesPerPixel(), true);
+    MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(load_info.addr);
     if (!source_ptr) [[unlikely]] {
         return;
     }
 
-    const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start);
-
-    MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
-
+    const auto upload_data = source_ptr.GetWriteBytes(load_info.end - load_info.addr);
     if (surface->is_tiled) {
-        std::vector<std::byte> unswizzled_data(staging.size);
-        UnswizzleTexture(*surface, load_start - surface->addr, load_end - surface->addr,
-                         upload_data, unswizzled_data);
-        runtime.FormatConvert(surface->pixel_format, true, unswizzled_data, staging.mapped);
+        std::vector<std::byte> unswizzled_data(load_info.width * load_info.height * GetBytesPerPixel(load_info.pixel_format));
+        UnswizzleTexture(load_info, load_info.addr, load_info.end, upload_data, unswizzled_data);
+        runtime.FormatConvert(*surface, true, unswizzled_data, staging.mapped);
     } else {
-        runtime.FormatConvert(surface->pixel_format, true, upload_data, staging.mapped);
+        runtime.FormatConvert(*surface, true, upload_data, staging.mapped);
     }
 
     const BufferTextureCopy upload = {
         .buffer_offset = 0,
         .buffer_size = staging.size,
-        .texture_rect = surface->GetSubRect(info),
+        .texture_rect = surface->GetSubRect(load_info),
         .texture_level = 0
     };
@@ -926,17 +938,17 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i
 MICROPROFILE_DECLARE(RasterizerCache_SurfaceFlush);
 template <class T>
 void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval interval) {
+    const SurfaceParams flush_info = surface->FromInterval(interval);
     const u32 flush_start = boost::icl::first(interval);
     const u32 flush_end = boost::icl::last_next(interval);
     ASSERT(flush_start >= surface->addr && flush_end <= surface->end);
 
     const auto& staging = runtime.FindStaging(
-        surface->width * surface->height * 4, false);
-    const SurfaceParams params = surface->FromInterval(interval);
+        flush_info.width * flush_info.height * surface->GetInternalBytesPerPixel(), false);
     const BufferTextureCopy download = {
         .buffer_offset = 0,
         .buffer_size = staging.size,
-        .texture_rect = surface->GetSubRect(params),
+        .texture_rect = surface->GetSubRect(flush_info),
         .texture_level = 0
     };
 
@@ -948,17 +960,15 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
     }
 
     const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
 
     MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush);
 
-    if (surface->is_tiled) {
-        std::vector<std::byte> swizzled_data(staging.size);
-        runtime.FormatConvert(surface->pixel_format, false, swizzled_data, swizzled_data);
-        SwizzleTexture(*surface, flush_start - surface->addr, flush_end - surface->addr,
-                       staging.mapped, download_dest);
-    } else {
-        runtime.FormatConvert(surface->pixel_format, false, staging.mapped, download_dest);
-    }
+    download_queue.push_back([this, surface, download_dest, flush_start, flush_end, flush_info, mapped = staging.mapped]() {
+        if (surface->is_tiled) {
+            std::vector<std::byte> temp_data(flush_info.width * flush_info.height * GetBytesPerPixel(flush_info.pixel_format));
+            runtime.FormatConvert(*surface, false, mapped, temp_data);
+            SwizzleTexture(flush_info, flush_start, flush_end, temp_data, download_dest);
+        } else {
+            runtime.FormatConvert(*surface, false, mapped, download_dest);
+        }
+    });
 }
 
 template <class T>
@@ -1135,6 +1145,17 @@ void RasterizerCache<T>::FlushRegion(PAddr addr, u32 size, Surface flush_surface
         flushed_intervals += interval;
     }
 
+    // Batch execute all requested downloads. This gives more time for them to complete
+    // before we issue the CPU to GPU flush and reduces scheduler slot switches in Vulkan
+    if (!download_queue.empty()) {
+        runtime.Finish();
+        for (auto& download_func : download_queue) {
+            download_func();
+        }
+
+        download_queue.clear();
+    }
+
     // Reset dirty regions
     dirty_regions -= flushed_intervals;
 }
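The deferral above amortizes one GPU wait over every pending readback instead of stalling per download. A compressed sketch of the pattern under that assumption; `DownloadBatcher` is illustrative, the commit keeps the queue directly on the rasterizer cache:

```cpp
// Minimal sketch of the deferred-download pattern: record CPU-side
// readback work as closures, wait for the GPU once, then drain.
#include <functional>
#include <utility>
#include <vector>

struct DownloadBatcher {
    std::vector<std::function<void()>> queue;

    template <typename F>
    void Defer(F&& readback) { // where DownloadSurface records work
        queue.emplace_back(std::forward<F>(readback));
    }

    template <typename WaitFn>
    void Drain(WaitFn&& wait_for_gpu) { // once per FlushRegion
        if (queue.empty()) {
            return;
        }
        wait_for_gpu(); // single wait amortized over all pending downloads
        for (auto& func : queue) {
            func(); // swizzle/convert now that staging data is ready
        }
        queue.clear();
    }
};
```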
@@ -41,6 +41,8 @@ struct TextureClear {
 struct TextureCopy {
     u32 src_level;
     u32 dst_level;
+    u32 src_layer;
+    u32 dst_layer;
     Offset src_offset;
     Offset dst_offset;
     Extent extent;
|
@ -11,18 +11,22 @@
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
void SwizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
|
||||
void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr,
|
||||
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled) {
|
||||
const u32 func_index = static_cast<u32>(params.pixel_format);
|
||||
const u32 func_index = static_cast<u32>(swizzle_info.pixel_format);
|
||||
const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index];
|
||||
SwizzleImpl(params.stride, params.height, start_offset, end_offset, source_linear, dest_tiled);
|
||||
SwizzleImpl(swizzle_info.width, swizzle_info.height,
|
||||
start_addr - swizzle_info.addr, end_addr - swizzle_info.addr,
|
||||
source_linear, dest_tiled);
|
||||
}
|
||||
|
||||
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
|
||||
void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr,
|
||||
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear) {
|
||||
const u32 func_index = static_cast<u32>(params.pixel_format);
|
||||
const u32 func_index = static_cast<u32>(unswizzle_info.pixel_format);
|
||||
const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index];
|
||||
UnswizzleImpl(params.stride, params.height, start_offset, end_offset, dest_linear, source_tiled);
|
||||
UnswizzleImpl(unswizzle_info.width, unswizzle_info.height,
|
||||
start_addr - unswizzle_info.addr, end_addr - unswizzle_info.addr,
|
||||
dest_linear, source_tiled);
|
||||
}
|
||||
|
||||
ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data) {
|
||||
|
@@ -44,19 +44,29 @@ class SurfaceParams;
 
 [[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data);
 
-void SwizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
-                    std::span<std::byte> source_linear, std::span<std::byte> dest_tiled);
-
 /**
  * Converts a morton swizzled texture to linear format.
  *
- * @param params Structure used to query the surface information.
- * @param start_offset Is the offset at which the source_tiled span begins
+ * @param unswizzle_info Structure used to query the surface information.
+ * @param start_addr The start address of the source_tiled data.
+ * @param end_addr The end address of the source_tiled data.
+ * @param source_tiled The tiled data to convert.
+ * @param dest_linear The output buffer where the generated linear data will be written to.
 */
+void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr,
+                      std::span<std::byte> source_tiled, std::span<std::byte> dest_linear);
 
+/**
+ * Swizzles a linear texture according to the morton code.
+ *
+ * @param swizzle_info Structure used to query the surface information.
+ * @param start_addr The start address of the dest_tiled data.
+ * @param end_addr The end address of the dest_tiled data.
+ * @param source_linear The linear data to convert.
+ * @param dest_tiled The output buffer where the generated morton swizzled data will be written to.
+ */
-void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
-                      std::span<std::byte> source_tiled, std::span<std::byte> dest_linear);
+void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr,
+                    std::span<std::byte> source_linear, std::span<std::byte> dest_tiled);
 
 } // namespace VideoCore
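An illustrative call site for the new address-based API, assuming the declarations above are in scope; `UploadExample` and its 64x64 RGBA8 surface are hypothetical:

```cpp
// Sketch: upload path converting a tiled guest texture into linear
// staging data. Offsets into the tiled span are now derived inside
// UnswizzleTexture from (start_addr - info.addr), so callers pass guest
// addresses directly instead of precomputed byte offsets.
void UploadExample(const VideoCore::SurfaceParams& info, PAddr addr,
                   std::span<std::byte> guest_tiled, std::span<std::byte> staging) {
    const PAddr start_addr = addr;             // begin of the dirty interval
    const PAddr end_addr = addr + 64 * 64 * 4; // 64x64 texels, 4 bytes each
    VideoCore::UnswizzleTexture(info, start_addr, end_addr, guest_tiled, staging);
}
```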
@@ -124,8 +124,9 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_f
     return DEFAULT_TUPLE;
 }
 
-void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload,
+void TextureRuntime::FormatConvert(const Surface& surface, bool upload,
                                    std::span<std::byte> source, std::span<std::byte> dest) {
+    const VideoCore::PixelFormat format = surface.pixel_format;
     if (format == VideoCore::PixelFormat::RGBA8 && driver.IsOpenGLES()) {
         Pica::Texture::ConvertABGRToRGBA(source, dest);
     } else if (format == VideoCore::PixelFormat::RGB8 && driver.IsOpenGLES()) {
@@ -70,8 +70,10 @@ public:
     /// Returns the OpenGL format tuple associated with the provided pixel format
     const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format);
 
+    void Finish() const {}
+
     /// Performs required format conversions on the staging data
-    void FormatConvert(VideoCore::PixelFormat format, bool upload,
+    void FormatConvert(const Surface& surface, bool upload,
                        std::span<std::byte> source, std::span<std::byte> dest);
 
     /// Allocates an OpenGL texture with the specified dimensions and format
@@ -136,6 +138,11 @@ public:
     /// Downloads pixel data to staging from a rectangle region of the surface texture
     void Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging);
 
+    /// Returns the bpp of the internal surface format
+    u32 GetInternalBytesPerPixel() const {
+        return VideoCore::GetBytesPerPixel(pixel_format);
+    }
+
 private:
     /// Downloads scaled image by downscaling the requested rectangle
     void ScaledDownload(const VideoCore::BufferTextureCopy& download);
@@ -183,7 +183,7 @@ static std::array<float, 3 * 2> MakeOrthographicMatrix(float width, float height
 }
 
 RendererVulkan::RendererVulkan(Frontend::EmuWindow& window)
-    : RendererBase{window}, instance{window}, scheduler{instance}, renderpass_cache{instance, scheduler},
+    : RendererBase{window}, instance{window}, scheduler{instance, *this}, renderpass_cache{instance, scheduler},
       runtime{instance, scheduler, renderpass_cache}, swapchain{instance, renderpass_cache},
       vertex_buffer{instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}} {
 
@@ -626,7 +626,7 @@ void RendererVulkan::BuildPipelines() {
 
 void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture, const GPU::Regs::FramebufferConfig& framebuffer) {
     TextureInfo old_texture = texture;
-    texture = TextureInfo {
+    texture = TextureInfo{
         .alloc = runtime.Allocate(framebuffer.width, framebuffer.height,
                                   VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format),
                                   VideoCore::TextureType::Texture2D),
@@ -1035,21 +1035,24 @@ void RendererVulkan::SwapBuffers() {
 
     DrawScreens(layout, false);
 
-    // Flush all buffers to make the data visible to the GPU before submitting
-    rasterizer->FlushBuffers();
-    vertex_buffer.Flush();
-
     scheduler.Submit(SubmitMode::SwapchainSynced);
     swapchain.Present(present_ready);
-
-    // Inform texture runtime about the switch
-    runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex());
 }
+
+void RendererVulkan::FlushBuffers() {
+    vertex_buffer.Flush();
+    rasterizer->FlushBuffers();
+}
+
+void RendererVulkan::OnSlotSwitch() {
+    // When the command buffer switches, all state becomes undefined.
+    // This is problematic with dynamic states, so set all states here
+    if (instance.IsExtendedDynamicStateSupported()) {
+        rasterizer->SyncFixedState();
+    }
+
+    runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex());
+    rasterizer->pipeline_cache.MarkDirty();
+}
 
 } // namespace Vulkan
@@ -72,6 +72,8 @@ public:
     void PrepareVideoDumping() override {}
     void CleanupVideoDumping() override {}
     void Sync() override;
+    void FlushBuffers();
+    void OnSlotSwitch();
 
 private:
     void ReloadSampler();
@@ -26,11 +26,11 @@ constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) {
     switch (format) {
     case vk::Format::eD16UnormS8Uint:
     case vk::Format::eD24UnormS8Uint:
-    case vk::Format::eX8D24UnormPack32:
     case vk::Format::eD32SfloatS8Uint:
         return vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth;
         break;
     case vk::Format::eD16Unorm:
+    case vk::Format::eX8D24UnormPack32:
     case vk::Format::eD32Sfloat:
         return vk::ImageAspectFlagBits::eDepth;
         break;
@@ -11,6 +11,33 @@
 
 namespace Vulkan {
 
+vk::Format ToVkFormat(VideoCore::PixelFormat format) {
+    switch (format) {
+    case VideoCore::PixelFormat::RGBA8:
+        return vk::Format::eR8G8B8A8Unorm;
+    case VideoCore::PixelFormat::RGB8:
+        return vk::Format::eB8G8R8Unorm;
+    case VideoCore::PixelFormat::RGB5A1:
+        return vk::Format::eR5G5B5A1UnormPack16;
+    case VideoCore::PixelFormat::RGB565:
+        return vk::Format::eR5G6B5UnormPack16;
+    case VideoCore::PixelFormat::RGBA4:
+        return vk::Format::eR4G4B4A4UnormPack16;
+    case VideoCore::PixelFormat::D16:
+        return vk::Format::eD16Unorm;
+    case VideoCore::PixelFormat::D24:
+        return vk::Format::eX8D24UnormPack32;
+    case VideoCore::PixelFormat::D24S8:
+        return vk::Format::eD24UnormS8Uint;
+    case VideoCore::PixelFormat::Invalid:
+        LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format);
+        return vk::Format::eUndefined;
+    default:
+        // Use default case for the texture formats
+        return vk::Format::eR8G8B8A8Unorm;
+    }
+}
+
 Instance::Instance(Frontend::EmuWindow& window) {
     auto window_info = window.GetWindowInfo();
 
@@ -54,6 +81,7 @@ Instance::Instance(Frontend::EmuWindow& window) {
     device_properties = physical_device.getProperties();
 
     CreateDevice();
+    CreateFormatTable();
 }
 
 Instance::~Instance() {
@@ -64,50 +92,99 @@ Instance::~Instance() {
     instance.destroy();
 }
 
-bool Instance::IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const {
-    static std::unordered_map<vk::Format, vk::FormatProperties> supported;
-    if (auto it = supported.find(format); it != supported.end()) {
-        return (it->second.optimalTilingFeatures & usage) == usage;
+FormatTraits Instance::GetTraits(VideoCore::PixelFormat pixel_format) const {
+    if (pixel_format == VideoCore::PixelFormat::Invalid) [[unlikely]] {
+        return FormatTraits{};
     }
 
-    // Cache format properties so we don't have to query the driver all the time
-    const vk::FormatProperties properties = physical_device.getFormatProperties(format);
-    supported.insert(std::make_pair(format, properties));
-
-    return (properties.optimalTilingFeatures & usage) == usage;
+    const u32 index = static_cast<u32>(pixel_format);
+    return format_table[index];
 }
 
-vk::Format Instance::GetFormatAlternative(vk::Format format) const {
-    if (format == vk::Format::eUndefined) {
-        return format;
-    }
+void Instance::CreateFormatTable() {
+    constexpr std::array pixel_formats = {
+        VideoCore::PixelFormat::RGBA8,
+        VideoCore::PixelFormat::RGB8,
+        VideoCore::PixelFormat::RGB5A1,
+        VideoCore::PixelFormat::RGB565,
+        VideoCore::PixelFormat::RGBA4,
+        VideoCore::PixelFormat::IA8,
+        VideoCore::PixelFormat::RG8,
+        VideoCore::PixelFormat::I8,
+        VideoCore::PixelFormat::A8,
+        VideoCore::PixelFormat::IA4,
+        VideoCore::PixelFormat::I4,
+        VideoCore::PixelFormat::A4,
+        VideoCore::PixelFormat::ETC1,
+        VideoCore::PixelFormat::ETC1A4,
+        VideoCore::PixelFormat::D16,
+        VideoCore::PixelFormat::D24,
+        VideoCore::PixelFormat::D24S8
+    };
 
-    vk::FormatFeatureFlags features = GetFormatFeatures(GetImageAspect(format));
-    if (IsFormatSupported(format, features)) {
-        return format;
-    }
+    const vk::FormatFeatureFlags storage_usage = vk::FormatFeatureFlagBits::eStorageImage;
+    const vk::FormatFeatureFlags blit_usage = vk::FormatFeatureFlagBits::eSampledImage |
                                              vk::FormatFeatureFlagBits::eTransferDst |
                                              vk::FormatFeatureFlagBits::eTransferSrc |
                                              vk::FormatFeatureFlagBits::eBlitSrc |
                                              vk::FormatFeatureFlagBits::eBlitDst;
 
-    // Return the most supported alternative format preferably with the
-    // same block size according to the Vulkan spec.
-    // See 43.3. Required Format Support of the Vulkan spec
-    switch (format) {
-    case vk::Format::eD24UnormS8Uint:
-        return vk::Format::eD32SfloatS8Uint;
-    case vk::Format::eX8D24UnormPack32:
-        return vk::Format::eD32Sfloat;
-    case vk::Format::eR5G5B5A1UnormPack16:
-        return vk::Format::eA1R5G5B5UnormPack16;
-    case vk::Format::eR8G8B8Unorm:
-        return vk::Format::eR8G8B8A8Unorm;
-    case vk::Format::eUndefined:
-        return vk::Format::eUndefined;
-    case vk::Format::eR4G4B4A4UnormPack16:
-        // B4G4R4A4 is not guaranteed by the spec to support attachments
-        return GetFormatAlternative(vk::Format::eB4G4R4A4UnormPack16);
-    default:
-        LOG_WARNING(Render_Vulkan, "Format {} doesn't support attachments, falling back to RGBA8",
-                    vk::to_string(format));
-        return vk::Format::eR8G8B8A8Unorm;
+    for (const auto& pixel_format : pixel_formats) {
+        const vk::Format format = ToVkFormat(pixel_format);
+        const vk::FormatProperties properties = physical_device.getFormatProperties(format);
+        const vk::ImageAspectFlags aspect = GetImageAspect(format);
+
+        const vk::FormatFeatureFlagBits attachment_usage = (aspect & vk::ImageAspectFlagBits::eDepth) ?
+            vk::FormatFeatureFlagBits::eDepthStencilAttachment :
+            vk::FormatFeatureFlagBits::eColorAttachment;
+
+        const bool supports_blit =
+            (properties.optimalTilingFeatures & blit_usage) == blit_usage;
+        const bool supports_attachment =
+            (properties.optimalTilingFeatures & attachment_usage) == attachment_usage;
+        const bool supports_storage =
+            (properties.optimalTilingFeatures & storage_usage) == storage_usage;
+
+        // Find the most inclusive usage flags for this format
+        vk::ImageUsageFlags best_usage;
+        if (supports_blit) {
+            best_usage |= vk::ImageUsageFlagBits::eSampled |
+                          vk::ImageUsageFlagBits::eTransferDst |
+                          vk::ImageUsageFlagBits::eTransferSrc;
+        }
+        if (supports_attachment) {
+            best_usage |= (aspect & vk::ImageAspectFlagBits::eDepth) ?
+                vk::ImageUsageFlagBits::eDepthStencilAttachment :
+                vk::ImageUsageFlagBits::eColorAttachment;
+        }
+        if (supports_storage) {
+            best_usage |= vk::ImageUsageFlagBits::eStorage;
+        }
+
+        // Always fall back to RGBA8 or D32(S8) for convenience
+        vk::Format fallback = vk::Format::eR8G8B8A8Unorm;
+        if (aspect & vk::ImageAspectFlagBits::eDepth) {
+            fallback = vk::Format::eD32Sfloat;
+            if (aspect & vk::ImageAspectFlagBits::eStencil) {
+                fallback = vk::Format::eD32SfloatS8Uint;
+            }
+        }
+
+        // Report completely unsupported formats
+        if (!supports_blit && !supports_attachment && !supports_storage) {
+            LOG_WARNING(Render_Vulkan, "Format {} unsupported, falling back unconditionally to {}",
+                        vk::to_string(format), vk::to_string(fallback));
+        }
+
+        const u32 index = static_cast<u32>(pixel_format);
+        format_table[index] = FormatTraits{
+            .blit_support = supports_blit,
+            .attachment_support = supports_attachment,
+            .storage_support = supports_storage,
+            .usage = best_usage,
+            .native = format,
+            .fallback = fallback
+        };
    }
 }
@@ -4,8 +4,9 @@
 
 #pragma once
 
-#include <memory>
-#include "common/common_types.h"
+#include <array>
+#include <unordered_map>
+#include "video_core/rasterizer_cache/pixel_format.h"
 #include "video_core/renderer_vulkan/vk_common.h"
 
 namespace Frontend {
@@ -14,17 +15,23 @@ class EmuWindow;
 
 namespace Vulkan {
 
+struct FormatTraits {
+    bool blit_support = false;       ///< True if the format supports omnidirectional blit operations
+    bool attachment_support = false; ///< True if the format supports being used as an attachment
+    bool storage_support = false;    ///< True if the format supports storage operations
+    vk::ImageUsageFlags usage{};     ///< Most supported usage for the native format
+    vk::Format native = vk::Format::eUndefined;   ///< Closest possible native format
+    vk::Format fallback = vk::Format::eUndefined; ///< Best fallback format
+};
+
 /// The global Vulkan instance
 class Instance {
 public:
     Instance(Frontend::EmuWindow& window);
     ~Instance();
 
-    /// Returns true when the format supports the provided feature flags
-    bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const;
-
-    /// Returns the most compatible format that supports the provided feature flags
-    vk::Format GetFormatAlternative(vk::Format format) const;
+    /// Returns the FormatTraits struct for the provided pixel format
+    FormatTraits GetTraits(VideoCore::PixelFormat pixel_format) const;
 
     /// Returns the Vulkan instance
     vk::Instance GetInstance() const {
@@ -103,6 +110,12 @@ public:
     }
 
 private:
+    /// Returns the optimal supported usage for the requested format
+    vk::FormatFeatureFlags GetFormatFeatures(vk::Format format);
+
+    /// Creates the format compatibility table for the current device
+    void CreateFormatTable();
+
     /// Creates the logical device opportunistically enabling extensions
     bool CreateDevice();
 
@@ -118,9 +131,9 @@ private:
     VmaAllocator allocator;
     vk::Queue present_queue;
     vk::Queue graphics_queue;
+    std::array<FormatTraits, VideoCore::PIXEL_FORMAT_COUNT> format_table;
     u32 present_queue_family_index = 0;
     u32 graphics_queue_family_index = 0;
 
     bool timeline_semaphores = false;
     bool extended_dynamic_state = false;
     bool push_descriptors = false;
@@ -180,13 +180,6 @@ PipelineCache::~PipelineCache() {
 
 void PipelineCache::BindPipeline(const PipelineInfo& info) {
     ApplyDynamic(info);
 
-    // When texture downloads occur the runtime will flush the GPU and cause
-    // a scheduler slot switch behind our back. This might invalidate any
-    // cached descriptor sets/require pipeline rebinding.
-    if (timestamp != scheduler.GetHostFenceCounter()) {
-        MarkDirty();
-    }
-
     u64 shader_hash = 0;
     for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
         shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
@@ -313,7 +306,6 @@ void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) {
 void PipelineCache::MarkDirty() {
     descriptor_dirty.fill(true);
     current_pipeline = VK_NULL_HANDLE;
-    timestamp = scheduler.GetHostFenceCounter();
 }
 
 void PipelineCache::ApplyDynamic(const PipelineInfo& info) {
@@ -248,7 +248,6 @@ private:
     std::array<DescriptorSetData, MAX_DESCRIPTOR_SETS> update_data{};
     std::array<bool, MAX_DESCRIPTOR_SETS> descriptor_dirty{};
     std::array<vk::DescriptorSet, MAX_DESCRIPTOR_SETS> descriptor_sets;
-    u64 timestamp = 0;
 
     // Bound shader modules
     enum ProgramType : u32 {
@@ -94,9 +94,9 @@ constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() {
 }
 
 constexpr u32 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
-constexpr u32 INDEX_BUFFER_SIZE = 2 * 1024 * 1024;
-constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
-constexpr u32 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024;
+constexpr u32 INDEX_BUFFER_SIZE = 8 * 1024 * 1024;
+constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
+constexpr u32 TEXTURE_BUFFER_SIZE = 16 * 1024 * 1024;
 
 constexpr std::array TEXTURE_BUFFER_LF_FORMATS = {
     vk::Format::eR32G32Sfloat
@@ -188,6 +188,7 @@ RasterizerVulkan::~RasterizerVulkan() {
 
     vmaDestroyImage(allocator, default_texture.image, default_texture.allocation);
     device.destroyImageView(default_texture.image_view);
+    device.destroyImageView(default_texture.base_view);
     device.destroySampler(default_sampler);
 }
 
@@ -598,12 +599,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
                                       surfaces_rect.bottom, surfaces_rect.top))
     };
 
-    // Sync the viewport
-    pipeline_cache.SetViewport(surfaces_rect.left + viewport_rect_unscaled.left * res_scale,
-                               surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale,
-                               viewport_rect_unscaled.GetWidth() * res_scale,
-                               viewport_rect_unscaled.GetHeight() * res_scale);
-
     if (uniform_block_data.data.framebuffer_scale != res_scale) {
         uniform_block_data.data.framebuffer_scale = res_scale;
         uniform_block_data.dirty = true;
@@ -678,8 +673,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
         }
     };
 
-    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
-
     // Sync and bind the texture surfaces
     const auto pica_textures = regs.texturing.GetTextures();
     for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
@@ -725,7 +718,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
 
                 auto surface = res_cache.GetTextureCube(config);
                 if (surface != nullptr) {
-                    runtime.Transition(command_buffer, surface->alloc,
+                    runtime.Transition(scheduler.GetRenderCommandBuffer(), surface->alloc,
                                        vk::ImageLayout::eShaderReadOnlyOptimal,
                                        0, surface->alloc.levels, 0, 6);
                     pipeline_cache.BindTexture(3, surface->alloc.image_view);
@@ -746,7 +739,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
 
             auto surface = res_cache.GetTextureSurface(texture);
             if (surface != nullptr) {
-                runtime.Transition(command_buffer, surface->alloc,
+                runtime.Transition(scheduler.GetRenderCommandBuffer(), surface->alloc,
                                    vk::ImageLayout::eShaderReadOnlyOptimal,
                                    0, surface->alloc.levels);
                 CheckBarrier(surface->alloc.image_view, texture_index);
@@ -767,6 +760,15 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
         }
     }
 
+    // NOTE: From here onwards it's a safe zone to set the draw state, doing that any earlier will cause
+    // issues as the rasterizer cache might cause a scheduler switch and invalidate our state
+
+    // Sync the viewport
+    pipeline_cache.SetViewport(surfaces_rect.left + viewport_rect_unscaled.left * res_scale,
+                               surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale,
+                               viewport_rect_unscaled.GetWidth() * res_scale,
+                               viewport_rect_unscaled.GetHeight() * res_scale);
+
     // Sync and bind the shader
     if (shader_dirty) {
         SetShader();
@@ -786,8 +788,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
 
     auto valid_surface = color_surface ? color_surface : depth_surface;
     const FramebufferInfo framebuffer_info = {
-        .color = color_surface ? color_surface->alloc.image_view : VK_NULL_HANDLE,
-        .depth = depth_surface ? depth_surface->alloc.image_view : VK_NULL_HANDLE,
+        .color = color_surface ? color_surface->GetFramebufferView() : VK_NULL_HANDLE,
+        .depth = depth_surface ? depth_surface->GetFramebufferView() : VK_NULL_HANDLE,
         .renderpass = renderpass_cache.GetRenderpass(pipeline_info.color_attachment,
                                                      pipeline_info.depth_attachment, false),
         .width = valid_surface->GetScaledWidth(),
@@ -799,6 +801,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
         it->second = CreateFramebuffer(framebuffer_info);
     }
 
+    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
     if (color_surface) {
         runtime.Transition(command_buffer, color_surface->alloc,
                            vk::ImageLayout::eColorAttachmentOptimal,
@@ -1509,15 +1512,9 @@ bool RasterizerVulkan::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon
     const bool load_gap = output_gap != 0;
     auto [dst_surface, dst_rect] =
         res_cache.GetSurfaceSubRect(dst_params, VideoCore::ScaleMatch::Upscale, load_gap);
-    if (dst_surface == nullptr) {
-        return false;
-    }
-
-    if (dst_surface->type == VideoCore::SurfaceType::Texture) {
-        return false;
-    }
-
-    if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
+
+    if (!dst_surface || dst_surface->type == VideoCore::SurfaceType::Texture ||
+        !res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
         return false;
     }
 
@@ -10,24 +10,23 @@
 
 namespace Vulkan {
 
-vk::Format ToVkFormatColor(u32 index) {
+VideoCore::PixelFormat ToFormatColor(u32 index) {
     switch (index) {
-    case 0: return vk::Format::eR8G8B8A8Unorm;
-    case 1: return vk::Format::eR8G8B8Unorm;
-    case 2: return vk::Format::eR5G5B5A1UnormPack16;
-    case 3: return vk::Format::eR5G6B5UnormPack16;
-    case 4: return vk::Format::eR4G4B4A4UnormPack16;
-    default: return vk::Format::eUndefined;
+    case 0: return VideoCore::PixelFormat::RGBA8;
+    case 1: return VideoCore::PixelFormat::RGB8;
+    case 2: return VideoCore::PixelFormat::RGB5A1;
+    case 3: return VideoCore::PixelFormat::RGB565;
+    case 4: return VideoCore::PixelFormat::RGBA4;
+    default: return VideoCore::PixelFormat::Invalid;
     }
 }
 
-vk::Format ToVkFormatDepth(u32 index) {
+VideoCore::PixelFormat ToFormatDepth(u32 index) {
     switch (index) {
-    case 0: return vk::Format::eD16Unorm;
-    case 1: return vk::Format::eX8D24UnormPack32;
-    // Notice the similar gap in PixelFormat
-    case 3: return vk::Format::eD24UnormS8Uint;
-    default: return vk::Format::eUndefined;
+    case 0: return VideoCore::PixelFormat::D16;
+    case 1: return VideoCore::PixelFormat::D24;
+    case 3: return VideoCore::PixelFormat::D24S8;
+    default: return VideoCore::PixelFormat::Invalid;
     }
 }
 
@@ -36,21 +35,23 @@ RenderpassCache::RenderpassCache(const Instance& instance, TaskScheduler& schedu
     // Pre-create all needed renderpasses by the renderer
     for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) {
        for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) {
-            const vk::Format color_format =
-                instance.GetFormatAlternative(ToVkFormatColor(color));
-            const vk::Format depth_stencil_format =
-                instance.GetFormatAlternative(ToVkFormatDepth(depth));
+            const FormatTraits color_traits = instance.GetTraits(ToFormatColor(color));
+            const FormatTraits depth_traits = instance.GetTraits(ToFormatDepth(depth));
 
-            if (color_format == vk::Format::eUndefined &&
-                depth_stencil_format == vk::Format::eUndefined) {
+            const vk::Format color_format =
+                color_traits.attachment_support ? color_traits.native : color_traits.fallback;
+            const vk::Format depth_format =
+                depth_traits.attachment_support ? depth_traits.native : depth_traits.fallback;
+
+            if (color_format == vk::Format::eUndefined && depth_format == vk::Format::eUndefined) {
                 continue;
            }
 
-            cached_renderpasses[color][depth][0] = CreateRenderPass(color_format, depth_stencil_format,
+            cached_renderpasses[color][depth][0] = CreateRenderPass(color_format, depth_format,
                                                                     vk::AttachmentLoadOp::eLoad,
                                                                     vk::ImageLayout::eColorAttachmentOptimal,
                                                                     vk::ImageLayout::eColorAttachmentOptimal);
-            cached_renderpasses[color][depth][1] = CreateRenderPass(color_format, depth_stencil_format,
+            cached_renderpasses[color][depth][1] = CreateRenderPass(color_format, depth_format,
                                                                     vk::AttachmentLoadOp::eClear,
                                                                     vk::ImageLayout::eColorAttachmentOptimal,
                                                                     vk::ImageLayout::eColorAttachmentOptimal);
@@ -5,12 +5,14 @@
 #define VULKAN_HPP_NO_CONSTRUCTORS
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_task_scheduler.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 
 namespace Vulkan {
 
-TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} {
+TaskScheduler::TaskScheduler(const Instance& instance, RendererVulkan& renderer)
+    : instance{instance}, renderer{renderer} {
     vk::Device device = instance.GetDevice();
     const vk::CommandPoolCreateInfo command_pool_info = {
         .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
@@ -97,11 +99,7 @@ void TaskScheduler::Synchronize(u32 slot) {
     const auto& command = commands[slot];
     vk::Device device = instance.GetDevice();
 
-    u32 completed_counter = completed_fence_counter;
-    if (instance.IsTimelineSemaphoreSupported()) {
-        completed_counter = device.getSemaphoreCounterValue(timeline);
-    }
-
+    u32 completed_counter = GetFenceCounter();
     if (command.fence_counter > completed_counter) {
         if (instance.IsTimelineSemaphoreSupported()) {
             const vk::SemaphoreWaitInfo wait_info = {
@@ -127,6 +125,10 @@ void TaskScheduler::Synchronize(u32 slot) {
 }
 
 void TaskScheduler::Submit(SubmitMode mode) {
+    if (False(mode & SubmitMode::Shutdown)) {
+        renderer.FlushBuffers();
+    }
+
     const auto& command = commands[current_command];
     command.render_command_buffer.end();
     if (command.use_upload_buffer) {
@@ -206,6 +208,7 @@ void TaskScheduler::Submit(SubmitMode mode) {
     // Switch to next cmdbuffer.
     if (False(mode & SubmitMode::Shutdown)) {
         SwitchSlot();
+        renderer.OnSlotSwitch();
     }
 }
 
@@ -15,6 +15,7 @@ namespace Vulkan {
 
 class Buffer;
 class Instance;
+class RendererVulkan;
 
 enum class SubmitMode : u8 {
     SwapchainSynced = 1 << 0, ///< Synchronizes command buffer execution with the swapchain
@@ -26,7 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(SubmitMode);
 
 class TaskScheduler {
 public:
-    TaskScheduler(const Instance& instance);
+    TaskScheduler(const Instance& instance, RendererVulkan& renderer);
     ~TaskScheduler();
 
     /// Blocks the host until the current command completes execution
@@ -74,6 +75,7 @@ private:
 
 private:
     const Instance& instance;
+    RendererVulkan& renderer;
     u64 next_fence_counter = 1;
     u64 completed_fence_counter = 0;
@@ -8,36 +8,10 @@
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
 #include "video_core/renderer_vulkan/vk_task_scheduler.h"
 #include "video_core/renderer_vulkan/vk_texture_runtime.h"
+#include <vulkan/vulkan_format_traits.hpp>
 
 namespace Vulkan {
 
-vk::Format ToVkFormat(VideoCore::PixelFormat format) {
-    switch (format) {
-    case VideoCore::PixelFormat::RGBA8:
-        return vk::Format::eR8G8B8A8Unorm;
-    case VideoCore::PixelFormat::RGB8:
-        return vk::Format::eR8G8B8Unorm;
-    case VideoCore::PixelFormat::RGB5A1:
-        return vk::Format::eR5G5B5A1UnormPack16;
-    case VideoCore::PixelFormat::RGB565:
-        return vk::Format::eR5G6B5UnormPack16;
-    case VideoCore::PixelFormat::RGBA4:
-        return vk::Format::eR4G4B4A4UnormPack16;
-    case VideoCore::PixelFormat::D16:
-        return vk::Format::eD16Unorm;
-    case VideoCore::PixelFormat::D24:
-        return vk::Format::eX8D24UnormPack32;
-    case VideoCore::PixelFormat::D24S8:
-        return vk::Format::eD24UnormS8Uint;
-    case VideoCore::PixelFormat::Invalid:
-        LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format);
-        return vk::Format::eUndefined;
-    default:
-        // Use default case for the texture formats
-        return vk::Format::eR8G8B8A8Unorm;
-    }
-}
-
 vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
     switch (type) {
     case VideoCore::SurfaceType::Color:
@@ -55,23 +29,7 @@ vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
     return vk::ImageAspectFlagBits::eColor;
 }
 
-vk::FormatFeatureFlagBits ToVkFormatFeatures(VideoCore::SurfaceType type) {
-    switch (type) {
-    case VideoCore::SurfaceType::Color:
-    case VideoCore::SurfaceType::Texture:
-    case VideoCore::SurfaceType::Fill:
-        return vk::FormatFeatureFlagBits::eColorAttachment;
-    case VideoCore::SurfaceType::Depth:
-    case VideoCore::SurfaceType::DepthStencil:
-        return vk::FormatFeatureFlagBits::eDepthStencilAttachment;
-    default:
-        UNREACHABLE_MSG("Invalid surface type!");
-    }
-
-    return vk::FormatFeatureFlagBits::eColorAttachment;
-}
-
-constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024;
+constexpr u32 STAGING_BUFFER_SIZE = 64 * 1024 * 1024;
 
 TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
                                RenderpassCache& renderpass_cache)
@@ -92,6 +50,7 @@ TextureRuntime::~TextureRuntime() {
     for (const auto& [key, alloc] : texture_recycler) {
         vmaDestroyImage(allocator, alloc.image, alloc.allocation);
         device.destroyImageView(alloc.image_view);
+        device.destroyImageView(alloc.base_view);
     }
 
     for (const auto& [key, framebuffer] : clear_framebuffers) {
@@ -118,6 +77,10 @@ StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
     };
 }
 
+void TextureRuntime::Finish() {
+    scheduler.Submit(SubmitMode::Flush);
+}
+
 void TextureRuntime::OnSlotSwitch(u32 new_slot) {
     staging_offsets[new_slot] = 0;
 }
@@ -140,9 +103,12 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
         return alloc;
     }
 
     // Create a new allocation
-    vk::Format vk_format = instance.GetFormatAlternative(ToVkFormat(format));
-    vk::ImageAspectFlags aspect = GetImageAspect(vk_format);
+    const FormatTraits traits = instance.GetTraits(format);
+    const vk::ImageAspectFlags aspect = ToVkAspect(VideoCore::GetFormatType(format));
+
+    const bool is_suitable = traits.blit_support && traits.attachment_support;
+    const vk::Format vk_format = is_suitable ? traits.native : traits.fallback;
+    const vk::ImageUsageFlags vk_usage = is_suitable ? traits.usage : GetImageUsage(aspect);
 
     const u32 levels = std::bit_width(std::max(width, height));
     const vk::ImageCreateInfo image_info = {
@@ -155,7 +121,7 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
         .mipLevels = levels,
         .arrayLayers = layers,
         .samples = vk::SampleCountFlagBits::e1,
-        .usage = GetImageUsage(aspect),
+        .usage = vk_usage
     };
 
     const VmaAllocationCreateInfo alloc_info = {
@@ -174,8 +140,23 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
     }
 
     vk::Image image = vk::Image{unsafe_image};
 
     const vk::ImageViewCreateInfo view_info = {
         .image = image,
         .viewType = type == VideoCore::TextureType::CubeMap ?
                         vk::ImageViewType::eCube :
                         vk::ImageViewType::e2D,
         .format = vk_format,
         .subresourceRange = {
             .aspectMask = aspect,
             .baseMipLevel = 0,
             .levelCount = levels,
             .baseArrayLayer = 0,
             .layerCount = layers
         }
     };
 
+    // Also create a base mip view in case this is used as an attachment
+    const vk::ImageViewCreateInfo base_view_info = {
+        .image = image,
+        .viewType = type == VideoCore::TextureType::CubeMap ?
+                        vk::ImageViewType::eCube :
@@ -192,13 +173,17 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
 
     vk::Device device = instance.GetDevice();
     vk::ImageView image_view = device.createImageView(view_info);
+    vk::ImageView base_view = device.createImageView(base_view_info);
 
     return ImageAlloc{
         .image = image,
         .image_view = image_view,
+        .base_view = base_view,
         .allocation = allocation,
+        .format = vk_format,
         .aspect = aspect,
         .levels = levels,
         .layers = layers
     };
 }
 
@@ -206,21 +191,45 @@ void TextureRuntime::Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& a
     texture_recycler.emplace(tag, std::move(alloc));
 }
 
-void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload,
+void TextureRuntime::FormatConvert(const Surface& surface, bool upload,
                                    std::span<std::byte> source, std::span<std::byte> dest) {
-    const VideoCore::SurfaceType type = VideoCore::GetFormatType(format);
-    const vk::FormatFeatureFlagBits feature = ToVkFormatFeatures(type);
-
-    if (format == VideoCore::PixelFormat::RGBA8) {
-        return Pica::Texture::ConvertABGRToRGBA(source, dest);
-    } else if (format == VideoCore::PixelFormat::RGB8 && upload) {
-        return Pica::Texture::ConvertBGRToRGBA(source, dest);
-    } else if (instance.IsFormatSupported(ToVkFormat(format), feature)) {
-        std::memcpy(dest.data(), source.data(), source.size());
-    } else {
-        LOG_CRITICAL(Render_Vulkan, "Unimplemented converion for format {}!", format);
+    if (!surface.NeedsConvert()) {
+        std::memcpy(dest.data(), source.data(), source.size());
+        return;
     }
+
+    // Since this is the most common case handle it separately
+    if (surface.pixel_format == VideoCore::PixelFormat::RGBA8) {
+        return Pica::Texture::ConvertABGRToRGBA(source, dest);
+    }
+
+    // Handle simple D24S8 interleave case
+    if (surface.GetInternalFormat() == vk::Format::eD24UnormS8Uint) {
+        return Pica::Texture::InterleaveD24S8(source, dest);
+    }
+
+    if (upload) {
+        switch (surface.pixel_format) {
+        case VideoCore::PixelFormat::RGB8:
+            return Pica::Texture::ConvertBGRToRGBA(source, dest);
+        case VideoCore::PixelFormat::RGBA4:
+            return Pica::Texture::ConvertRGBA4ToRGBA8(source, dest);
+        default:
+            break;
+        }
+    } else {
+        switch (surface.pixel_format) {
+        case VideoCore::PixelFormat::D24S8:
+            return Pica::Texture::ConvertD32S8ToD24S8(source, dest);
+        case VideoCore::PixelFormat::RGBA4:
+            return Pica::Texture::ConvertRGBA8ToRGBA4(source, dest);
+        default:
+            break;
+        }
+    }
+
+    LOG_WARNING(Render_Vulkan, "Missing format conversion: {} {} {}",
+                vk::to_string(surface.traits.native), upload ? "->" : "<-", vk::to_string(surface.alloc.format));
 }
 
 bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
@@ -276,11 +285,12 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
     }
 
     auto [it, new_framebuffer] = clear_framebuffers.try_emplace(alloc.image_view, vk::Framebuffer{});
     if (new_framebuffer) {
+        const vk::ImageView framebuffer_view = surface.GetFramebufferView();
         const vk::FramebufferCreateInfo framebuffer_info = {
             .renderPass = clear_renderpass,
             .attachmentCount = 1,
-            .pAttachments = &alloc.image_view,
+            .pAttachments = &framebuffer_view,
            .width = surface.GetScaledWidth(),
            .height = surface.GetScaledHeight(),
            .layers = 1
@@ -377,7 +387,7 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCor
 
     command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
                              dest.alloc.image, vk::ImageLayout::eTransferDstOptimal,
-                             blit_area, vk::Filter::eLinear);
+                             blit_area, vk::Filter::eNearest);
 
     return true;
 }
@@ -528,7 +538,7 @@ void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& al
 
 Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
     : VideoCore::SurfaceBase<Surface>{params}, runtime{runtime}, instance{runtime.GetInstance()},
-      scheduler{runtime.GetScheduler()} {
+      scheduler{runtime.GetScheduler()}, traits{instance.GetTraits(pixel_format)} {
 
     if (pixel_format != VideoCore::PixelFormat::Invalid) {
         alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type);
@@ -604,7 +614,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
     vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
     const VideoCore::Rect2D rect = download.texture_rect;
     vk::BufferImageCopy copy_region = {
-        .bufferOffset = staging.buffer_offset,
+        .bufferOffset = staging.buffer_offset + download.buffer_offset,
         .bufferRowLength = rect.GetWidth(),
         .bufferImageHeight = rect.GetHeight(),
         .imageSubresource = {
@@ -624,20 +634,17 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
         copy_regions[region_count++] = copy_region;
 
         if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) {
-            return; // HACK: Skip depth + stencil downloads for now
-            copy_region.bufferOffset += staging.mapped.size();
-            copy_region.imageSubresource.aspectMask |= vk::ImageAspectFlagBits::eStencil;
+            copy_region.bufferOffset += 4 * staging.size / 5;
+            copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
             copy_regions[region_count++] = copy_region;
         }
     }
 
-    runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1);
+    runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferSrcOptimal, 0, alloc.levels);
 
     // Copy pixel data to the staging buffer
     command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal,
                                      staging.buffer, region_count, copy_regions.data());
-
-    scheduler.Submit(SubmitMode::Flush);
 }
|
||||
|
||||
// Lock this data until the next scheduler switch
|
||||
@ -645,6 +652,22 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
|
||||
runtime.staging_offsets[current_slot] += staging.size;
|
||||
}

bool Surface::NeedsConvert() const {
    // RGBA8 needs a byteswap since R8G8B8A8UnormPack32 does not exist
    // D24S8 always needs an interleave pass even if natively supported
    return alloc.format != traits.native ||
           pixel_format == VideoCore::PixelFormat::RGBA8 ||
           pixel_format == VideoCore::PixelFormat::D24S8;
}

u32 Surface::GetInternalBytesPerPixel() const {
    if (alloc.format == vk::Format::eD32SfloatS8Uint) {
        return 8;
    }

    return vk::blockSize(alloc.format);
}

void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
    /*const u32 rect_width = download.texture_rect.GetWidth();
    const u32 rect_height = download.texture_rect.GetHeight();

@ -10,6 +10,7 @@
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/rasterizer_cache/types.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"

namespace Vulkan {
@ -24,10 +25,14 @@ struct StagingData {
struct ImageAlloc {
    vk::Image image;
    vk::ImageView image_view;
    vk::ImageView base_view;
    VmaAllocation allocation;
    vk::ImageUsageFlags usage;
    vk::Format format;
    vk::ImageLayout layout = vk::ImageLayout::eUndefined;
    vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone;
    u32 levels = 1;
    u32 layers = 1;
};

class Instance;
@ -48,6 +53,9 @@ public:
    /// Maps an internal staging buffer of the provided size for pixel uploads/downloads
    [[nodiscard]] StagingData FindStaging(u32 size, bool upload);

    /// Causes a GPU command flush
    void Finish();

    /// Allocates a Vulkan image, possibly reusing an existing one
    [[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
                                      VideoCore::TextureType type);
@ -56,7 +64,7 @@ public:
    void Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc);

    /// Performs required format conversions on the staging data
    void FormatConvert(VideoCore::PixelFormat format, bool upload,
    void FormatConvert(const Surface& surface, bool upload,
                       std::span<std::byte> source, std::span<std::byte> dest);

    /// Transitions the mip level range of the surface to new_layout
@ -114,6 +122,27 @@ public:
    /// Downloads pixel data to staging from a rectangle region of the surface texture
    void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging);

    /// Returns true if the surface requires pixel data conversion
    bool NeedsConvert() const;

    /// Returns the bpp of the internal surface format
    u32 GetInternalBytesPerPixel() const;

    /// Returns an image view used to sample the surface from a shader
    vk::ImageView GetImageView() const {
        return alloc.image_view;
    }

    /// Returns an image view used to create a framebuffer
    vk::ImageView GetFramebufferView() {
        return alloc.base_view;
    }

    /// Returns the internal format of the allocated texture
    vk::Format GetInternalFormat() const {
        return alloc.format;
    }

private:
    /// Downloads scaled image by downscaling the requested rectangle
    void ScaledDownload(const VideoCore::BufferTextureCopy& download);
@ -128,9 +157,8 @@ private:
    TextureRuntime& runtime;
    const Instance& instance;
    TaskScheduler& scheduler;

    ImageAlloc alloc{};
    vk::Format internal_format = vk::Format::eUndefined;
    FormatTraits traits;
};

struct Traits {

@ -233,21 +233,67 @@ void ConvertBGRToRGB(std::span<const std::byte> source, std::span<std::byte> des

void ConvertBGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest) {
    u32 j = 0;
    for (std::size_t i = 0; i < source.size(); i += 3) {
        dest[j] = source[i + 2];
        dest[j + 1] = source[i + 1];
        dest[j + 2] = source[i];
        dest[j + 3] = std::byte{0xFF};
        j += 4;
    for (std::size_t i = 0; i < dest.size(); i += 4) {
        dest[i] = source[j + 2];
        dest[i + 1] = source[j + 1];
        dest[i + 2] = source[j];
        dest[i + 3] = std::byte{0xFF};
        j += 3;
    }
}

void ConvertABGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest) {
    for (u32 i = 0; i < source.size(); i += 4) {
    for (u32 i = 0; i < dest.size(); i += 4) {
        const u32 abgr = *reinterpret_cast<const u32*>(source.data() + i);
        const u32 rgba = Common::swap32(abgr);
        std::memcpy(dest.data() + i, &rgba, 4);
    }
}

void ConvertD32S8ToD24S8(std::span<const std::byte> source, std::span<std::byte> dest) {
    u32 depth_offset = 0;
    u32 stencil_offset = 4 * source.size() / 5;
    for (std::size_t i = 0; i < dest.size(); i += 4) {
        float depth;
        std::memcpy(&depth, source.data() + depth_offset, sizeof(float));
        u32 depth_uint = depth * 0xFFFFFF;

        dest[i] = source[stencil_offset];
        std::memcpy(dest.data() + i + 1, &depth_uint, 3);

        depth_offset += 4;
        stencil_offset += 1;
    }
}

void ConvertRGBA4ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest) {
    u32 j = 0;
    for (std::size_t i = 0; i < dest.size(); i += 4) {
        auto rgba = Color::DecodeRGBA4(reinterpret_cast<const u8*>(source.data() + j));
        std::memcpy(dest.data() + i, rgba.AsArray(), sizeof(rgba));
        j += 2;
    }
}

void ConvertRGBA8ToRGBA4(std::span<const std::byte> source, std::span<std::byte> dest) {
    u32 j = 0;
    for (std::size_t i = 0; i < dest.size(); i += 2) {
        Common::Vec4<u8> rgba;
        std::memcpy(rgba.AsArray(), source.data() + j, sizeof(rgba));
        Color::EncodeRGBA4(rgba, reinterpret_cast<u8*>(dest.data() + i));
        j += 4;
    }
}

void InterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest) {
    u32 depth_offset = 0;
    u32 stencil_offset = 3 * source.size() / 4;
    for (std::size_t i = 0; i < dest.size(); i += 4) {
        dest[i] = source[stencil_offset];
        std::memcpy(dest.data() + i + 1, source.data() + depth_offset, 3);
        depth_offset += 3;
        stencil_offset += 1;
    }
}

} // namespace Pica::Texture

@ -80,4 +80,12 @@ void ConvertBGRToRGBA(std::span<const std::byte> source, std::span<std::byte> de
*/
void ConvertABGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest);

void ConvertD32S8ToD24S8(std::span<const std::byte> source, std::span<std::byte> dest);

void ConvertRGBA4ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest);

void ConvertRGBA8ToRGBA4(std::span<const std::byte> source, std::span<std::byte> dest);

void InterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest);

} // namespace Pica::Texture