renderer_vulkan: Rework format handling

* This is a pretty large commit that aims to solve some issues with the current format system
* The instance now builds, at application initialization, an array of format traits for each pixel format
  that includes information such as blit/attachment/storage support and fallback formats
* The runtime no longer asks the instance for formats but receives these traits and can decide on its own what to build.
  For now we do the same as before: we require both blit and attachment support (see the sketch below)

* Morton swizzling also sees many bug fixes. The previous code was very hacky and didn't work for partial
  texture updates. It was also inconsistent, as it would take a tiled_buffer and write to the middle of the linear buffer
* The functions have now been greatly simplified and adjusted to work better with std::span. This fixes out-of-bounds
  errors and texture glitches (like the display in Mario Kart 7)
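
A minimal sketch of the traits-based decision described above (assuming the FormatTraits fields and the is_suitable naming introduced later in this commit):

    // Query the traits the instance computed at initialization
    const FormatTraits traits = instance.GetTraits(pixel_format);
    // As before, require both blit and attachment support; otherwise
    // fall back to the closest guaranteed format (RGBA8 / D32 / D32S8)
    const bool is_suitable = traits.blit_support && traits.attachment_support;
    const vk::Format vk_format = is_suitable ? traits.native : traits.fallback;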
GPUCode
2022-10-01 18:16:57 +03:00
parent eeccdc02fc
commit 891b4bff18
23 changed files with 532 additions and 281 deletions


@ -7,6 +7,7 @@
#include <array>
#include <memory>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "common/common_types.h"


@ -135,42 +135,53 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer, std::sp
}
}
/**
* @brief Performs morton to/from linear conversions on the provided pixel data
* @param width, height The dimensions of the rectangular region of pixels in linear_buffer
* @param start_offset The number of bytes from the start of the first tile to the start of tiled_buffer
* @param end_offset The number of bytes from the start of the first tile to the end of tiled_buffer
* @param linear_buffer The linear pixel data
* @param tiled_buffer The tiled pixel data
*
* MortonCopy is at the heart of the PICA texture implementation, as it's responsible for converting between
* linear and morton tiled layouts. The function handles both conversions but there are slightly different
* paths and inputs for each:
*
* Morton to Linear:
* During uploads, tiled_buffer is always aligned to the tile or scanline boundary, depending on whether the linear rectangle
* spans multiple vertical tiles. linear_buffer does not reference the entire texture area, but rather the
* specific rectangle affected by the upload.
*
* Linear to Morton:
* This is similar to the other conversion but with some differences. In this case tiled_buffer is not required
* to be aligned to any specific boundary, which requires special care. start_offset/end_offset are useful
* here as they tell us exactly where the data should be placed in the linear_buffer.
*/
template <bool morton_to_linear, PixelFormat format>
static void MortonCopy(u32 stride, u32 height, u32 start_offset, u32 end_offset,
std::span<std::byte> linear_buffer,
std::span<std::byte> tiled_buffer) {
static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer) {
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format);
constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, "");
// We could use bytes_per_pixel here but it should be avoided because it
// becomes zero for 4-bit textures!
constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
const u32 linear_tile_size = (7 * stride + 8) * aligned_bytes_per_pixel;
// Does this line have any significance?
//u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel;
u32 linear_offset = 0;
u32 tiled_offset = 0;
const PAddr aligned_down_start_offset = Common::AlignDown(start_offset, tile_size);
const PAddr aligned_start_offset = Common::AlignUp(start_offset, tile_size);
PAddr aligned_end_offset = Common::AlignDown(end_offset, tile_size);
const u32 linear_tile_stride = (7 * width + 8) * aligned_bytes_per_pixel;
const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size);
const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size);
const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size);
ASSERT(!morton_to_linear || (aligned_start_offset == start_offset && aligned_end_offset == end_offset));
const u32 begin_pixel_index = aligned_down_start_offset * 8 / GetFormatBpp(format);
u32 x = (begin_pixel_index % (stride * 8)) / 8;
u32 y = (begin_pixel_index / (stride * 8)) * 8;
// In OpenGL the texture origin is in the bottom left corner as opposed to other
// APIs that have it at the top left. To avoid flipping texture coordinates in
// the shader we read/write the linear buffer backwards
linear_offset += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel;
// the shader we read/write the linear buffer from the bottom up
u32 linear_offset = ((height - 8) * width) * aligned_bytes_per_pixel;
u32 tiled_offset = 0;
u32 x = 0;
u32 y = 0;
auto linear_next_tile = [&] {
x = (x + 8) % stride;
const auto LinearNextTile = [&] {
x = (x + 8) % width;
linear_offset += 8 * aligned_bytes_per_pixel;
if (!x) {
y = (y + 8) % height;
@ -178,7 +189,7 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset, u32 end_offset,
return;
}
linear_offset -= stride * 9 * aligned_bytes_per_pixel;
linear_offset -= width * 9 * aligned_bytes_per_pixel;
}
};
@ -186,31 +197,31 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset, u32 end_offset,
// the tile affected to a temporary buffer and copy the part we are interested in
if (start_offset < aligned_start_offset && !morton_to_linear) {
std::array<std::byte, tile_size> tmp_buf;
auto linear_data = linear_buffer.last(linear_buffer.size_bytes() - linear_offset);
MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data);
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
MortonCopyTile<morton_to_linear, format>(width, tmp_buf, linear_data);
std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset,
std::min(aligned_start_offset, end_offset) - start_offset);
tiled_offset += aligned_start_offset - start_offset;
linear_next_tile();
LinearNextTile();
}
const u32 buffer_end = tiled_offset + aligned_end_offset - aligned_start_offset;
while (tiled_offset < buffer_end) {
auto linear_data = linear_buffer.last(linear_buffer.size_bytes() - linear_offset);
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size);
MortonCopyTile<morton_to_linear, format>(stride, tiled_data, linear_data);
MortonCopyTile<morton_to_linear, format>(width, tiled_data, linear_data);
tiled_offset += tile_size;
linear_next_tile();
LinearNextTile();
}
// If during a texture download the end coordinate is not tile aligned, swizzle
// the tile affected to a temporary buffer and copy the part we are interested in
if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) {
std::array<std::byte, tile_size> tmp_buf;
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size);
MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data);
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
MortonCopyTile<morton_to_linear, format>(width, tmp_buf, linear_data);
std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), end_offset - aligned_end_offset);
}
}
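
A worked example of the offset math above, with hypothetical values, assuming RGBA8 at 32 bpp so that tile_size is 256 bytes:

    constexpr u32 tile_size = 32 * 64 / 8;  // 256 bytes per 8x8 tile
    const u32 start_offset = 100;           // hypothetical unaligned start
    const u32 end_offset = 600;             // hypothetical unaligned end
    const u32 aligned_down_start = Common::AlignDown(start_offset, tile_size); // 0
    const u32 aligned_start = Common::AlignUp(start_offset, tile_size);        // 256
    const u32 aligned_end = Common::AlignDown(end_offset, tile_size);          // 512
    // Prologue: swizzle tile [0, 256) into tmp_buf and copy bytes [100, 256)
    // Main loop: swizzle whole tiles covering [256, 512) directly
    // Epilogue: swizzle tile [512, 768) into tmp_buf and copy bytes [512, 600)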


@ -163,7 +163,8 @@ private:
SurfaceMap dirty_regions;
SurfaceSet remove_surfaces;
u16 resolution_scale_factor;
std::vector<std::function<void()>> download_queue;
std::vector<std::byte> staging_buffer;
std::unordered_map<TextureCubeConfig, Surface> texture_cube_cache;
std::recursive_mutex mutex;
};
@ -269,9 +270,26 @@ bool RasterizerCache<T>::BlitSurfaces(const Surface& src_surface, Common::Rectan
const Surface& dst_surface, Common::Rectangle<u32> dst_rect) {
MICROPROFILE_SCOPE(RasterizerCache_BlitSurface);
if (CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) {
dst_surface->InvalidateAllWatcher();
if (!CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) [[unlikely]] {
return false;
}
dst_surface->InvalidateAllWatcher();
// Prefer texture copy over blit if possible
if (src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.bottom < src_rect.top) {
const TextureCopy texture_copy = {
.src_level = 0,
.dst_level = 0,
.src_layer = 0,
.dst_layer = 0,
.src_offset = {src_rect.left, src_rect.bottom},
.dst_offset = {dst_rect.left, dst_rect.bottom},
.extent = {src_rect.GetWidth(), src_rect.GetHeight()}
};
return runtime.CopyTextures(*src_surface, *dst_surface, texture_copy);
} else {
const TextureBlit texture_blit = {
.src_level = 0,
.dst_level = 0,
@ -283,8 +301,6 @@ bool RasterizerCache<T>::BlitSurfaces(const Surface& src_surface, Common::Rectan
return runtime.BlitTextures(*src_surface, *dst_surface, texture_blit);
}
return false;
}
MICROPROFILE_DECLARE(RasterizerCache_CopySurface);
@ -888,35 +904,31 @@ void RasterizerCache<T>::ValidateSurface(const Surface& surface, PAddr addr, u32
MICROPROFILE_DECLARE(RasterizerCache_SurfaceLoad);
template <class T>
void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval interval) {
const SurfaceParams info = surface->FromInterval(interval);
const u32 load_start = info.addr;
const u32 load_end = info.end;
ASSERT(load_start >= surface->addr && load_end <= surface->end);
const SurfaceParams load_info = surface->FromInterval(interval);
ASSERT(load_info.addr >= surface->addr && load_info.end <= surface->end);
MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
const auto& staging = runtime.FindStaging(
surface->width * surface->height * 4, true);
MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(info.addr);
load_info.width * load_info.height * surface->GetInternalBytesPerPixel(), true);
MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(load_info.addr);
if (!source_ptr) [[unlikely]] {
return;
}
const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start);
MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
const auto upload_data = source_ptr.GetWriteBytes(load_info.end - load_info.addr);
if (surface->is_tiled) {
std::vector<std::byte> unswizzled_data(staging.size);
UnswizzleTexture(*surface, load_start - surface->addr, load_end - surface->addr,
upload_data, unswizzled_data);
runtime.FormatConvert(surface->pixel_format, true, unswizzled_data, staging.mapped);
std::vector<std::byte> unswizzled_data(load_info.width * load_info.height * GetBytesPerPixel(load_info.pixel_format));
UnswizzleTexture(load_info, load_info.addr, load_info.end, upload_data, unswizzled_data);
runtime.FormatConvert(*surface, true, unswizzled_data, staging.mapped);
} else {
runtime.FormatConvert(surface->pixel_format, true, upload_data, staging.mapped);
runtime.FormatConvert(*surface, true, upload_data, staging.mapped);
}
const BufferTextureCopy upload = {
.buffer_offset = 0,
.buffer_size = staging.size,
.texture_rect = surface->GetSubRect(info),
.texture_rect = surface->GetSubRect(load_info),
.texture_level = 0
};
@ -926,17 +938,17 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i
MICROPROFILE_DECLARE(RasterizerCache_SurfaceFlush);
template <class T>
void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval interval) {
const SurfaceParams flush_info = surface->FromInterval(interval);
const u32 flush_start = boost::icl::first(interval);
const u32 flush_end = boost::icl::last_next(interval);
ASSERT(flush_start >= surface->addr && flush_end <= surface->end);
const auto& staging = runtime.FindStaging(
surface->width * surface->height * 4, false);
const SurfaceParams params = surface->FromInterval(interval);
flush_info.width * flush_info.height * surface->GetInternalBytesPerPixel(), false);
const BufferTextureCopy download = {
.buffer_offset = 0,
.buffer_size = staging.size,
.texture_rect = surface->GetSubRect(params),
.texture_rect = surface->GetSubRect(flush_info),
.texture_level = 0
};
@ -948,17 +960,15 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
}
const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush);
if (surface->is_tiled) {
std::vector<std::byte> swizzled_data(staging.size);
runtime.FormatConvert(surface->pixel_format, false, swizzled_data, swizzled_data);
SwizzleTexture(*surface, flush_start - surface->addr, flush_end - surface->addr,
staging.mapped, download_dest);
} else {
runtime.FormatConvert(surface->pixel_format, false, staging.mapped, download_dest);
}
download_queue.push_back([this, surface, download_dest, flush_start, flush_end, flush_info, mapped = staging.mapped]() {
if (surface->is_tiled) {
std::vector<std::byte> temp_data(flush_info.width * flush_info.height * GetBytesPerPixel(flush_info.pixel_format));
runtime.FormatConvert(*surface, false, mapped, temp_data);
SwizzleTexture(flush_info, flush_start, flush_end, temp_data, download_dest);
} else {
runtime.FormatConvert(*surface, false, mapped, download_dest);
}
});
}
template <class T>
@ -1135,6 +1145,17 @@ void RasterizerCache<T>::FlushRegion(PAddr addr, u32 size, Surface flush_surface
flushed_intervals += interval;
}
// Batch execute all requested downloads. This gives more time for them to complete
// before we issue the CPU to GPU flush and reduces scheduler slot switches in Vulkan
if (!download_queue.empty()) {
runtime.Finish();
for (auto& download_func : download_queue) {
download_func();
}
download_queue.clear();
}
// Reset dirty regions
dirty_regions -= flushed_intervals;
}


@ -41,6 +41,8 @@ struct TextureClear {
struct TextureCopy {
u32 src_level;
u32 dst_level;
u32 src_layer;
u32 dst_layer;
Offset src_offset;
Offset dst_offset;
Extent extent;


@ -11,18 +11,22 @@
namespace VideoCore {
void SwizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr,
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled) {
const u32 func_index = static_cast<u32>(params.pixel_format);
const u32 func_index = static_cast<u32>(swizzle_info.pixel_format);
const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index];
SwizzleImpl(params.stride, params.height, start_offset, end_offset, source_linear, dest_tiled);
SwizzleImpl(swizzle_info.width, swizzle_info.height,
start_addr - swizzle_info.addr, end_addr - swizzle_info.addr,
source_linear, dest_tiled);
}
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr,
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear) {
const u32 func_index = static_cast<u32>(params.pixel_format);
const u32 func_index = static_cast<u32>(unswizzle_info.pixel_format);
const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index];
UnswizzleImpl(params.stride, params.height, start_offset, end_offset, dest_linear, source_tiled);
UnswizzleImpl(unswizzle_info.width, unswizzle_info.height,
start_addr - unswizzle_info.addr, end_addr - unswizzle_info.addr,
dest_linear, source_tiled);
}
ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data) {


@ -44,19 +44,29 @@ class SurfaceParams;
[[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data);
void SwizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled);
/**
* Converts a morton swizzled texture to linear format.
*
* @param params Structure used to query the surface information.
* @param start_offset Is the offset at which the source_tiled span begins
* @param unswizzle_info Structure used to query the surface information.
* @param start_addr The start address of the source_tiled data.
* @param end_addr The end address of the source_tiled data.
* @param source_tiled The tiled data to convert.
* @param dest_linear The output buffer where the generated linear data will be written to.
*/
void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr,
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear);
/**
* Swizzles a linear texture according to the morton code.
*
* @param swizzle_info Structure used to query the surface information.
* @param start_addr The start address of the dest_tiled data.
* @param end_addr The end address of the dest_tiled data.
* @param source_linear The source linear pixel data.
* @param dest_tiled The output buffer where the generated morton swizzled data will be written to.
*/
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear);
void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr,
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled);
} // namespace VideoCore


@ -124,8 +124,9 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_f
return DEFAULT_TUPLE;
}
void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload,
void TextureRuntime::FormatConvert(const Surface& surface, bool upload,
std::span<std::byte> source, std::span<std::byte> dest) {
const VideoCore::PixelFormat format = surface.pixel_format;
if (format == VideoCore::PixelFormat::RGBA8 && driver.IsOpenGLES()) {
Pica::Texture::ConvertABGRToRGBA(source, dest);
} else if (format == VideoCore::PixelFormat::RGB8 && driver.IsOpenGLES()) {


@ -70,8 +70,10 @@ public:
/// Returns the OpenGL format tuple associated with the provided pixel format
const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format);
void Finish() const {}
/// Performs required format conversions on the staging data
void FormatConvert(VideoCore::PixelFormat format, bool upload,
void FormatConvert(const Surface& surface, bool upload,
std::span<std::byte> source, std::span<std::byte> dest);
/// Allocates an OpenGL texture with the specified dimensions and format
@ -136,6 +138,11 @@ public:
/// Downloads pixel data to staging from a rectangle region of the surface texture
void Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging);
/// Returns the bytes per pixel of the internal surface format
u32 GetInternalBytesPerPixel() const {
return VideoCore::GetBytesPerPixel(pixel_format);
}
private:
/// Downloads scaled image by downscaling the requested rectangle
void ScaledDownload(const VideoCore::BufferTextureCopy& download);


@ -183,7 +183,7 @@ static std::array<float, 3 * 2> MakeOrthographicMatrix(float width, float height
}
RendererVulkan::RendererVulkan(Frontend::EmuWindow& window)
: RendererBase{window}, instance{window}, scheduler{instance}, renderpass_cache{instance, scheduler},
: RendererBase{window}, instance{window}, scheduler{instance, *this}, renderpass_cache{instance, scheduler},
runtime{instance, scheduler, renderpass_cache}, swapchain{instance, renderpass_cache},
vertex_buffer{instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}} {
@ -626,7 +626,7 @@ void RendererVulkan::BuildPipelines() {
void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture, const GPU::Regs::FramebufferConfig& framebuffer) {
TextureInfo old_texture = texture;
texture = TextureInfo {
texture = TextureInfo{
.alloc = runtime.Allocate(framebuffer.width, framebuffer.height,
VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format),
VideoCore::TextureType::Texture2D),
@ -1035,21 +1035,24 @@ void RendererVulkan::SwapBuffers() {
DrawScreens(layout, false);
// Flush all buffers to make the data visible to the GPU before submitting
rasterizer->FlushBuffers();
vertex_buffer.Flush();
scheduler.Submit(SubmitMode::SwapchainSynced);
swapchain.Present(present_ready);
}
// Inform texture runtime about the switch
runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex());
void RendererVulkan::FlushBuffers() {
vertex_buffer.Flush();
rasterizer->FlushBuffers();
}
void RendererVulkan::OnSlotSwitch() {
// When the command buffer switches, all state becomes undefined.
// This is problematic with dynamic states, so set all states here
if (instance.IsExtendedDynamicStateSupported()) {
rasterizer->SyncFixedState();
}
runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex());
rasterizer->pipeline_cache.MarkDirty();
}
} // namespace Vulkan


@ -72,6 +72,8 @@ public:
void PrepareVideoDumping() override {}
void CleanupVideoDumping() override {}
void Sync() override;
void FlushBuffers();
void OnSlotSwitch();
private:
void ReloadSampler();


@ -26,11 +26,11 @@ constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) {
switch (format) {
case vk::Format::eD16UnormS8Uint:
case vk::Format::eD24UnormS8Uint:
case vk::Format::eX8D24UnormPack32:
case vk::Format::eD32SfloatS8Uint:
return vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth;
break;
case vk::Format::eD16Unorm:
case vk::Format::eX8D24UnormPack32:
case vk::Format::eD32Sfloat:
return vk::ImageAspectFlagBits::eDepth;
break;


@ -11,6 +11,33 @@
namespace Vulkan {
vk::Format ToVkFormat(VideoCore::PixelFormat format) {
switch (format) {
case VideoCore::PixelFormat::RGBA8:
return vk::Format::eR8G8B8A8Unorm;
case VideoCore::PixelFormat::RGB8:
return vk::Format::eB8G8R8Unorm;
case VideoCore::PixelFormat::RGB5A1:
return vk::Format::eR5G5B5A1UnormPack16;
case VideoCore::PixelFormat::RGB565:
return vk::Format::eR5G6B5UnormPack16;
case VideoCore::PixelFormat::RGBA4:
return vk::Format::eR4G4B4A4UnormPack16;
case VideoCore::PixelFormat::D16:
return vk::Format::eD16Unorm;
case VideoCore::PixelFormat::D24:
return vk::Format::eX8D24UnormPack32;
case VideoCore::PixelFormat::D24S8:
return vk::Format::eD24UnormS8Uint;
case VideoCore::PixelFormat::Invalid:
LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format);
return vk::Format::eUndefined;
default:
// Use default case for the texture formats
return vk::Format::eR8G8B8A8Unorm;
}
}
Instance::Instance(Frontend::EmuWindow& window) {
auto window_info = window.GetWindowInfo();
@ -54,6 +81,7 @@ Instance::Instance(Frontend::EmuWindow& window) {
device_properties = physical_device.getProperties();
CreateDevice();
CreateFormatTable();
}
Instance::~Instance() {
@ -64,50 +92,99 @@ Instance::~Instance() {
instance.destroy();
}
bool Instance::IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const {
static std::unordered_map<vk::Format, vk::FormatProperties> supported;
if (auto it = supported.find(format); it != supported.end()) {
return (it->second.optimalTilingFeatures & usage) == usage;
FormatTraits Instance::GetTraits(VideoCore::PixelFormat pixel_format) const {
if (pixel_format == VideoCore::PixelFormat::Invalid) [[unlikely]] {
return FormatTraits{};
}
// Cache format properties so we don't have to query the driver all the time
const vk::FormatProperties properties = physical_device.getFormatProperties(format);
supported.insert(std::make_pair(format, properties));
return (properties.optimalTilingFeatures & usage) == usage;
const u32 index = static_cast<u32>(pixel_format);
return format_table[index];
}
vk::Format Instance::GetFormatAlternative(vk::Format format) const {
if (format == vk::Format::eUndefined) {
return format;
}
void Instance::CreateFormatTable() {
constexpr std::array pixel_formats = {
VideoCore::PixelFormat::RGBA8,
VideoCore::PixelFormat::RGB8,
VideoCore::PixelFormat::RGB5A1,
VideoCore::PixelFormat::RGB565,
VideoCore::PixelFormat::RGBA4,
VideoCore::PixelFormat::IA8,
VideoCore::PixelFormat::RG8,
VideoCore::PixelFormat::I8,
VideoCore::PixelFormat::A8,
VideoCore::PixelFormat::IA4,
VideoCore::PixelFormat::I4,
VideoCore::PixelFormat::A4,
VideoCore::PixelFormat::ETC1,
VideoCore::PixelFormat::ETC1A4,
VideoCore::PixelFormat::D16,
VideoCore::PixelFormat::D24,
VideoCore::PixelFormat::D24S8
};
vk::FormatFeatureFlags features = GetFormatFeatures(GetImageAspect(format));
if (IsFormatSupported(format, features)) {
return format;
}
const vk::FormatFeatureFlags storage_usage = vk::FormatFeatureFlagBits::eStorageImage;
const vk::FormatFeatureFlags blit_usage = vk::FormatFeatureFlagBits::eSampledImage |
vk::FormatFeatureFlagBits::eTransferDst |
vk::FormatFeatureFlagBits::eTransferSrc |
vk::FormatFeatureFlagBits::eBlitSrc |
vk::FormatFeatureFlagBits::eBlitDst;
// Return the most supported alternative format preferably with the
// same block size according to the Vulkan spec.
// See 43.3. Required Format Support of the Vulkan spec
switch (format) {
case vk::Format::eD24UnormS8Uint:
return vk::Format::eD32SfloatS8Uint;
case vk::Format::eX8D24UnormPack32:
return vk::Format::eD32Sfloat;
case vk::Format::eR5G5B5A1UnormPack16:
return vk::Format::eA1R5G5B5UnormPack16;
case vk::Format::eR8G8B8Unorm:
return vk::Format::eR8G8B8A8Unorm;
case vk::Format::eUndefined:
return vk::Format::eUndefined;
case vk::Format::eR4G4B4A4UnormPack16:
// B4G4R4A4 is not guaranteed by the spec to support attachments
return GetFormatAlternative(vk::Format::eB4G4R4A4UnormPack16);
default:
LOG_WARNING(Render_Vulkan, "Format {} doesn't support attachments, falling back to RGBA8",
vk::to_string(format));
return vk::Format::eR8G8B8A8Unorm;
for (const auto& pixel_format : pixel_formats) {
const vk::Format format = ToVkFormat(pixel_format);
const vk::FormatProperties properties = physical_device.getFormatProperties(format);
const vk::ImageAspectFlags aspect = GetImageAspect(format);
const vk::FormatFeatureFlagBits attachment_usage = (aspect & vk::ImageAspectFlagBits::eDepth) ?
vk::FormatFeatureFlagBits::eDepthStencilAttachment :
vk::FormatFeatureFlagBits::eColorAttachment;
const bool supports_blit =
(properties.optimalTilingFeatures & blit_usage) == blit_usage;
const bool supports_attachment =
(properties.optimalTilingFeatures & attachment_usage) == attachment_usage;
const bool supports_storage =
(properties.optimalTilingFeatures & storage_usage) == storage_usage;
// Find the most inclusive usage flags for this format
vk::ImageUsageFlags best_usage;
if (supports_blit) {
best_usage |= vk::ImageUsageFlagBits::eSampled |
vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eTransferSrc;
}
if (supports_attachment) {
best_usage |= (aspect & vk::ImageAspectFlagBits::eDepth) ?
vk::ImageUsageFlagBits::eDepthStencilAttachment :
vk::ImageUsageFlagBits::eColorAttachment;
}
if (supports_storage) {
best_usage |= vk::ImageUsageFlagBits::eStorage;
}
// Always fall back to RGBA8 or D32(S8) for convenience
vk::Format fallback = vk::Format::eR8G8B8A8Unorm;
if (aspect & vk::ImageAspectFlagBits::eDepth) {
fallback = vk::Format::eD32Sfloat;
if (aspect & vk::ImageAspectFlagBits::eStencil) {
fallback = vk::Format::eD32SfloatS8Uint;
}
}
// Report completely unsupported formats
if (!supports_blit && !supports_attachment && !supports_storage) {
LOG_WARNING(Render_Vulkan, "Format {} unsupported, falling back unconditionally to {}",
vk::to_string(format), vk::to_string(fallback));
}
const u32 index = static_cast<u32>(pixel_format);
format_table[index] = FormatTraits{
.blit_support = supports_blit,
.attachment_support = supports_attachment,
.storage_support = supports_storage,
.usage = best_usage,
.native = format,
.fallback = fallback
};
}
}
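
As an example of how the table is consumed (hypothetical device capabilities): on a driver without native D24S8 support, GetTraits would report attachment_support = false for that format and consumers pick the fallback instead:

    const FormatTraits traits = instance.GetTraits(VideoCore::PixelFormat::D24S8);
    // traits.native == vk::Format::eD24UnormS8Uint
    // traits.fallback == vk::Format::eD32SfloatS8Uint
    const vk::Format format = traits.attachment_support ? traits.native : traits.fallback;
    // Downloads of the D32S8 fallback are later converted back with ConvertD32S8ToD24S8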


@ -4,8 +4,9 @@
#pragma once
#include <memory>
#include "common/common_types.h"
#include <array>
#include <unordered_map>
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Frontend {
@ -14,17 +15,23 @@ class EmuWindow;
namespace Vulkan {
struct FormatTraits {
bool blit_support = false; ///< True if the format supports omnidirectional blit operations
bool attachment_support = false; ///< True if the format supports being used as an attachment
bool storage_support = false; ///< True if the format supports storage operations
vk::ImageUsageFlags usage{}; ///< Most supported usage for the native format
vk::Format native = vk::Format::eUndefined; ///< Closest possible native format
vk::Format fallback = vk::Format::eUndefined; ///< Best fallback format
};
/// The global Vulkan instance
class Instance {
public:
Instance(Frontend::EmuWindow& window);
~Instance();
/// Returns true when the format supports the provided feature flags
bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const;
/// Returns the most compatible format that supports the provided feature flags
vk::Format GetFormatAlternative(vk::Format format) const;
/// Returns the FormatTraits struct for the provided pixel format
FormatTraits GetTraits(VideoCore::PixelFormat pixel_format) const;
/// Returns the Vulkan instance
vk::Instance GetInstance() const {
@ -103,6 +110,12 @@ public:
}
private:
/// Returns the supported format features for the requested format
vk::FormatFeatureFlags GetFormatFeatures(vk::Format format);
/// Creates the format compatibility table for the current device
void CreateFormatTable();
/// Creates the logical device opportunistically enabling extensions
bool CreateDevice();
@ -118,9 +131,9 @@ private:
VmaAllocator allocator;
vk::Queue present_queue;
vk::Queue graphics_queue;
std::array<FormatTraits, VideoCore::PIXEL_FORMAT_COUNT> format_table;
u32 present_queue_family_index = 0;
u32 graphics_queue_family_index = 0;
bool timeline_semaphores = false;
bool extended_dynamic_state = false;
bool push_descriptors = false;


@ -180,13 +180,6 @@ PipelineCache::~PipelineCache() {
void PipelineCache::BindPipeline(const PipelineInfo& info) {
ApplyDynamic(info);
// When texture downloads occur the runtime will flush the GPU and cause
// a scheduler slot switch behind our back. This might invalidate any
// cached descriptor sets/require pipeline rebinding.
if (timestamp != scheduler.GetHostFenceCounter()) {
MarkDirty();
}
u64 shader_hash = 0;
for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
@ -313,7 +306,6 @@ void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) {
void PipelineCache::MarkDirty() {
descriptor_dirty.fill(true);
current_pipeline = VK_NULL_HANDLE;
timestamp = scheduler.GetHostFenceCounter();
}
void PipelineCache::ApplyDynamic(const PipelineInfo& info) {


@ -248,7 +248,6 @@ private:
std::array<DescriptorSetData, MAX_DESCRIPTOR_SETS> update_data{};
std::array<bool, MAX_DESCRIPTOR_SETS> descriptor_dirty{};
std::array<vk::DescriptorSet, MAX_DESCRIPTOR_SETS> descriptor_sets;
u64 timestamp = 0;
// Bound shader modules
enum ProgramType : u32 {


@ -94,9 +94,9 @@ constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() {
}
constexpr u32 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
constexpr u32 INDEX_BUFFER_SIZE = 2 * 1024 * 1024;
constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
constexpr u32 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024;
constexpr u32 INDEX_BUFFER_SIZE = 8 * 1024 * 1024;
constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 TEXTURE_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr std::array TEXTURE_BUFFER_LF_FORMATS = {
vk::Format::eR32G32Sfloat
@ -188,6 +188,7 @@ RasterizerVulkan::~RasterizerVulkan() {
vmaDestroyImage(allocator, default_texture.image, default_texture.allocation);
device.destroyImageView(default_texture.image_view);
device.destroyImageView(default_texture.base_view);
device.destroySampler(default_sampler);
}
@ -598,12 +599,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
surfaces_rect.bottom, surfaces_rect.top))
};
// Sync the viewport
pipeline_cache.SetViewport(surfaces_rect.left + viewport_rect_unscaled.left * res_scale,
surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale,
viewport_rect_unscaled.GetWidth() * res_scale,
viewport_rect_unscaled.GetHeight() * res_scale);
if (uniform_block_data.data.framebuffer_scale != res_scale) {
uniform_block_data.data.framebuffer_scale = res_scale;
uniform_block_data.dirty = true;
@ -678,8 +673,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
}
};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
// Sync and bind the texture surfaces
const auto pica_textures = regs.texturing.GetTextures();
for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
@ -725,7 +718,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
auto surface = res_cache.GetTextureCube(config);
if (surface != nullptr) {
runtime.Transition(command_buffer, surface->alloc,
runtime.Transition(scheduler.GetRenderCommandBuffer(), surface->alloc,
vk::ImageLayout::eShaderReadOnlyOptimal,
0, surface->alloc.levels, 0, 6);
pipeline_cache.BindTexture(3, surface->alloc.image_view);
@ -746,7 +739,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
auto surface = res_cache.GetTextureSurface(texture);
if (surface != nullptr) {
runtime.Transition(command_buffer, surface->alloc,
runtime.Transition(scheduler.GetRenderCommandBuffer(), surface->alloc,
vk::ImageLayout::eShaderReadOnlyOptimal,
0, surface->alloc.levels);
CheckBarrier(surface->alloc.image_view, texture_index);
@ -767,6 +760,15 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
}
}
// NOTE: From here onwards it's a safe zone to set the draw state; doing that any earlier will cause
// issues as the rasterizer cache might cause a scheduler switch and invalidate our state
// Sync the viewport
pipeline_cache.SetViewport(surfaces_rect.left + viewport_rect_unscaled.left * res_scale,
surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale,
viewport_rect_unscaled.GetWidth() * res_scale,
viewport_rect_unscaled.GetHeight() * res_scale);
// Sync and bind the shader
if (shader_dirty) {
SetShader();
@ -786,8 +788,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
auto valid_surface = color_surface ? color_surface : depth_surface;
const FramebufferInfo framebuffer_info = {
.color = color_surface ? color_surface->alloc.image_view : VK_NULL_HANDLE,
.depth = depth_surface ? depth_surface->alloc.image_view : VK_NULL_HANDLE,
.color = color_surface ? color_surface->GetFramebufferView() : VK_NULL_HANDLE,
.depth = depth_surface ? depth_surface->GetFramebufferView() : VK_NULL_HANDLE,
.renderpass = renderpass_cache.GetRenderpass(pipeline_info.color_attachment,
pipeline_info.depth_attachment, false),
.width = valid_surface->GetScaledWidth(),
@ -799,6 +801,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
it->second = CreateFramebuffer(framebuffer_info);
}
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
if (color_surface) {
runtime.Transition(command_buffer, color_surface->alloc,
vk::ImageLayout::eColorAttachmentOptimal,
@ -1509,15 +1512,9 @@ bool RasterizerVulkan::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon
const bool load_gap = output_gap != 0;
auto [dst_surface, dst_rect] =
res_cache.GetSurfaceSubRect(dst_params, VideoCore::ScaleMatch::Upscale, load_gap);
if (dst_surface == nullptr) {
return false;
}
if (dst_surface->type == VideoCore::SurfaceType::Texture) {
return false;
}
if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
if (!dst_surface || dst_surface->type == VideoCore::SurfaceType::Texture ||
!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
return false;
}


@ -10,24 +10,23 @@
namespace Vulkan {
vk::Format ToVkFormatColor(u32 index) {
VideoCore::PixelFormat ToFormatColor(u32 index) {
switch (index) {
case 0: return vk::Format::eR8G8B8A8Unorm;
case 1: return vk::Format::eR8G8B8Unorm;
case 2: return vk::Format::eR5G5B5A1UnormPack16;
case 3: return vk::Format::eR5G6B5UnormPack16;
case 4: return vk::Format::eR4G4B4A4UnormPack16;
default: return vk::Format::eUndefined;
case 0: return VideoCore::PixelFormat::RGBA8;
case 1: return VideoCore::PixelFormat::RGB8;
case 2: return VideoCore::PixelFormat::RGB5A1;
case 3: return VideoCore::PixelFormat::RGB565;
case 4: return VideoCore::PixelFormat::RGBA4;
default: return VideoCore::PixelFormat::Invalid;
}
}
vk::Format ToVkFormatDepth(u32 index) {
VideoCore::PixelFormat ToFormatDepth(u32 index) {
switch (index) {
case 0: return vk::Format::eD16Unorm;
case 1: return vk::Format::eX8D24UnormPack32;
// Notice the similar gap in PixelFormat
case 3: return vk::Format::eD24UnormS8Uint;
default: return vk::Format::eUndefined;
case 0: return VideoCore::PixelFormat::D16;
case 1: return VideoCore::PixelFormat::D24;
case 3: return VideoCore::PixelFormat::D24S8;
default: return VideoCore::PixelFormat::Invalid;
}
}
@ -36,21 +35,23 @@ RenderpassCache::RenderpassCache(const Instance& instance, TaskScheduler& schedu
// Pre-create all needed renderpasses by the renderer
for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) {
for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) {
const vk::Format color_format =
instance.GetFormatAlternative(ToVkFormatColor(color));
const vk::Format depth_stencil_format =
instance.GetFormatAlternative(ToVkFormatDepth(depth));
const FormatTraits color_traits = instance.GetTraits(ToFormatColor(color));
const FormatTraits depth_traits = instance.GetTraits(ToFormatDepth(depth));
if (color_format == vk::Format::eUndefined &&
depth_stencil_format == vk::Format::eUndefined) {
const vk::Format color_format =
color_traits.attachment_support ? color_traits.native : color_traits.fallback;
const vk::Format depth_format =
depth_traits.attachment_support ? depth_traits.native : depth_traits.fallback;
if (color_format == vk::Format::eUndefined && depth_format == vk::Format::eUndefined) {
continue;
}
cached_renderpasses[color][depth][0] = CreateRenderPass(color_format, depth_stencil_format,
cached_renderpasses[color][depth][0] = CreateRenderPass(color_format, depth_format,
vk::AttachmentLoadOp::eLoad,
vk::ImageLayout::eColorAttachmentOptimal,
vk::ImageLayout::eColorAttachmentOptimal);
cached_renderpasses[color][depth][1] = CreateRenderPass(color_format, depth_stencil_format,
cached_renderpasses[color][depth][1] = CreateRenderPass(color_format, depth_format,
vk::AttachmentLoadOp::eClear,
vk::ImageLayout::eColorAttachmentOptimal,
vk::ImageLayout::eColorAttachmentOptimal);


@ -5,12 +5,14 @@
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} {
TaskScheduler::TaskScheduler(const Instance& instance, RendererVulkan& renderer)
: instance{instance}, renderer{renderer} {
vk::Device device = instance.GetDevice();
const vk::CommandPoolCreateInfo command_pool_info = {
.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
@ -97,11 +99,7 @@ void TaskScheduler::Synchronize(u32 slot) {
const auto& command = commands[slot];
vk::Device device = instance.GetDevice();
u32 completed_counter = completed_fence_counter;
if (instance.IsTimelineSemaphoreSupported()) {
completed_counter = device.getSemaphoreCounterValue(timeline);
}
u32 completed_counter = GetFenceCounter();
if (command.fence_counter > completed_counter) {
if (instance.IsTimelineSemaphoreSupported()) {
const vk::SemaphoreWaitInfo wait_info = {
@ -127,6 +125,10 @@ void TaskScheduler::Synchronize(u32 slot) {
}
void TaskScheduler::Submit(SubmitMode mode) {
if (False(mode & SubmitMode::Shutdown)) {
renderer.FlushBuffers();
}
const auto& command = commands[current_command];
command.render_command_buffer.end();
if (command.use_upload_buffer) {
@ -206,6 +208,7 @@ void TaskScheduler::Submit(SubmitMode mode) {
// Switch to next cmdbuffer.
if (False(mode & SubmitMode::Shutdown)) {
SwitchSlot();
renderer.OnSlotSwitch();
}
}


@ -15,6 +15,7 @@ namespace Vulkan {
class Buffer;
class Instance;
class RendererVulkan;
enum class SubmitMode : u8 {
SwapchainSynced = 1 << 0, ///< Synchronizes command buffer execution with the swapchain
@ -26,7 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(SubmitMode);
class TaskScheduler {
public:
TaskScheduler(const Instance& instance);
TaskScheduler(const Instance& instance, RendererVulkan& renderer);
~TaskScheduler();
/// Blocks the host until the current command completes execution
@ -74,6 +75,7 @@ private:
private:
const Instance& instance;
RendererVulkan& renderer;
u64 next_fence_counter = 1;
u64 completed_fence_counter = 0;


@ -8,36 +8,10 @@
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
#include <vulkan/vulkan_format_traits.hpp>
namespace Vulkan {
vk::Format ToVkFormat(VideoCore::PixelFormat format) {
switch (format) {
case VideoCore::PixelFormat::RGBA8:
return vk::Format::eR8G8B8A8Unorm;
case VideoCore::PixelFormat::RGB8:
return vk::Format::eR8G8B8Unorm;
case VideoCore::PixelFormat::RGB5A1:
return vk::Format::eR5G5B5A1UnormPack16;
case VideoCore::PixelFormat::RGB565:
return vk::Format::eR5G6B5UnormPack16;
case VideoCore::PixelFormat::RGBA4:
return vk::Format::eR4G4B4A4UnormPack16;
case VideoCore::PixelFormat::D16:
return vk::Format::eD16Unorm;
case VideoCore::PixelFormat::D24:
return vk::Format::eX8D24UnormPack32;
case VideoCore::PixelFormat::D24S8:
return vk::Format::eD24UnormS8Uint;
case VideoCore::PixelFormat::Invalid:
LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format);
return vk::Format::eUndefined;
default:
// Use default case for the texture formats
return vk::Format::eR8G8B8A8Unorm;
}
}
vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
switch (type) {
case VideoCore::SurfaceType::Color:
@ -55,23 +29,7 @@ vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
return vk::ImageAspectFlagBits::eColor;
}
vk::FormatFeatureFlagBits ToVkFormatFeatures(VideoCore::SurfaceType type) {
switch (type) {
case VideoCore::SurfaceType::Color:
case VideoCore::SurfaceType::Texture:
case VideoCore::SurfaceType::Fill:
return vk::FormatFeatureFlagBits::eColorAttachment;
case VideoCore::SurfaceType::Depth:
case VideoCore::SurfaceType::DepthStencil:
return vk::FormatFeatureFlagBits::eDepthStencilAttachment;
default:
UNREACHABLE_MSG("Invalid surface type!");
}
return vk::FormatFeatureFlagBits::eColorAttachment;
}
constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 STAGING_BUFFER_SIZE = 64 * 1024 * 1024;
TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache)
@ -92,6 +50,7 @@ TextureRuntime::~TextureRuntime() {
for (const auto& [key, alloc] : texture_recycler) {
vmaDestroyImage(allocator, alloc.image, alloc.allocation);
device.destroyImageView(alloc.image_view);
device.destroyImageView(alloc.base_view);
}
for (const auto& [key, framebuffer] : clear_framebuffers) {
@ -118,6 +77,10 @@ StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
};
}
void TextureRuntime::Finish() {
scheduler.Submit(SubmitMode::Flush);
}
void TextureRuntime::OnSlotSwitch(u32 new_slot) {
staging_offsets[new_slot] = 0;
}
@ -140,9 +103,12 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
return alloc;
}
// Create a new allocation
vk::Format vk_format = instance.GetFormatAlternative(ToVkFormat(format));
vk::ImageAspectFlags aspect = GetImageAspect(vk_format);
const FormatTraits traits = instance.GetTraits(format);
const vk::ImageAspectFlags aspect = ToVkAspect(VideoCore::GetFormatType(format));
const bool is_suitable = traits.blit_support && traits.attachment_support;
const vk::Format vk_format = is_suitable ? traits.native : traits.fallback;
const vk::ImageUsageFlags vk_usage = is_suitable ? traits.usage : GetImageUsage(aspect);
const u32 levels = std::bit_width(std::max(width, height));
const vk::ImageCreateInfo image_info = {
@ -155,7 +121,7 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
.mipLevels = levels,
.arrayLayers = layers,
.samples = vk::SampleCountFlagBits::e1,
.usage = GetImageUsage(aspect),
.usage = vk_usage
};
const VmaAllocationCreateInfo alloc_info = {
@ -174,8 +140,23 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
}
vk::Image image = vk::Image{unsafe_image};
const vk::ImageViewCreateInfo view_info = {
.image = image,
.viewType = type == VideoCore::TextureType::CubeMap ?
vk::ImageViewType::eCube :
vk::ImageViewType::e2D,
.format = vk_format,
.subresourceRange = {
.aspectMask = aspect,
.baseMipLevel = 0,
.levelCount = levels,
.baseArrayLayer = 0,
.layerCount = layers
}
};
// Also create a base mip view in case this is used as an attachment
const vk::ImageViewCreateInfo base_view_info = {
.image = image,
.viewType = type == VideoCore::TextureType::CubeMap ?
vk::ImageViewType::eCube :
@ -192,13 +173,17 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
vk::Device device = instance.GetDevice();
vk::ImageView image_view = device.createImageView(view_info);
vk::ImageView base_view = device.createImageView(base_view_info);
return ImageAlloc{
.image = image,
.image_view = image_view,
.base_view = base_view,
.allocation = allocation,
.format = vk_format,
.aspect = aspect,
.levels = levels,
.layers = layers
};
}
@ -206,21 +191,45 @@ void TextureRuntime::Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& a
texture_recycler.emplace(tag, std::move(alloc));
}
void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload,
void TextureRuntime::FormatConvert(const Surface& surface, bool upload,
std::span<std::byte> source, std::span<std::byte> dest) {
const VideoCore::SurfaceType type = VideoCore::GetFormatType(format);
const vk::FormatFeatureFlagBits feature = ToVkFormatFeatures(type);
if (format == VideoCore::PixelFormat::RGBA8) {
return Pica::Texture::ConvertABGRToRGBA(source, dest);
} else if (format == VideoCore::PixelFormat::RGB8 && upload) {
return Pica::Texture::ConvertBGRToRGBA(source, dest);
} else if (instance.IsFormatSupported(ToVkFormat(format), feature)) {
std::memcpy(dest.data(), source.data(), source.size());
} else {
LOG_CRITICAL(Render_Vulkan, "Unimplemented converion for format {}!", format);
if (!surface.NeedsConvert()) {
std::memcpy(dest.data(), source.data(), source.size());
return;
}
// Since this is the most common case handle it separately
if (surface.pixel_format == VideoCore::PixelFormat::RGBA8) {
return Pica::Texture::ConvertABGRToRGBA(source, dest);
}
// Handle simple D24S8 interleave case
if (surface.GetInternalFormat() == vk::Format::eD24UnormS8Uint) {
return Pica::Texture::InterleaveD24S8(source, dest);
}
if (upload) {
switch (surface.pixel_format) {
case VideoCore::PixelFormat::RGB8:
return Pica::Texture::ConvertBGRToRGBA(source, dest);
case VideoCore::PixelFormat::RGBA4:
return Pica::Texture::ConvertRGBA4ToRGBA8(source, dest);
default:
break;
}
} else {
switch (surface.pixel_format) {
case VideoCore::PixelFormat::D24S8:
return Pica::Texture::ConvertD32S8ToD24S8(source, dest);
case VideoCore::PixelFormat::RGBA4:
return Pica::Texture::ConvertRGBA8ToRGBA4(source, dest);
default:
break;
}
}
LOG_WARNING(Render_Vulkan, "Missing format conversion: {} {} {}",
vk::to_string(surface.traits.native), upload ? "->" : "<-", vk::to_string(surface.alloc.format));
}
bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
@ -276,11 +285,12 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
}
auto [it, new_framebuffer] = clear_framebuffers.try_emplace(alloc.image_view, vk::Framebuffer{});
if (new_framebuffer) {
const vk::ImageView framebuffer_view = surface.GetFramebufferView();
const vk::FramebufferCreateInfo framebuffer_info = {
.renderPass = clear_renderpass,
.attachmentCount = 1,
.pAttachments = &alloc.image_view,
.pAttachments = &framebuffer_view,
.width = surface.GetScaledWidth(),
.height = surface.GetScaledHeight(),
.layers = 1
@ -377,7 +387,7 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCor
command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
dest.alloc.image, vk::ImageLayout::eTransferDstOptimal,
blit_area, vk::Filter::eLinear);
blit_area, vk::Filter::eNearest);
return true;
}
@ -528,7 +538,7 @@ void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& al
Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
: VideoCore::SurfaceBase<Surface>{params}, runtime{runtime}, instance{runtime.GetInstance()},
scheduler{runtime.GetScheduler()} {
scheduler{runtime.GetScheduler()}, traits{instance.GetTraits(pixel_format)} {
if (pixel_format != VideoCore::PixelFormat::Invalid) {
alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type);
@ -604,7 +614,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const VideoCore::Rect2D rect = download.texture_rect;
vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset,
.bufferOffset = staging.buffer_offset + download.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),
.imageSubresource = {
@ -624,20 +634,17 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
copy_regions[region_count++] = copy_region;
if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) {
return; // HACK: Skip depth + stencil downloads for now
copy_region.bufferOffset += staging.mapped.size();
copy_region.imageSubresource.aspectMask |= vk::ImageAspectFlagBits::eStencil;
copy_region.bufferOffset += 4 * staging.size / 5;
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
copy_regions[region_count++] = copy_region;
}
}
runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1);
runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferSrcOptimal, 0, alloc.levels);
// Copy pixel data to the staging buffer
command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal,
staging.buffer, region_count, copy_regions.data());
scheduler.Submit(SubmitMode::Flush);
}
// Lock this data until the next scheduler switch
@ -645,6 +652,22 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
runtime.staging_offsets[current_slot] += staging.size;
}
bool Surface::NeedsConvert() const {
// RGBA8 needs a byteswap since R8G8B8A8UnormPack32 does not exist
// D24S8 always needs an interleave pass even if natively supported
return alloc.format != traits.native ||
pixel_format == VideoCore::PixelFormat::RGBA8 ||
pixel_format == VideoCore::PixelFormat::D24S8;
}
u32 Surface::GetInternalBytesPerPixel() const {
if (alloc.format == vk::Format::eD32SfloatS8Uint) {
return 8;
}
return vk::blockSize(alloc.format);
}
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
/*const u32 rect_width = download.texture_rect.GetWidth();
const u32 rect_height = download.texture_rect.GetHeight();


@ -10,6 +10,7 @@
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/rasterizer_cache/types.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
namespace Vulkan {
@ -24,10 +25,14 @@ struct StagingData {
struct ImageAlloc {
vk::Image image;
vk::ImageView image_view;
vk::ImageView base_view;
VmaAllocation allocation;
vk::ImageUsageFlags usage;
vk::Format format;
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone;
u32 levels = 1;
u32 layers = 1;
};
class Instance;
@ -48,6 +53,9 @@ public:
/// Maps an internal staging buffer of the provided size for pixel uploads/downloads
[[nodiscard]] StagingData FindStaging(u32 size, bool upload);
/// Causes a GPU command flush
void Finish();
/// Allocates a Vulkan image, possibly reusing an existing one
[[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type);
@ -56,7 +64,7 @@ public:
void Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc);
/// Performs required format conversions on the staging data
void FormatConvert(VideoCore::PixelFormat format, bool upload,
void FormatConvert(const Surface& surface, bool upload,
std::span<std::byte> source, std::span<std::byte> dest);
/// Transitions the mip level range of the surface to new_layout
@ -114,6 +122,27 @@ public:
/// Downloads pixel data to staging from a rectangle region of the surface texture
void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging);
/// Returns true if the surface requires pixel data conversion
bool NeedsConvert() const;
/// Returns the bytes per pixel of the internal surface format
u32 GetInternalBytesPerPixel() const;
/// Returns an image view used to sample the surface from a shader
vk::ImageView GetImageView() const {
return alloc.image_view;
}
/// Returns an image view used to create a framebuffer
vk::ImageView GetFramebufferView() {
return alloc.base_view;
}
/// Returns the internal format of the allocated texture
vk::Format GetInternalFormat() const {
return alloc.format;
}
private:
/// Downloads scaled image by downscaling the requested rectangle
void ScaledDownload(const VideoCore::BufferTextureCopy& download);
@ -128,9 +157,8 @@ private:
TextureRuntime& runtime;
const Instance& instance;
TaskScheduler& scheduler;
ImageAlloc alloc{};
vk::Format internal_format = vk::Format::eUndefined;
FormatTraits traits;
};
struct Traits {


@ -233,21 +233,67 @@ void ConvertBGRToRGB(std::span<const std::byte> source, std::span<std::byte> des
void ConvertBGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest) {
u32 j = 0;
for (std::size_t i = 0; i < source.size(); i += 3) {
dest[j] = source[i + 2];
dest[j + 1] = source[i + 1];
dest[j + 2] = source[i];
dest[j + 3] = std::byte{0xFF};
j += 4;
for (std::size_t i = 0; i < dest.size(); i += 4) {
dest[i] = source[j + 2];
dest[i + 1] = source[j + 1];
dest[i + 2] = source[j];
dest[i + 3] = std::byte{0xFF};
j += 3;
}
}
void ConvertABGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest) {
for (u32 i = 0; i < source.size(); i += 4) {
for (u32 i = 0; i < dest.size(); i += 4) {
const u32 abgr = *reinterpret_cast<const u32*>(source.data() + i);
const u32 rgba = Common::swap32(abgr);
std::memcpy(dest.data() + i, &rgba, 4);
}
}
void ConvertD32S8ToD24S8(std::span<const std::byte> source, std::span<std::byte> dest) {
u32 depth_offset = 0;
u32 stencil_offset = 4 * source.size() / 5;
for (std::size_t i = 0; i < dest.size(); i += 4) {
float depth;
std::memcpy(&depth, source.data() + depth_offset, sizeof(float));
u32 depth_uint = depth * 0xFFFFFF;
dest[i] = source[stencil_offset];
std::memcpy(dest.data() + i + 1, &depth_uint, 3);
depth_offset += 4;
stencil_offset += 1;
}
}
void ConvertRGBA4ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest) {
u32 j = 0;
for (std::size_t i = 0; i < dest.size(); i += 4) {
auto rgba = Color::DecodeRGBA4(reinterpret_cast<const u8*>(source.data() + j));
std::memcpy(dest.data() + i, rgba.AsArray(), sizeof(rgba));
j += 2;
}
}
void ConvertRGBA8ToRGBA4(std::span<const std::byte> source, std::span<std::byte> dest) {
u32 j = 0;
for (std::size_t i = 0; i < dest.size(); i += 2) {
Common::Vec4<u8> rgba;
std::memcpy(rgba.AsArray(), source.data() + j, sizeof(rgba));
Color::EncodeRGBA4(rgba, reinterpret_cast<u8*>(dest.data() + i));
j += 4;
}
}
void InterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest) {
u32 depth_offset = 0;
u32 stencil_offset = 3 * source.size() / 4;
for (std::size_t i = 0; i < dest.size(); i += 4) {
dest[i] = source[stencil_offset];
std::memcpy(dest.data() + i + 1, source.data() + depth_offset, 3);
depth_offset += 3;
stencil_offset += 1;
}
}
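// A sketch of the planar source layout implied by the offset math in the two
// helpers above, for N pixels (the split follows from the per-pixel byte sizes):
//   ConvertD32S8ToD24S8 source: [ 4*N bytes of float depth | N bytes of stencil ] -> stencil plane at 4/5
//   InterleaveD24S8 source:     [ 3*N bytes of depth       | N bytes of stencil ] -> stencil plane at 3/4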
} // namespace Pica::Texture


@ -80,4 +80,12 @@ void ConvertBGRToRGBA(std::span<const std::byte> source, std::span<std::byte> de
*/
void ConvertABGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest);
void ConvertD32S8ToD24S8(std::span<const std::byte> source, std::span<std::byte> dest);
void ConvertRGBA4ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest);
void ConvertRGBA8ToRGBA4(std::span<const std::byte> source, std::span<std::byte> dest);
void InterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest);
} // namespace Pica::Texture