renderer_vulkan: Optimize tiled format conversion + fix vertex buffer alignment
* Integrate format conversion into the morton copy function, removing the need for an intermediate copy and conversion pass. This should be beneficial for performance, especially since most games use tiled textures.
* Also bump the vertex buffer size to avoid crashes with hardware shaders and provide the correct offset on normal draws, which fixes glitches in Pokemon Y.
* Reduce the local group size to 8 in the D24S8 compute shader, which fixes graphical issues in the aforementioned Pokemon games at native resolution.
* Set LOD to 0 instead of 0.25 to fix another glitch in Pokemon Y.
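For context, a minimal standalone sketch of the idea behind the first bullet (stand-in names and a single hard-coded format, not the project's actual DecodePixel): a compile-time converted flag lets the per-pixel decode expand a tiled RGB565 texel straight to RGBA8 while the morton walk already has the pixel in hand, so no second conversion pass over the linear buffer is needed.

// Hypothetical illustration only: when Converted is true the 16-bit RGB565 texel is
// widened to RGBA8 during the tiled->linear copy instead of in a separate pass.
#include <array>
#include <cstdint>
#include <cstdio>
#include <cstring>

template <bool Converted>
void DecodeRGB565Pixel(const std::uint8_t* src, std::uint8_t* dst) {
    std::uint16_t texel;
    std::memcpy(&texel, src, sizeof(texel));
    if constexpr (Converted) {
        // Expand the 5/6/5 channels to 8 bits each and append an opaque alpha byte.
        dst[0] = static_cast<std::uint8_t>(((texel >> 11) & 0x1F) * 255 / 31); // R
        dst[1] = static_cast<std::uint8_t>(((texel >> 5) & 0x3F) * 255 / 63);  // G
        dst[2] = static_cast<std::uint8_t>((texel & 0x1F) * 255 / 31);         // B
        dst[3] = 0xFF;                                                         // A
    } else {
        // Unconverted path: a plain two-byte copy, the format stays RGB565.
        std::memcpy(dst, src, 2);
    }
}

int main() {
    const std::array<std::uint8_t, 2> tiled_texel = {0xE0, 0x07}; // pure green in RGB565
    std::array<std::uint8_t, 4> rgba{};
    DecodeRGB565Pixel<true>(tiled_texel.data(), rgba.data());
    std::printf("R=%d G=%d B=%d A=%d\n", rgba[0], rgba[1], rgba[2], rgba[3]);
}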
@@ -26,6 +26,7 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
ui->graphics_api_combo->setEnabled(not_running);
ui->toggle_shader_jit->setEnabled(not_running);
ui->toggle_disk_shader_cache->setEnabled(hw_renderer_enabled && not_running);
ui->physical_device_combo->setEnabled(not_running);
SetPhysicalDeviceComboVisibility(ui->graphics_api_combo->currentIndex());

connect(ui->graphics_api_combo, qOverload<int>(&QComboBox::currentIndexChanged), this,

@@ -22,13 +22,31 @@ inline T MakeInt(const std::byte* bytes) {
return integer;
}

template <PixelFormat format>
template <PixelFormat format, bool converted>
inline void DecodePixel(const std::byte* source, std::byte* dest) {
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;

if constexpr (format == PixelFormat::D24S8) {
const u32 d24s8 = std::rotl(MakeInt<u32>(source), 8);
std::memcpy(dest, &d24s8, sizeof(u32));
} else if constexpr (format == PixelFormat::RGBA8 && converted) {
const u32 rgba = MakeInt<u32>(source);
const u32 abgr = Common::swap32(rgba);
std::memcpy(dest, &abgr, 4);
} else if constexpr (format == PixelFormat::RGB8 && converted) {
u32 rgb{};
std::memcpy(&rgb, source, 3);
const u32 abgr = Common::swap32(rgb << 8) | 0xFF000000;
std::memcpy(dest, &abgr, 4);
} else if constexpr (format == PixelFormat::RGB565 && converted) {
const auto abgr = Color::DecodeRGB565(reinterpret_cast<const u8*>(source));
std::memcpy(dest, abgr.AsArray(), 4);
} else if constexpr (format == PixelFormat::RGB5A1 && converted) {
const auto abgr = Color::DecodeRGB5A1(reinterpret_cast<const u8*>(source));
std::memcpy(dest, abgr.AsArray(), 4);
} else if constexpr (format == PixelFormat::RGBA4 && converted) {
const auto abgr = Color::DecodeRGBA4(reinterpret_cast<const u8*>(source));
std::memcpy(dest, abgr.AsArray(), 4);
} else if constexpr (format == PixelFormat::IA8) {
std::memset(dest, static_cast<int>(source[1]), 3);
dest[3] = source[0];

@@ -95,23 +113,43 @@ inline void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byt
dest_pixel[3] = std::byte{alpha};
}

template <PixelFormat format>
template <PixelFormat format, bool converted>
inline void EncodePixel(const std::byte* source, std::byte* dest) {
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;

if constexpr (format == PixelFormat::D24S8) {
const u32 s8d24 = std::rotr(MakeInt<u32>(source), 8);
std::memcpy(dest, &s8d24, sizeof(u32));
} else if constexpr (format == PixelFormat::RGBA8 && converted) {
const u32 abgr = MakeInt<u32>(source);
const u32 rgba = Common::swap32(abgr);
std::memcpy(dest, &rgba, 4);
} else if constexpr (format == PixelFormat::RGB8 && converted) {
const u32 abgr = MakeInt<u32>(source);
const u32 rgb = Common::swap32(abgr << 8);
std::memcpy(dest, &rgb, 3);
} else if constexpr (format == PixelFormat::RGB565 && converted) {
Common::Vec4<u8> rgba;
std::memcpy(rgba.AsArray(), source, 4);
Color::EncodeRGB565(rgba, reinterpret_cast<u8*>(dest));
} else if constexpr (format == PixelFormat::RGB5A1 && converted) {
Common::Vec4<u8> rgba;
std::memcpy(rgba.AsArray(), source, 4);
Color::EncodeRGB5A1(rgba, reinterpret_cast<u8*>(dest));
} else if constexpr (format == PixelFormat::RGBA4 && converted) {
Common::Vec4<u8> rgba;
std::memcpy(rgba.AsArray(), source, 4);
Color::EncodeRGBA4(rgba, reinterpret_cast<u8*>(dest));
} else {
std::memcpy(dest, source, bytes_per_pixel);
}
}

template <bool morton_to_linear, PixelFormat format>
template <bool morton_to_linear, PixelFormat format, bool converted>
inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
std::span<std::byte> linear_buffer) {
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
constexpr u32 linear_bytes_per_pixel = GetBytesPerPixel(format);
constexpr u32 linear_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4;
constexpr bool is_4bit = format == PixelFormat::I4 || format == PixelFormat::A4;

@@ -127,10 +165,10 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
} else if constexpr (is_4bit) {
DecodePixel4<format>(x, y, tile_buffer.data(), linear_pixel.data());
} else {
DecodePixel<format>(tiled_pixel.data(), linear_pixel.data());
DecodePixel<format, converted>(tiled_pixel.data(), linear_pixel.data());
}
} else {
EncodePixel<format>(linear_pixel.data(), tiled_pixel.data());
EncodePixel<format, converted>(linear_pixel.data(), tiled_pixel.data());
}
}
}

@@ -138,6 +176,7 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,

/**
* @brief Performs morton to/from linear convertions on the provided pixel data
* @param converted If true performs RGBA8 to/from convertion to all color formats
* @param width, height The dimentions of the rectangular region of pixels in linear_buffer
* @param start_offset The number of bytes from the start of the first tile to the start of
* tiled_buffer

@@ -160,11 +199,11 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
* start_offset/end_offset are useful here as they tell us exactly where the data should be placed
* in the linear_buffer.
*/
template <bool morton_to_linear, PixelFormat format>
template <bool morton_to_linear, PixelFormat format, bool converted = false>
static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer) {
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format);
constexpr u32 aligned_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, "");

@@ -202,7 +241,7 @@ static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
if (start_offset < aligned_start_offset && !morton_to_linear) {
std::array<std::byte, tile_size> tmp_buf;
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
MortonCopyTile<morton_to_linear, format>(width, tmp_buf, linear_data);
MortonCopyTile<morton_to_linear, format, converted>(width, tmp_buf, linear_data);

std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset,
std::min(aligned_start_offset, end_offset) - start_offset);

@@ -215,7 +254,7 @@ static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
while (tiled_offset < buffer_end) {
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size);
MortonCopyTile<morton_to_linear, format>(width, tiled_data, linear_data);
MortonCopyTile<morton_to_linear, format, converted>(width, tiled_data, linear_data);
tiled_offset += tile_size;
LinearNextTile();
}

@@ -225,7 +264,7 @@ static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) {
std::array<std::byte, tile_size> tmp_buf;
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
MortonCopyTile<morton_to_linear, format>(width, tmp_buf, linear_data);
MortonCopyTile<morton_to_linear, format, converted>(width, tmp_buf, linear_data);
std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(),
end_offset - aligned_end_offset);
}

@@ -254,6 +293,14 @@ static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = {
MortonCopy<true, PixelFormat::D24S8> // 17
};

static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE_CONVERTED = {
MortonCopy<true, PixelFormat::RGBA8, true>, // 0
MortonCopy<true, PixelFormat::RGB8, true>, // 1
MortonCopy<true, PixelFormat::RGB5A1, true>, // 2
MortonCopy<true, PixelFormat::RGB565, true>, // 3
MortonCopy<true, PixelFormat::RGBA4, true> // 4
};

static constexpr std::array<MortonFunc, 18> SWIZZLE_TABLE = {
MortonCopy<false, PixelFormat::RGBA8>, // 0
MortonCopy<false, PixelFormat::RGB8>, // 1

@@ -275,4 +322,12 @@ static constexpr std::array<MortonFunc, 18> SWIZZLE_TABLE = {
MortonCopy<false, PixelFormat::D24S8> // 17
};

static constexpr std::array<MortonFunc, 18> SWIZZLE_TABLE_CONVERTED = {
MortonCopy<false, PixelFormat::RGBA8, true>, // 0
MortonCopy<false, PixelFormat::RGB8, true>, // 1
MortonCopy<false, PixelFormat::RGB5A1, true>, // 2
MortonCopy<false, PixelFormat::RGB565, true>, // 3
MortonCopy<false, PixelFormat::RGBA4, true> // 4
};
} // namespace VideoCore
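A rough usage illustration of the converted tables above (two-entry table and stand-in signature, not the emulator's real MortonFunc): the caller keeps the same per-format index and only switches which table it reads, which is how SwizzleTexture/UnswizzleTexture pick a path further down in this commit.

// Hypothetical dispatch sketch: same format index, different table when conversion
// to/from RGBA8 is requested by the texture runtime.
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

using MortonFn = void (*)(std::uint32_t width, std::uint32_t height);

template <bool Converted>
void CopyRGBA8(std::uint32_t w, std::uint32_t h) {
    std::printf("RGBA8 %ux%u converted=%d\n", w, h, Converted);
}
template <bool Converted>
void CopyRGB565(std::uint32_t w, std::uint32_t h) {
    std::printf("RGB565 %ux%u converted=%d\n", w, h, Converted);
}

constexpr std::array<MortonFn, 2> UNSWIZZLE = {CopyRGBA8<false>, CopyRGB565<false>};
constexpr std::array<MortonFn, 2> UNSWIZZLE_CONVERTED = {CopyRGBA8<true>, CopyRGB565<true>};

void Unswizzle(std::size_t format_index, bool convert, std::uint32_t w, std::uint32_t h) {
    // Pick the table based on whether the backend wants RGBA8 output directly.
    const auto& table = convert ? UNSWIZZLE_CONVERTED : UNSWIZZLE;
    table[format_index](w, h);
}

int main() {
    Unswizzle(1, true, 64, 64);  // RGB565 tile decoded straight to RGBA8
    Unswizzle(1, false, 64, 64); // RGB565 kept in its native 16-bit layout
}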

@@ -916,10 +916,8 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i

const auto upload_data = source_ptr.GetWriteBytes(load_info.end - load_info.addr);
if (surface->is_tiled) {
std::vector<std::byte> unswizzled_data(load_info.width * load_info.height *
GetBytesPerPixel(load_info.pixel_format));
UnswizzleTexture(load_info, load_info.addr, load_info.end, upload_data, unswizzled_data);
runtime.FormatConvert(*surface, true, unswizzled_data, staging.mapped);
UnswizzleTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped,
runtime.NeedsConvertion(surface->pixel_format));
} else {
runtime.FormatConvert(*surface, true, upload_data, staging.mapped);
}

@@ -959,10 +957,8 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);

if (surface->is_tiled) {
std::vector<std::byte> temp_data(flush_info.width * flush_info.height *
GetBytesPerPixel(flush_info.pixel_format));
runtime.FormatConvert(*surface, false, mapped, temp_data);
SwizzleTexture(flush_info, flush_start, flush_end, temp_data, download_dest);
SwizzleTexture(flush_info, flush_start, flush_end, mapped, download_dest,
runtime.NeedsConvertion(surface->pixel_format));
} else {
runtime.FormatConvert(*surface, false, mapped, download_dest);
}

@@ -48,17 +48,20 @@ ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_d
}

void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr,
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled) {
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled,
bool convert) {
const u32 func_index = static_cast<u32>(swizzle_info.pixel_format);
const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index];
const MortonFunc SwizzleImpl = (convert ? SWIZZLE_TABLE_CONVERTED : SWIZZLE_TABLE)[func_index];
SwizzleImpl(swizzle_info.width, swizzle_info.height, start_addr - swizzle_info.addr,
end_addr - swizzle_info.addr, source_linear, dest_tiled);
}

void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr,
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear) {
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear,
bool convert) {
const u32 func_index = static_cast<u32>(unswizzle_info.pixel_format);
const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index];
const MortonFunc UnswizzleImpl =
(convert ? UNSWIZZLE_TABLE_CONVERTED : UNSWIZZLE_TABLE)[func_index];
UnswizzleImpl(unswizzle_info.width, unswizzle_info.height, start_addr - unswizzle_info.addr,
end_addr - unswizzle_info.addr, dest_linear, source_tiled);
}

@@ -116,7 +116,8 @@ struct TextureCubeConfig {
* @param dest_linear The output buffer where the generated linear data will be written to.
*/
void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr,
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear);
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear,
bool convert = false);

/**
* Swizzles a linear texture according to the morton code.

@@ -128,7 +129,8 @@ void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAd
* @param dest_linear The output buffer where the generated linear data will be written to.
*/
void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr,
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled);
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled,
bool convert = false);
} // namespace VideoCore
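The convert parameter defaults to false in both declarations, so here is a small self-contained sketch of the compatibility angle (a stub function, not the real UnswizzleTexture): existing call sites keep compiling unchanged, while the GL/Vulkan runtimes can opt in per call based on their own NeedsConvertion check.

// Stand-in stub demonstrating the defaulted flag; the real function takes SurfaceParams
// and address ranges as declared above.
#include <cstddef>
#include <cstdio>
#include <span>
#include <vector>

void UnswizzleStub(std::span<const std::byte> tiled, std::span<std::byte> linear,
                   bool convert = false) {
    std::printf("unswizzle %zu -> %zu bytes, convert=%d\n", tiled.size(), linear.size(),
                static_cast<int>(convert));
}

int main() {
    std::vector<std::byte> tiled(64), linear(256);
    UnswizzleStub(tiled, linear);       // pre-existing call shape, no conversion
    UnswizzleStub(tiled, linear, true); // new call shape used when the runtime wants RGBA8
}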

@@ -11,7 +11,7 @@ namespace OpenGL {

D24S8toRGBA8::D24S8toRGBA8(bool use_texture_view) : use_texture_view{use_texture_view} {
constexpr std::string_view cs_source = R"(
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(binding = 0) uniform highp sampler2D depth;
layout(binding = 1) uniform lowp usampler2D stencil;
layout(rgba8, binding = 2) uniform highp writeonly image2D color;

@@ -77,7 +77,7 @@ void D24S8toRGBA8::Reinterpret(const Surface& source, VideoCore::Rect2D src_rect
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);

glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
glDispatchCompute(src_rect.GetWidth() / 32, src_rect.GetHeight() / 32, 1);
glDispatchCompute(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1);

if (use_texture_view) {
temp_tex.Release();

@@ -319,6 +319,11 @@ const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(
return reinterpreters[static_cast<u32>(dest_format)];
}

bool TextureRuntime::NeedsConvertion(VideoCore::PixelFormat format) const {
return driver.IsOpenGLES() &&
(format == VideoCore::PixelFormat::RGB8 || format == VideoCore::PixelFormat::RGBA8);
}

void TextureRuntime::BindFramebuffer(GLenum target, GLint level, GLenum textarget,
VideoCore::SurfaceType type, OGLTexture& texture) const {
const GLint framebuffer = target == GL_DRAW_FRAMEBUFFER ? draw_fbo.handle : read_fbo.handle;

@@ -97,6 +97,9 @@ public:
[[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations(
VideoCore::PixelFormat dest_format) const;

/// Returns true if the provided pixel format needs convertion
[[nodiscard]] bool NeedsConvertion(VideoCore::PixelFormat format) const;

private:
/// Returns the framebuffer used for texture downloads
void BindFramebuffer(GLenum target, GLint level, GLenum textarget, VideoCore::SurfaceType type,

@@ -14,7 +14,7 @@ D24S8toRGBA8::D24S8toRGBA8(const Instance& instance, TaskScheduler& scheduler,
constexpr std::string_view cs_source = R"(
#version 450 core
#extension GL_EXT_samplerless_texture_functions : require
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(set = 0, binding = 0) uniform highp texture2D depth;
layout(set = 0, binding = 1) uniform lowp utexture2D stencil;
layout(set = 0, binding = 2, rgba8) uniform highp writeonly image2D color;

@@ -154,7 +154,7 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
command_buffer.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0,
sizeof(Common::Vec2i), src_offset.AsArray());

command_buffer.dispatch(src_rect.GetWidth() / 32, src_rect.GetHeight() / 32, 1);
command_buffer.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1);
}
} // namespace Vulkan
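A standalone arithmetic check (my own illustration, not part of the commit) of why the smaller local size matters at the 3DS native resolution: the dispatch divides the rect by the group size with integer division, so a 400x240 rect loses a 16 pixel strip on each axis with 32x32 groups, while 8x8 groups cover it exactly.

// Integer-division dispatch counts and the pixels left uncovered for a 400x240 rect.
#include <cstdint>
#include <cstdio>

void ReportCoverage(std::uint32_t width, std::uint32_t height, std::uint32_t group) {
    const std::uint32_t groups_x = width / group;  // mirrors dispatch(width / group, ...)
    const std::uint32_t groups_y = height / group;
    std::printf("group %2u: %ux%u groups, uncovered strip %ux%u px\n", group, groups_x,
                groups_y, width - groups_x * group, height - groups_y * group);
}

int main() {
    ReportCoverage(400, 240, 32); // 12x7 groups -> 16 px missed on each axis
    ReportCoverage(400, 240, 8);  // 50x30 groups -> full coverage
}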

@@ -16,6 +16,8 @@
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/video_core.h"

#include <vk_mem_alloc.h>

namespace Vulkan {

MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0));

@@ -93,7 +95,7 @@ constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() {
return layout;
}

constexpr u32 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
constexpr u32 VERTEX_BUFFER_SIZE = 256 * 1024 * 1024;
constexpr u32 INDEX_BUFFER_SIZE = 8 * 1024 * 1024;
constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 TEXTURE_BUFFER_SIZE = 16 * 1024 * 1024;

@@ -121,10 +123,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} {

// Create a 1x1 clear texture to use in the NULL case,
default_texture =
runtime.Allocate(1, 1, VideoCore::PixelFormat::RGBA8, VideoCore::TextureType::Texture2D);
runtime.Transition(scheduler.GetUploadCommandBuffer(), default_texture,
vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1);
CreateDefaultTextures();

uniform_block_data.lighting_lut_dirty.fill(true);

@@ -162,7 +161,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
}

for (u32 i = 0; i < 7; i++) {
pipeline_cache.BindStorageImage(i, default_texture.image_view);
pipeline_cache.BindStorageImage(i, default_storage_texture.image_view);
}

// Explicitly call the derived version to avoid warnings about calling virtual

@@ -174,6 +173,7 @@ RasterizerVulkan::~RasterizerVulkan() {
renderpass_cache.ExitRenderpass();
scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown);

VmaAllocator allocator = instance.GetAllocator();
vk::Device device = instance.GetDevice();

for (auto& [key, sampler] : samplers) {

@@ -184,10 +184,10 @@ RasterizerVulkan::~RasterizerVulkan() {
device.destroyFramebuffer(framebuffer);
}

const VideoCore::HostTextureTag tag = {
.format = VideoCore::PixelFormat::RGBA8, .width = 1, .height = 1};

runtime.Recycle(tag, std::move(default_texture));
vmaDestroyImage(allocator, default_texture.image, default_texture.allocation);
vmaDestroyImage(allocator, default_storage_texture.image, default_storage_texture.allocation);
device.destroyImageView(default_texture.image_view);
device.destroyImageView(default_storage_texture.image_view);
device.destroySampler(default_sampler);
}

@@ -374,7 +374,6 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
enable_attributes[input_reg] = true;
offset += vertex_attributes.GetStride(attribute_index);
}

} else {
// Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings
// respectively

@@ -675,7 +674,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
if (surface != nullptr) {
pipeline_cache.BindStorageImage(binding, surface->alloc.image_view);
} else {
pipeline_cache.BindStorageImage(binding, default_texture.image_view);
pipeline_cache.BindStorageImage(binding, default_storage_texture.image_view);
}
};

@@ -687,16 +686,15 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
// filters GL_LINEAR/GL_NEAREST so emulate them by setting minLod = 0, and maxLod = 0.25,
// and using minFilter = VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST
const bool skip_mipmap = config.type == Pica::TexturingRegs::TextureConfig::TextureCube;
info =
SamplerInfo{.mag_filter = config.mag_filter,
.min_filter = config.min_filter,
.mip_filter = config.mip_filter,
.wrap_s = config.wrap_s,
.wrap_t = config.wrap_t,
.border_color = config.border_color.raw,
.lod_min = skip_mipmap ? 0.f : static_cast<float>(config.lod.min_level),
.lod_max = skip_mipmap ? 0.25f : static_cast<float>(config.lod.max_level),
.lod_bias = static_cast<float>(config.lod.bias)};
info = SamplerInfo{.mag_filter = config.mag_filter,
.min_filter = config.min_filter,
.mip_filter = config.mip_filter,
.wrap_s = config.wrap_s,
.wrap_t = config.wrap_t,
.border_color = config.border_color.raw,
.lod_min = skip_mipmap ? 0.f : static_cast<float>(config.lod.min_level),
.lod_max = skip_mipmap ? 0.f : static_cast<float>(config.lod.max_level),
.lod_bias = static_cast<float>(config.lod.bias)};

// Search the cache and bind the appropriate sampler
if (auto it = samplers.find(info); it != samplers.end()) {

@@ -708,8 +706,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
}
};

renderpass_cache.ExitRenderpass();

// Sync and bind the texture surfaces
const auto pica_textures = regs.texturing.GetTextures();
for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {

@@ -722,9 +718,9 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
case TextureType::Shadow2D: {
auto surface = res_cache.GetTextureSurface(texture);
if (surface != nullptr) {
pipeline_cache.BindStorageImage(0, surface->alloc.image_view);
pipeline_cache.BindStorageImage(0, surface->GetImageView());
} else {
pipeline_cache.BindStorageImage(0, default_texture.image_view);
pipeline_cache.BindStorageImage(0, default_storage_texture.image_view);
}
continue;
}

@@ -874,13 +870,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
pipeline_cache.UseTrivialGeometryShader();
pipeline_cache.BindPipeline(pipeline_info);

// Bind the vertex buffer at the current mapped offset. This effectively means
// that when base_vertex is zero the GPU will start drawing from the current mapped
// offset not the start of the buffer.
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(),
vertex_buffer.GetBufferOffset());

const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex);
const u32 batch_size = static_cast<u32>(vertex_batch.size());
for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) {

@@ -892,6 +881,12 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size);
vertex_buffer.Commit(vertex_size);

// Bind the vertex buffer at the current mapped offset. This effectively means
// that when base_vertex is zero the GPU will start drawing from the current mapped
// offset not the start of the buffer.
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset);

command_buffer.draw(vertices, 1, base_vertex, 0);
}
}
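A simplified standalone sketch of the offset fix above (a made-up stream-buffer stand-in, not the emulator's real interface): the vertex buffer is re-bound inside the loop at the offset returned by each map call, rather than at a single offset captured before the loop, so every draw reads the chunk that was just written.

// Hypothetical stream buffer: Map() hands back the offset of the chunk about to be written.
#include <cstdint>
#include <cstdio>
#include <utility>

struct FakeStreamBuffer {
    std::uint64_t cursor = 0;
    std::pair<std::uint64_t, std::uint64_t> Map(std::uint64_t size) {
        const std::uint64_t offset = cursor; // where this chunk's vertices will land
        cursor += size;
        return {offset, size};
    }
};

int main() {
    FakeStreamBuffer vertex_buffer;
    constexpr std::uint64_t vertex_size = 32;
    for (int chunk = 0; chunk < 3; ++chunk) {
        const auto [offset, size] = vertex_buffer.Map(3 * vertex_size);
        // bindVertexBuffers(0, handle, offset) would go here, followed by the draw call,
        // so vertex fetches are always relative to the freshly mapped chunk.
        std::printf("chunk %d: bind at offset %llu, %llu bytes\n", chunk,
                    static_cast<unsigned long long>(offset),
                    static_cast<unsigned long long>(size));
    }
}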

@@ -1642,6 +1637,66 @@ vk::Framebuffer RasterizerVulkan::CreateFramebuffer(const FramebufferInfo& info)
return device.createFramebuffer(framebuffer_info);
}

void RasterizerVulkan::CreateDefaultTextures() {
default_texture.format = vk::Format::eR8G8B8A8Unorm;
default_storage_texture.format = vk::Format::eR8G8B8A8Uint;

vk::ImageCreateInfo image_info = {.imageType = vk::ImageType::e2D,
.format = default_texture.format,
.extent = {1, 1, 1},
.mipLevels = 1,
.arrayLayers = 1,
.samples = vk::SampleCountFlagBits::e1,
.usage = GetImageUsage(vk::ImageAspectFlagBits::eColor)};

const VmaAllocationCreateInfo alloc_info = {.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE};

VkImage unsafe_image{};
VkImageCreateInfo& unsafe_image_info = static_cast<VkImageCreateInfo&>(image_info);

VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info,
&unsafe_image, &default_texture.allocation, nullptr);
if (result != VK_SUCCESS) {
LOG_CRITICAL(Render_Vulkan, "Failed allocating default texture with error {}", result);
UNREACHABLE();
}

default_texture.image = vk::Image{unsafe_image};
vk::ImageViewCreateInfo view_info = {
.image = default_texture.image,
.viewType = vk::ImageViewType::e2D,
.format = default_texture.format,
.subresourceRange = {.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1}};

vk::Device device = instance.GetDevice();
default_texture.image_view = device.createImageView(view_info);

// Define the default texture for storage descriptors
image_info.format = default_storage_texture.format;
result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info, &unsafe_image,
&default_storage_texture.allocation, nullptr);
if (result != VK_SUCCESS) {
LOG_CRITICAL(Render_Vulkan, "Failed allocating default storage texture with error {}",
result);
UNREACHABLE();
}

default_storage_texture.image = vk::Image{unsafe_image};

view_info.format = default_storage_texture.format;
view_info.image = default_storage_texture.image;
default_storage_texture.image_view = device.createImageView(view_info);

runtime.Transition(scheduler.GetUploadCommandBuffer(), default_texture,
vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1);
runtime.Transition(scheduler.GetUploadCommandBuffer(), default_storage_texture,
vk::ImageLayout::eGeneral, 0, 1);
}

void RasterizerVulkan::FlushBuffers() {
vertex_buffer.Flush();
uniform_buffer.Flush();

@@ -222,6 +222,9 @@ private:
/// Internal implementation for AccelerateDrawBatch
bool AccelerateDrawBatchInternal(bool is_indexed);

/// Copies vertex data performing needed convertions and casts
void PaddedVertexCopy(u32 stride, u32 vertex_num, u8* data);

struct VertexArrayInfo {
u32 vs_input_index_min;
u32 vs_input_index_max;

@@ -246,6 +249,8 @@ private:
/// Creates a new Vulkan framebuffer object
vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info);

void CreateDefaultTextures();

private:
const Instance& instance;
TaskScheduler& scheduler;

@@ -274,8 +279,9 @@ private:

std::vector<HardwareVertex> vertex_batch;
std::array<u64, 16> binding_offsets{};
ImageAlloc default_texture;
vk::Sampler default_sampler;
ImageAlloc default_texture;
ImageAlloc default_storage_texture;

struct {
Pica::Shader::UniformData data{};

@@ -1642,7 +1642,16 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
UNREACHABLE();
}

out += fmt::format("layout(location = {0}) in {1}vec4 vs_in_reg{0};\n", i, prefix);
out +=
fmt::format("layout(location = {0}) in {1}vec4 vs_in_typed_reg{0};\n", i, prefix);
}
}
out += '\n';

// cast input registers to float to avoid computational errors
for (std::size_t i = 0; i < used_regs.size(); ++i) {
if (used_regs[i]) {
out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i);
}
}
out += '\n';

@@ -33,6 +33,15 @@ void Swapchain::Create(u32 width, u32 height) {
is_outdated = false;
is_suboptimal = false;

// Destroy the previous image views
vk::Device device = instance.GetDevice();
for (auto& image : swapchain_images) {
if (image.image) {
device.destroyImageView(image.image_view);
device.destroyFramebuffer(image.framebuffer);
}
}

// Fetch information about the provided surface
Configure(width, height);

@@ -61,7 +70,6 @@ void Swapchain::Create(u32 width, u32 height) {
.clipped = true,
.oldSwapchain = swapchain};

vk::Device device = instance.GetDevice();
vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info);

// If an old swapchain exists, destroy it and move the new one to its place.

@@ -71,12 +79,6 @@ void Swapchain::Create(u32 width, u32 height) {

auto images = device.getSwapchainImagesKHR(swapchain);

// Destroy the previous images
for (auto& image : swapchain_images) {
device.destroyImageView(image.image_view);
device.destroyFramebuffer(image.framebuffer);
}

swapchain_images.clear();
swapchain_images.resize(images.size());

@@ -113,6 +115,10 @@ void Swapchain::Create(u32 width, u32 height) {
constexpr u64 ACQUIRE_TIMEOUT = 1000000000;

void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) {
if (NeedsRecreation()) [[unlikely]] {
return;
}

vk::Device device = instance.GetDevice();
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, signal_acquired,
VK_NULL_HANDLE, &current_image);

@@ -132,6 +138,10 @@ void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) {
}

void Swapchain::Present(vk::Semaphore wait_for_present) {
if (NeedsRecreation()) [[unlikely]] {
return;
}

const vk::PresentInfoKHR present_info = {.waitSemaphoreCount = 1,
.pWaitSemaphores = &wait_for_present,
.swapchainCount = 1,

@@ -145,6 +155,7 @@ void Swapchain::Present(vk::Semaphore wait_for_present) {
case vk::Result::eSuccess:
break;
case vk::Result::eSuboptimalKHR:
is_suboptimal = true;
LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
break;
case vk::Result::eErrorOutOfDateKHR:

@@ -154,8 +165,6 @@ void Swapchain::Present(vk::Semaphore wait_for_present) {
LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed");
break;
}

current_frame = (current_frame + 1) % swapchain_images.size();
}

void Swapchain::Configure(u32 width, u32 height) {

@@ -82,7 +82,6 @@ private:
// Swapchain state
std::vector<Image> swapchain_images;
u32 current_image = 0;
u32 current_frame = 0;
bool is_outdated = true;
bool is_suboptimal = true;
};

@@ -210,50 +210,28 @@ void TextureRuntime::Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& a

void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
std::span<std::byte> dest) {
if (!surface.NeedsConvert()) {
if (!NeedsConvertion(surface.pixel_format)) {
std::memcpy(dest.data(), source.data(), source.size());
return;
}

// Since this is the most common case handle it separately
if (surface.pixel_format == VideoCore::PixelFormat::RGBA8) {
return Pica::Texture::ConvertABGRToRGBA(source, dest);
}

// Handle simple D24S8 interleave case
if (surface.GetInternalFormat() == vk::Format::eD24UnormS8Uint) {
if (!upload) {
return Pica::Texture::InterleaveD24S8(source, dest);
} else {
UNREACHABLE();
}
}

if (upload) {
switch (surface.pixel_format) {
case VideoCore::PixelFormat::RGBA8:
return Pica::Texture::ConvertABGRToRGBA(source, dest);
case VideoCore::PixelFormat::RGB8:
return Pica::Texture::ConvertBGRToRGBA(source, dest);
case VideoCore::PixelFormat::RGBA4:
return Pica::Texture::ConvertRGBA4ToRGBA8(source, dest);
case VideoCore::PixelFormat::RGB5A1:
return Pica::Texture::ConvertRGB5A1ToRGBA8(source, dest);
default:
break;
}
} else {
switch (surface.pixel_format) {
case VideoCore::PixelFormat::D24S8:
return Pica::Texture::ConvertD32S8ToD24S8(source, dest);
case VideoCore::PixelFormat::RGBA4:
return Pica::Texture::ConvertRGBA8ToRGBA4(source, dest);
case VideoCore::PixelFormat::RGB8:
return Pica::Texture::ConvertRGBAToBGR(source, dest);
default:
break;
}
}

LOG_WARNING(Render_Vulkan, "Missing format convertion: {} {} {}",
LOG_WARNING(Render_Vulkan, "Missing linear format convertion: {} {} {}",
vk::to_string(surface.traits.native), upload ? "->" : "<-",
vk::to_string(surface.alloc.format));
}

@@ -457,6 +435,14 @@ const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(
return reinterpreters[static_cast<u32>(dest_format)];
}

bool TextureRuntime::NeedsConvertion(VideoCore::PixelFormat format) const {
const FormatTraits traits = instance.GetTraits(format);
const VideoCore::SurfaceType type = VideoCore::GetFormatType(format);
return type == VideoCore::SurfaceType::Color &&
(format == VideoCore::PixelFormat::RGBA8 || !traits.blit_support ||
!traits.attachment_support);
}

void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc,
vk::ImageLayout new_layout, u32 level, u32 level_count, u32 layer,
u32 layer_count) {

@@ -588,9 +574,12 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
if (is_scaled) {
ScaledUpload(upload);
} else {
u32 region_count = 0;
std::array<vk::BufferImageCopy, 2> copy_regions;

vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const VideoCore::Rect2D rect = upload.texture_rect;
const vk::BufferImageCopy copy_region = {
vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),

@@ -601,11 +590,25 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}};

if (alloc.aspect & vk::ImageAspectFlagBits::eColor) {
copy_regions[region_count++] = copy_region;
} else if (alloc.aspect & vk::ImageAspectFlagBits::eDepth) {
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
copy_regions[region_count++] = copy_region;

if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) {
copy_region.bufferOffset += 4 * staging.size / 5;
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
copy_regions[region_count++] = copy_region;
}
}

runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferDstOptimal, 0,
alloc.levels, 0,
texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
command_buffer.copyBufferToImage(staging.buffer, alloc.image,
vk::ImageLayout::eTransferDstOptimal, copy_region);
vk::ImageLayout::eTransferDstOptimal, region_count,
copy_regions.data());
}
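The bufferOffset bump above appears to assume a planar D32+S8 staging layout, i.e. five bytes per pixel with the depth plane first; a tiny standalone check of that arithmetic (assumed layout, stand-in numbers):

// Assumed layout: depth plane (4 bytes/px) followed by stencil plane (1 byte/px).
#include <cstddef>
#include <cstdio>

int main() {
    const std::size_t width = 400, height = 240;
    const std::size_t staging_size = width * height * 5;
    const std::size_t stencil_offset = 4 * staging_size / 5; // matches the bufferOffset bump
    std::printf("staging %zu bytes: depth plane %zu bytes, stencil plane %zu bytes\n",
                staging_size, stencil_offset, staging_size - stencil_offset);
}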

InvalidateAllWatcher();

@@ -667,13 +670,6 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
runtime.staging_offsets[current_slot] += staging.size;
}

bool Surface::NeedsConvert() const {
// RGBA8 needs a byteswap since R8G8B8A8UnormPack32 does not exist
// D24S8 always needs an interleave pass even if natively supported
return alloc.format != traits.native || pixel_format == VideoCore::PixelFormat::RGBA8 ||
pixel_format == VideoCore::PixelFormat::D24S8;
}

u32 Surface::GetInternalBytesPerPixel() const {
return vk::blockSize(alloc.format);
}

@@ -33,7 +33,7 @@ struct ImageAlloc {
vk::ImageUsageFlags usage;
vk::Format format;
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone;
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eColor;
u32 levels = 1;
u32 layers = 1;
};

@@ -92,6 +92,9 @@ public:
[[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations(
VideoCore::PixelFormat dest_format) const;

/// Returns true if the provided pixel format needs convertion
[[nodiscard]] bool NeedsConvertion(VideoCore::PixelFormat format) const;

/// Performs operations that need to be done on every scheduler slot switch
void OnSlotSwitch(u32 new_slot);

@@ -131,9 +134,6 @@ public:
/// Downloads pixel data to staging from a rectangle region of the surface texture
void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging);

/// Returns true if the surface requires pixel data convertion
bool NeedsConvert() const;

/// Returns the bpp of the internal surface format
u32 GetInternalBytesPerPixel() const;

@@ -289,6 +289,16 @@ void ConvertRGB5A1ToRGBA8(std::span<const std::byte> source, std::span<std::byte
}
}

void ConvertRGBA8ToRGB5A1(std::span<const std::byte> source, std::span<std::byte> dest) {
u32 j = 0;
for (std::size_t i = 0; i < dest.size(); i += 2) {
Common::Vec4<u8> rgba;
std::memcpy(rgba.AsArray(), source.data() + j, sizeof(rgba));
Color::EncodeRGB5A1(rgba, reinterpret_cast<u8*>(dest.data() + i));
j += 4;
}
}

void ConvertD32S8ToD24S8(std::span<const std::byte> source, std::span<std::byte> dest) {
std::size_t depth_offset = 0;
std::size_t stencil_offset = 4 * source.size() / 5;

@@ -316,4 +326,15 @@ void InterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> des
}
}

void DeinterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest) {
std::size_t depth_offset = 0;
std::size_t stencil_offset = 3 * source.size() / 4;
for (std::size_t i = 0; i < dest.size(); i += 4) {
dest[stencil_offset] = source[i];
std::memcpy(dest.data() + depth_offset, source.data() + i + 1, 3);
depth_offset += 3;
stencil_offset += 1;
}
}
} // namespace Pica::Texture
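As a usage sketch of the new DeinterleaveD24S8 (a standalone mock of the same loop, since the real helper lives in Pica::Texture): the first byte of every packed 4-byte texel goes to a stencil plane that starts three quarters of the way into the output, and the remaining three bytes go to the depth plane at the front.

// Mock of the deinterleave loop shown above, run on two packed {stencil, d0, d1, d2} texels.
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
    const unsigned char source[8] = {0xAA, 1, 2, 3, 0xBB, 4, 5, 6};
    std::vector<unsigned char> dest(sizeof(source));

    std::size_t depth_offset = 0;
    std::size_t stencil_offset = 3 * sizeof(source) / 4; // depth plane fills 3/4 of the buffer
    for (std::size_t i = 0; i < dest.size(); i += 4) {
        dest[stencil_offset] = source[i];
        std::memcpy(dest.data() + depth_offset, source + i + 1, 3);
        depth_offset += 3;
        stencil_offset += 1;
    }

    for (unsigned char b : dest) {
        std::printf("%d ", b); // prints: 1 2 3 4 5 6 170 187
    }
    std::printf("\n");
}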

@@ -87,8 +87,12 @@ void ConvertRGBA8ToRGBA4(std::span<const std::byte> source, std::span<std::byte>

void ConvertRGB5A1ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest);

void ConvertRGBA8ToRGB5A1(std::span<const std::byte> source, std::span<std::byte> dest);

void ConvertD32S8ToD24S8(std::span<const std::byte> source, std::span<std::byte> dest);

void InterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest);

void DeinterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest);

} // namespace Pica::Texture