video_core: Rewrite to backend system
* Still doesn't build; this is just a massive code dump from all the recent progress.
This commit is contained in:
@ -157,6 +157,7 @@ set(REQUIRED_LIBRARIES
|
||||
inih
|
||||
lodepng
|
||||
glslang
|
||||
robin-hood-hashing
|
||||
zstd
|
||||
)
|
||||
|
||||
@ -297,6 +298,7 @@ set(REQUIRED_PACKAGES
|
||||
zstd
|
||||
unofficial-enet
|
||||
lodepng
|
||||
robin_hood
|
||||
)
|
||||
|
||||
foreach(PACKAGE ${REQUIRED_PACKAGES})
|
||||
|
@ -9,7 +9,7 @@ endif()
|
||||
# Configure vcpkg
|
||||
set(VCPKG_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/externals/vcpkg")
|
||||
if (WIN32)
|
||||
execute_process(COMMAND cmd /C "${VCPKG_DIRECTORY}/bootstrap-vcpkg.bat")
|
||||
#execute_process(COMMAND cmd /C "${VCPKG_DIRECTORY}/bootstrap-vcpkg.bat")
|
||||
set(VCPKG_EXECUTABLE "${VCPKG_DIRECTORY}/vcpkg.exe")
|
||||
else()
|
||||
execute_process(COMMAND bash "${VCPKG_DIRECTORY}/bootstrap-vcpkg.sh")
|
||||
|
@ -450,7 +450,7 @@ void DspLle::SetServiceToInterrupt(std::weak_ptr<Service::DSP::DSP_DSP> dsp) {
|
||||
return;
|
||||
if (pipe == 0) {
|
||||
// pipe 0 is for debug. 3DS automatically drains this pipe and discards the data
|
||||
impl->ReadPipe(pipe, impl->GetPipeReadableSize(pipe));
|
||||
impl->ReadPipe(static_cast<u8>(pipe), impl->GetPipeReadableSize(pipe));
|
||||
} else {
|
||||
std::lock_guard lock(HLE::g_hle_lock);
|
||||
if (auto locked = dsp.lock()) {
|
||||
|
@ -944,16 +944,14 @@ void Config::SaveMultiplayerValues() {
|
||||
// Write ban list
|
||||
qt_config->beginWriteArray(QStringLiteral("username_ban_list"));
|
||||
for (std::size_t i = 0; i < UISettings::values.ban_list.first.size(); ++i) {
|
||||
int index = static_cast<int>(i);
|
||||
qt_config->setArrayIndex(index);
|
||||
qt_config->setArrayIndex(static_cast<int>(i));
|
||||
WriteSetting(QStringLiteral("username"),
|
||||
QString::fromStdString(UISettings::values.ban_list.first[i]));
|
||||
}
|
||||
qt_config->endArray();
|
||||
qt_config->beginWriteArray(QStringLiteral("ip_ban_list"));
|
||||
for (std::size_t i = 0; i < UISettings::values.ban_list.second.size(); ++i) {
|
||||
int index = static_cast<int>(i);
|
||||
qt_config->setArrayIndex(index);
|
||||
qt_config->setArrayIndex(static_cast<int>(i));
|
||||
WriteSetting(QStringLiteral("ip"),
|
||||
QString::fromStdString(UISettings::values.ban_list.second[i]));
|
||||
}
|
||||
|
@ -256,8 +256,7 @@ void ConfigureCamera::SetConfiguration() {
|
||||
int index = GetSelectedCameraIndex();
|
||||
for (std::size_t i = 0; i < Implementations.size(); i++) {
|
||||
if (Implementations[i] == camera_name[index]) {
|
||||
int current_index = static_cast<int>(i);
|
||||
ui->image_source->setCurrentIndex(current_index);
|
||||
ui->image_source->setCurrentIndex(static_cast<int>(i));
|
||||
}
|
||||
}
|
||||
if (camera_name[index] == "image") {
|
||||
|
@ -76,7 +76,7 @@ void IPCRecorderWidget::OnEntryUpdated(IPCDebugger::RequestRecord record) {
|
||||
QTreeWidgetItem entry{
|
||||
{QString::number(record.id), GetStatusStr(record), service, GetFunctionName(record)}};
|
||||
|
||||
const int row_id = record.id - id_offset;
|
||||
const std::size_t row_id = record.id - id_offset;
|
||||
if (ui->main->invisibleRootItem()->childCount() > row_id) {
|
||||
records[row_id] = record;
|
||||
(*ui->main->invisibleRootItem()->child(row_id)) = entry;
|
||||
|
@ -45,7 +45,7 @@ private:
|
||||
// The offset between record id and row id, assuming record ids are assigned
|
||||
// continuously and only the 'Clear' action can be performed, this is enough.
|
||||
// The initial value is 1, which means record 1 = row 0.
|
||||
int id_offset = 1;
|
||||
std::size_t id_offset = 1;
|
||||
std::vector<IPCDebugger::RequestRecord> records;
|
||||
};
|
||||
|
||||
|
@ -60,6 +60,7 @@ add_library(common STATIC
|
||||
detached_tasks.cpp
|
||||
detached_tasks.h
|
||||
bit_field.h
|
||||
bit_field_array.h
|
||||
bit_set.h
|
||||
cityhash.cpp
|
||||
cityhash.h
|
||||
@ -72,6 +73,7 @@ add_library(common STATIC
|
||||
file_util.h
|
||||
flag.h
|
||||
hash.h
|
||||
intrusive_ptr.h
|
||||
linear_disk_cache.h
|
||||
logging/backend.cpp
|
||||
logging/backend.h
|
||||
@ -87,6 +89,8 @@ add_library(common STATIC
|
||||
microprofile.h
|
||||
microprofileui.h
|
||||
misc.cpp
|
||||
object_pool.cpp
|
||||
object_pool.h
|
||||
param_package.cpp
|
||||
param_package.h
|
||||
quaternion.h
|
||||
|
@ -36,6 +36,18 @@
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/swap.h"
|
||||
|
||||
// User-defined types need to specialize this trait to provide the unsigned
// type used for their bit storage; for built-in types std::make_unsigned works.
template <typename T>
struct MakeUnsigned {
    using type = std::make_unsigned_t<T>;
};

// Ensure that user defined types are sane: any type used in a bit field must
// be castable to its MakeUnsigned counterpart.
template <class T>
concept ValidType = requires(T t) {
    static_cast<typename MakeUnsigned<T>::type>(t);
};
|
||||
|
||||
/*
|
||||
* Abstract bitfield class
|
||||
*
|
||||
@ -110,6 +122,7 @@
|
||||
*/
|
||||
#pragma pack(1)
|
||||
template <std::size_t Position, std::size_t Bits, typename T, typename EndianTag = LETag>
|
||||
requires ValidType<T>
|
||||
struct BitField {
|
||||
private:
|
||||
// UnderlyingType is T for non-enum types and the underlying type of T if
|
||||
@ -120,7 +133,7 @@ private:
|
||||
std::enable_if<true, T>>::type;
|
||||
|
||||
// We store the value as the unsigned type to avoid undefined behaviour on value shifting
|
||||
using StorageType = std::make_unsigned_t<UnderlyingType>;
|
||||
using StorageType = typename MakeUnsigned<UnderlyingType>::type;
|
||||
|
||||
using StorageTypeWithEndian = typename AddEndian<StorageType, EndianTag>::type;
|
||||
|
||||
@ -199,3 +212,38 @@ private:
|
||||
|
||||
template <std::size_t Position, std::size_t Bits, typename T>
|
||||
using BitFieldBE = BitField<Position, Bits, T, BETag>;
|
||||
|
||||
/**
|
||||
* Abstract bit flag class. This is basically a specialization of BitField for single-bit fields.
|
||||
* Instead of being cast to the underlying type, it acts like a boolean.
|
||||
*/
|
||||
#pragma pack(1)
|
||||
template <std::size_t Position, typename T, typename EndianTag = LETag>
|
||||
struct BitFlag : protected BitField<Position, 1, T, EndianTag> {
|
||||
private:
|
||||
BitFlag(T val) = delete;
|
||||
|
||||
using ParentType = BitField<Position, 1, T>;
|
||||
|
||||
public:
|
||||
BitFlag() = default;
|
||||
BitFlag& operator=(const BitFlag&) = delete;
|
||||
|
||||
constexpr BitFlag& operator=(bool val) {
|
||||
Assign(val);
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr void Assign(bool value) {
|
||||
ParentType::Assign(value);
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr operator bool() const {
|
||||
return Value();
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr bool Value() const {
|
||||
return ParentType::Value() != 0;
|
||||
}
|
||||
};
|
||||
#pragma pack()
|
||||
|
287
src/common/bit_field_array.h
Normal file
287
src/common/bit_field_array.h
Normal file
@ -0,0 +1,287 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
#include "common/swap.h"
|
||||
|
||||
// Language limitations require the following to make these formattable
|
||||
// (formatter<BitFieldArray<position, bits, size, T>::Ref> is not legal)
|
||||
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
|
||||
class BitFieldArrayConstRef;
|
||||
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
|
||||
class BitFieldArrayRef;
|
||||
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
|
||||
class BitFieldArrayConstIterator;
|
||||
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
|
||||
class BitFieldArrayIterator;
|
||||
|
||||
// A packed array of bit fields: `size` elements of `bits` bits each, stored
// back-to-back inside a single integer starting at bit `position`.
#pragma pack(1)
template <std::size_t position, std::size_t bits, std::size_t size, typename T,
          // StorageType is T for non-enum types and the underlying type of T if
          // T is an enumeration. Note that T is wrapped within an enable_if in the
          // former case to workaround compile errors which arise when using
          // std::underlying_type<T>::type directly.
          typename StorageType = typename std::conditional_t<
              std::is_enum<T>::value, std::underlying_type<T>, std::enable_if<true, T>>::type>
struct BitFieldArray
{
  using Ref = BitFieldArrayRef<position, bits, size, T, StorageType>;
  using ConstRef = BitFieldArrayConstRef<position, bits, size, T, StorageType>;
  using Iterator = BitFieldArrayIterator<position, bits, size, T, StorageType>;
  using ConstIterator = BitFieldArrayConstIterator<position, bits, size, T, StorageType>;

private:
  // This constructor might be considered ambiguous:
  // Would it initialize the storage or just the bitfield?
  // Hence, delete it. Use the assignment operator to set bitfield values!
  BitFieldArray(T val) = delete;

public:
  // Force default constructor to be created
  // so that we can use this within unions
  constexpr BitFieldArray() = default;

  // Initializer list constructor: fills elements 0..N-1 from the list.
  // NOTE(review): no bounds check — presumably callers never pass more than
  // `size` items; confirm before relying on it.
  constexpr BitFieldArray(std::initializer_list<T> items) : storage(StorageType{}) {
    u32 index = 0;
    for (auto& item : items) {
      SetValue(index++, item);
    }
  }

  // We explicitly delete the copy assignment operator here, because the
  // default copy assignment would copy the full storage value, rather than
  // just the bits relevant to this particular bit field.
  // Ideally, we would just implement the copy assignment to copy only the
  // relevant bits, but we're prevented from doing that because the savestate
  // code expects that this class is trivially copyable.
  BitFieldArray& operator=(const BitFieldArray&) = delete;

public:
  constexpr bool IsSigned() const { return std::is_signed<T>(); }
  constexpr std::size_t StartBit() const { return position; }
  constexpr std::size_t NumBits() const { return bits; }
  constexpr std::size_t Size() const { return size; }
  constexpr std::size_t TotalNumBits() const { return bits * size; }

  // Read element `index`; tag-dispatches on the signedness of T so signed
  // values are sign-extended correctly.
  constexpr T Value(size_t index) const { return Value(std::is_signed<T>(), index); }
  // Write element `index`: clear its bit range, then merge in the new value.
  constexpr void SetValue(size_t index, T value) {
    const size_t pos = position + bits * index;
    storage = (storage & ~GetElementMask(index)) |
              ((static_cast<StorageType>(value) << pos) & GetElementMask(index));
  }
  Ref operator[](size_t index) { return Ref(this, index); }
  constexpr const ConstRef operator[](size_t index) const { return ConstRef(this, index); }

  constexpr Iterator begin() { return Iterator(this, 0); }
  constexpr Iterator end() { return Iterator(this, size); }
  constexpr ConstIterator begin() const { return ConstIterator(this, 0); }
  constexpr ConstIterator end() const { return ConstIterator(this, size); }
  constexpr ConstIterator cbegin() const { return begin(); }
  constexpr ConstIterator cend() const { return end(); }

private:
  // Unsigned version of StorageType
  using StorageTypeU = std::make_unsigned_t<StorageType>;

  // Signed read: shift the element up against the top of the word, then shift
  // back down arithmetically so the sign bit propagates.
  constexpr T Value(std::true_type, size_t index) const
  {
    const size_t pos = position + bits * index;
    const size_t shift_amount = 8 * sizeof(StorageType) - bits;
    return static_cast<T>((storage << (shift_amount - pos)) >> shift_amount);
  }

  // Unsigned read: mask out the element and shift it down to bit 0.
  constexpr T Value(std::false_type, size_t index) const
  {
    const size_t pos = position + bits * index;
    return static_cast<T>((storage & GetElementMask(index)) >> pos);
  }

  // Mask covering the bit range occupied by element `index`.
  static constexpr StorageType GetElementMask(size_t index)
  {
    const size_t pos = position + bits * index;
    return (std::numeric_limits<StorageTypeU>::max() >> (8 * sizeof(StorageType) - bits)) << pos;
  }

  StorageType storage;

  static_assert(bits * size + position <= 8 * sizeof(StorageType), "Bitfield array out of range");
  static_assert(sizeof(T) <= sizeof(StorageType), "T must fit in StorageType");

  // And, you know, just in case people specify something stupid like bits=position=0x80000000
  static_assert(position < 8 * sizeof(StorageType), "Invalid position");
  static_assert(bits <= 8 * sizeof(T), "Invalid number of bits");
  static_assert(bits > 0, "Invalid number of bits");
  static_assert(size <= 8 * sizeof(StorageType), "Invalid size");
  static_assert(size > 0, "Invalid size");
};
#pragma pack()
|
||||
|
||||
/// Read-only proxy referring to a single element of a BitFieldArray.
/// (Separate from BitFieldArrayRef so each can be made formattable; see the
/// language-limitation note at the top of the file.)
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayConstRef
{
  friend struct BitFieldArray<position, bits, size, T, S>;
  friend class BitFieldArrayConstIterator<position, bits, size, T, S>;

public:
  /// Decode and return the referenced element from the parent array.
  constexpr T Value() const { return m_array->Value(m_index); };
  /// Implicit conversion so the proxy reads like a plain T.
  constexpr operator T() const { return Value(); }

private:
  // Only BitFieldArray / its const iterator may create these proxies.
  constexpr BitFieldArrayConstRef(const BitFieldArray<position, bits, size, T, S>* array,
                                  size_t index)
      : m_array(array), m_index(index)
  {
  }

  const BitFieldArray<position, bits, size, T, S>* const m_array;
  const size_t m_index;
};
|
||||
|
||||
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
|
||||
class BitFieldArrayRef
|
||||
{
|
||||
friend struct BitFieldArray<position, bits, size, T, S>;
|
||||
friend class BitFieldArrayIterator<position, bits, size, T, S>;
|
||||
|
||||
public:
|
||||
constexpr T Value() const { return m_array->Value(m_index); };
|
||||
constexpr operator T() const { return Value(); }
|
||||
T operator=(const BitFieldArrayRef<position, bits, size, T, S>& value) const
|
||||
{
|
||||
m_array->SetValue(m_index, value);
|
||||
return value;
|
||||
}
|
||||
T operator=(T value) const
|
||||
{
|
||||
m_array->SetValue(m_index, value);
|
||||
return value;
|
||||
}
|
||||
|
||||
private:
|
||||
constexpr BitFieldArrayRef(BitFieldArray<position, bits, size, T, S>* array, size_t index)
|
||||
: m_array(array), m_index(index)
|
||||
{
|
||||
}
|
||||
|
||||
BitFieldArray<position, bits, size, T, S>* const m_array;
|
||||
const size_t m_index;
|
||||
};
|
||||
|
||||
// Satisfies LegacyOutputIterator / std::output_iterator.
|
||||
// Does not satisfy LegacyInputIterator / std::input_iterator as std::output_iterator_tag does not
|
||||
// extend std::input_iterator_tag.
|
||||
// Does not satisfy LegacyForwardIterator / std::forward_iterator, as that requires use of real
|
||||
// references instead of proxy objects.
|
||||
// This iterator allows use of BitFieldArray in range-based for loops, and with fmt::join.
|
||||
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
|
||||
class BitFieldArrayIterator
|
||||
{
|
||||
friend struct BitFieldArray<position, bits, size, T, S>;
|
||||
|
||||
public:
|
||||
using iterator_category = std::output_iterator_tag;
|
||||
using value_type = T;
|
||||
using difference_type = ptrdiff_t;
|
||||
using pointer = void;
|
||||
using reference = BitFieldArrayRef<position, bits, size, T, S>;
|
||||
|
||||
private:
|
||||
constexpr BitFieldArrayIterator(BitFieldArray<position, bits, size, T, S>* array, size_t index)
|
||||
: m_array(array), m_index(index)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
// Required by std::input_or_output_iterator
|
||||
constexpr BitFieldArrayIterator() = default;
|
||||
// Required by LegacyIterator
|
||||
constexpr BitFieldArrayIterator(const BitFieldArrayIterator& other) = default;
|
||||
// Required by LegacyIterator
|
||||
BitFieldArrayIterator& operator=(const BitFieldArrayIterator& other) = default;
|
||||
// Move constructor and assignment operators, explicitly defined for completeness
|
||||
constexpr BitFieldArrayIterator(BitFieldArrayIterator&& other) = default;
|
||||
BitFieldArrayIterator& operator=(BitFieldArrayIterator&& other) = default;
|
||||
|
||||
public:
|
||||
BitFieldArrayIterator& operator++()
|
||||
{
|
||||
m_index++;
|
||||
return *this;
|
||||
}
|
||||
BitFieldArrayIterator operator++(int)
|
||||
{
|
||||
BitFieldArrayIterator other(*this);
|
||||
++*this;
|
||||
return other;
|
||||
}
|
||||
constexpr reference operator*() const { return reference(m_array, m_index); }
|
||||
constexpr bool operator==(BitFieldArrayIterator other) const { return m_index == other.m_index; }
|
||||
constexpr bool operator!=(BitFieldArrayIterator other) const { return m_index != other.m_index; }
|
||||
|
||||
private:
|
||||
BitFieldArray<position, bits, size, T, S>* m_array;
|
||||
size_t m_index;
|
||||
};
|
||||
|
||||
// Satisfies LegacyInputIterator / std::input_iterator.
|
||||
// Does not satisfy LegacyForwardIterator / std::forward_iterator, as that requires use of real
|
||||
// references instead of proxy objects.
|
||||
// This iterator allows use of BitFieldArray in range-based for loops, and with fmt::join.
|
||||
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
|
||||
class BitFieldArrayConstIterator
|
||||
{
|
||||
friend struct BitFieldArray<position, bits, size, T, S>;
|
||||
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = T;
|
||||
using difference_type = ptrdiff_t;
|
||||
using pointer = void;
|
||||
using reference = BitFieldArrayConstRef<position, bits, size, T, S>;
|
||||
|
||||
private:
|
||||
constexpr BitFieldArrayConstIterator(const BitFieldArray<position, bits, size, T, S>* array,
|
||||
size_t index)
|
||||
: m_array(array), m_index(index)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
// Required by std::input_or_output_iterator
|
||||
constexpr BitFieldArrayConstIterator() = default;
|
||||
// Required by LegacyIterator
|
||||
constexpr BitFieldArrayConstIterator(const BitFieldArrayConstIterator& other) = default;
|
||||
// Required by LegacyIterator
|
||||
BitFieldArrayConstIterator& operator=(const BitFieldArrayConstIterator& other) = default;
|
||||
// Move constructor and assignment operators, explicitly defined for completeness
|
||||
constexpr BitFieldArrayConstIterator(BitFieldArrayConstIterator&& other) = default;
|
||||
BitFieldArrayConstIterator& operator=(BitFieldArrayConstIterator&& other) = default;
|
||||
|
||||
public:
|
||||
BitFieldArrayConstIterator& operator++()
|
||||
{
|
||||
m_index++;
|
||||
return *this;
|
||||
}
|
||||
BitFieldArrayConstIterator operator++(int)
|
||||
{
|
||||
BitFieldArrayConstIterator other(*this);
|
||||
++*this;
|
||||
return other;
|
||||
}
|
||||
constexpr reference operator*() const { return reference(m_array, m_index); }
|
||||
constexpr bool operator==(BitFieldArrayConstIterator other) const
|
||||
{
|
||||
return m_index == other.m_index;
|
||||
}
|
||||
constexpr bool operator!=(BitFieldArrayConstIterator other) const
|
||||
{
|
||||
return m_index != other.m_index;
|
||||
}
|
||||
|
||||
private:
|
||||
const BitFieldArray<position, bits, size, T, S>* m_array;
|
||||
size_t m_index;
|
||||
};
|
@ -15,11 +15,19 @@
|
||||
#define CONCAT2(x, y) DO_CONCAT2(x, y)
|
||||
#define DO_CONCAT2(x, y) x##y
|
||||
|
||||
// helper macro to properly align structure members.
|
||||
// Calling INSERT_PADDING_BYTES will add a new member variable with a name like "pad121",
|
||||
// depending on the current source line to make sure variable names are unique.
|
||||
#define INSERT_PADDING_BYTES(num_bytes) u8 CONCAT2(pad, __LINE__)[(num_bytes)]
|
||||
#define INSERT_PADDING_WORDS(num_words) u32 CONCAT2(pad, __LINE__)[(num_words)]
|
||||
/// Helper macros to insert unused bytes or words to properly align structs. These values will be
|
||||
/// zero-initialized.
|
||||
#define INSERT_PADDING_BYTES(num_bytes) \
|
||||
[[maybe_unused]] std::array<u8, num_bytes> CONCAT2(pad, __LINE__) {}
|
||||
#define INSERT_PADDING_WORDS(num_words) \
|
||||
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__) {}
|
||||
|
||||
/// These are similar to the INSERT_PADDING_* macros but do not zero-initialize the contents.
|
||||
/// This keeps the structure trivial to construct.
|
||||
#define INSERT_PADDING_BYTES_NOINIT(num_bytes) \
|
||||
[[maybe_unused]] std::array<u8, num_bytes> CONCAT2(pad, __LINE__)
|
||||
#define INSERT_PADDING_WORDS_NOINIT(num_words) \
|
||||
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
|
||||
|
||||
// Inlining
|
||||
#ifdef _WIN32
|
||||
|
@ -11,6 +11,15 @@
|
||||
|
||||
namespace Common {
|
||||
|
||||
/**
 * Disables rehashing for std::unordered_map by returning an already-computed
 * 64-bit hash unchanged (identity function). Use as the Hash template argument
 * when the map keys are themselves hash values.
 */
struct IdentityHash {
    u64 operator()(const u64 hash) const {
        return hash;
    }
};
|
||||
|
||||
/**
|
||||
* Computes a 64-bit hash over the specified block of data
|
||||
* @param data Block of data to compute hash over
|
||||
@ -33,6 +42,14 @@ static inline u64 ComputeStructHash64(const T& data) noexcept {
|
||||
return ComputeHash64(&data, sizeof(data));
|
||||
}
|
||||
|
||||
/**
 * Combines hash lhs with hash rhs providing a unique result
 * (boost::hash_combine-style mixing with the 32-bit golden-ratio constant).
 */
static inline std::size_t HashCombine(std::size_t lhs, std::size_t rhs) noexcept {
    return lhs ^ (rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2));
}
|
||||
|
||||
/// A helper template that ensures the padding in a struct is initialized by memsetting to 0.
|
||||
template <typename T>
|
||||
struct HashableStruct {
|
||||
|
261
src/common/intrusive_ptr.h
Normal file
261
src/common/intrusive_ptr.h
Normal file
@ -0,0 +1,261 @@
|
||||
/* Copyright (c) 2017-2022 Hans-Kristian Arntzen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include <atomic>
|
||||
#include <type_traits>
|
||||
|
||||
/// Non-atomic reference counter; only safe when every owner lives on the
/// same thread. A freshly constructed counter holds one reference.
class SingleThreadCounter {
public:
    /// Take one additional reference.
    void AddRef() {
        ++count;
    }

    /// Drop one reference; returns true when the last one was released.
    bool Release() {
        count -= 1;
        return count == 0;
    }

private:
    std::size_t count = 1;
};
|
||||
|
||||
/// Atomic reference counter safe to use across threads.
/// Starts at one reference, matching SingleThreadCounter.
class MultiThreadCounter {
public:
    MultiThreadCounter() {
        count.store(1, std::memory_order_relaxed);
    }

    /// Take one additional reference; relaxed is enough for an increment.
    void AddRef() {
        count.fetch_add(1, std::memory_order_relaxed);
    }

    /// Drop one reference; returns true when this call released the last one.
    bool Release() {
        // acq_rel so prior writes to the object are visible to whoever
        // observes the count reaching zero and frees it.
        return count.fetch_sub(1, std::memory_order_acq_rel) == 1;
    }

private:
    std::atomic_size_t count;
};
|
||||
|
||||
template <typename T>
|
||||
class IntrusivePtr;
|
||||
|
||||
template <typename T, typename Deleter = std::default_delete<T>,
|
||||
typename ReferenceOps = SingleThreadCounter>
|
||||
class IntrusivePtrEnabled {
|
||||
public:
|
||||
using IntrusivePtrType = IntrusivePtr<T>;
|
||||
using EnabledBase = T;
|
||||
using EnabledDeleter = Deleter;
|
||||
using EnabledReferenceOp = ReferenceOps;
|
||||
|
||||
IntrusivePtrEnabled() = default;
|
||||
IntrusivePtrEnabled(const IntrusivePtrEnabled &) = delete;
|
||||
void operator=(const IntrusivePtrEnabled &) = delete;
|
||||
|
||||
/// Decrement the reference counter and optionally free the memory
|
||||
inline void ReleaseRef() {
|
||||
if (ref_counter.Release()) {
|
||||
Deleter()(static_cast<T*>(this));
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment the reference counter
|
||||
inline void AddRef() {
|
||||
ref_counter.AddRef();
|
||||
}
|
||||
|
||||
protected:
|
||||
IntrusivePtr<T> RefFromThis();
|
||||
|
||||
private:
|
||||
ReferenceOps ref_counter;
|
||||
};
|
||||
|
||||
/**
|
||||
* Lightweight alternative to std::shared_ptr for reference counting
|
||||
* usecases
|
||||
*/
|
||||
template <typename T>
|
||||
class IntrusivePtr {
|
||||
using ReferenceBase = IntrusivePtrEnabled<
|
||||
typename T::EnabledBase,
|
||||
typename T::EnabledDeleter,
|
||||
typename T::EnabledReferenceOp>;
|
||||
|
||||
template <typename U>
|
||||
friend class IntrusivePtr;
|
||||
public:
|
||||
IntrusivePtr() = default;
|
||||
explicit IntrusivePtr(T *handle) : data(handle) {}
|
||||
|
||||
template <typename U>
|
||||
IntrusivePtr(const IntrusivePtr<U> &other) {
|
||||
*this = other;
|
||||
}
|
||||
|
||||
IntrusivePtr(const IntrusivePtr &other) {
|
||||
*this = other;
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
IntrusivePtr(IntrusivePtr<U> &&other) noexcept {
|
||||
*this = std::move(other);
|
||||
}
|
||||
|
||||
IntrusivePtr(IntrusivePtr &&other) noexcept {
|
||||
*this = std::move(other);
|
||||
}
|
||||
|
||||
~IntrusivePtr() {
|
||||
Reset();
|
||||
}
|
||||
|
||||
/// Returns a reference to the underlying data
|
||||
T& operator*() {
|
||||
return *data;
|
||||
}
|
||||
|
||||
/// Returns an immutable reference to the underlying data
|
||||
const T& operator*() const {
|
||||
return *data;
|
||||
}
|
||||
|
||||
/// Returns a pointer to the underlying data
|
||||
T* operator->() {
|
||||
return data;
|
||||
}
|
||||
|
||||
/// Returns an immutable pointer to the underlying data
|
||||
const T* operator->() const {
|
||||
return data;
|
||||
}
|
||||
|
||||
/// Returns true if the underlaying pointer it valid
|
||||
bool IsValid() const {
|
||||
return data != nullptr;
|
||||
}
|
||||
|
||||
/// Default comparison operators
|
||||
auto operator<=>(const IntrusivePtr& other) const = default;
|
||||
|
||||
/// Returns the raw pointer to the data
|
||||
T* Get() {
|
||||
return data;
|
||||
}
|
||||
|
||||
/// Returns an immutable raw pointer to the data
|
||||
const T* Get() const {
|
||||
return data;
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
// Static up-cast here to avoid potential issues with multiple intrusive inheritance.
|
||||
// Also makes sure that the pointer type actually inherits from this type.
|
||||
if (data)
|
||||
static_cast<ReferenceBase*>(data)->ReleaseRef();
|
||||
data = nullptr;
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
IntrusivePtr& operator=(const IntrusivePtr<U>& other) {
|
||||
static_assert(std::is_base_of_v<T, U>, "Cannot safely assign downcasted intrusive pointers.");
|
||||
|
||||
Reset();
|
||||
data = static_cast<T*>(other.data);
|
||||
|
||||
// Static up-cast here to avoid potential issues with multiple intrusive inheritance.
|
||||
// Also makes sure that the pointer type actually inherits from this type.
|
||||
if (data) {
|
||||
static_cast<ReferenceBase*>(data)->ReleaseRef();
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
IntrusivePtr& operator=(const IntrusivePtr& other) {
|
||||
if (this != &other) {
|
||||
Reset();
|
||||
data = other.data;
|
||||
if (data)
|
||||
static_cast<ReferenceBase*>(data)->AddRef();
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
IntrusivePtr &operator=(IntrusivePtr<U> &&other) noexcept {
|
||||
Reset();
|
||||
data = std::exchange(other.data, nullptr);
|
||||
return *this;
|
||||
}
|
||||
|
||||
IntrusivePtr &operator=(IntrusivePtr &&other) noexcept {
|
||||
if (this != &other) {
|
||||
Reset();
|
||||
data = std::exchange(other.data, nullptr);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
T* Release() & {
|
||||
return std::exchange(data, nullptr);
|
||||
}
|
||||
|
||||
T* Release() && {
|
||||
return std::exchange(data, nullptr);
|
||||
}
|
||||
|
||||
private:
|
||||
T* data = nullptr;
|
||||
};
|
||||
|
||||
// Mints a new owning handle to *this: takes an extra reference, then wraps
// the pointer (IntrusivePtr's explicit ctor adopts the reference).
template <typename T, typename Deleter, typename ReferenceOps>
IntrusivePtr<T> IntrusivePtrEnabled<T, Deleter, ReferenceOps>::RefFromThis() {
    AddRef();
    return IntrusivePtr<T>(static_cast<T*>(this));
}

template <typename Derived>
using DerivedIntrusivePtrType = IntrusivePtr<Derived>;

// Allocate a T and wrap it in an owning handle (analogous to std::make_unique).
template <typename T, typename... P>
DerivedIntrusivePtrType<T> MakeHandle(P &&... p) {
    return DerivedIntrusivePtrType<T>(new T(std::forward<P>(p)...));
}

// Allocate a Derived instance but return it typed as Base's handle type.
template <typename Base, typename Derived, typename... P>
typename Base::IntrusivePtrType MakeDerivedHandle(P &&... p) {
    return typename Base::IntrusivePtrType(new Derived(std::forward<P>(p)...));
}

// Convenience alias selecting the atomic counter for cross-thread ownership.
template <typename T>
using ThreadSafeIntrusivePtrEnabled = IntrusivePtrEnabled<T, std::default_delete<T>, MultiThreadCounter>;
|
70
src/common/object_pool.cpp
Normal file
70
src/common/object_pool.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
/* Copyright (c) 2017-2022 Hans-Kristian Arntzen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common/object_pool.h"
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
/// Allocates `size` bytes aligned to `boundary`.
/// `boundary` must be a power of two; the POSIX path additionally requires it
/// to be a multiple of sizeof(void*), and C11 aligned_alloc expects `size` to
/// be a multiple of `boundary` — TODO confirm all call sites satisfy this.
/// Returns nullptr on failure. Release the memory with memalign_free().
void* memalign_alloc(size_t boundary, size_t size) {
#if defined(_WIN32)
    return _aligned_malloc(size, boundary);
#elif defined(_ISOC11_SOURCE)
    return aligned_alloc(boundary, size);
#elif (_POSIX_C_SOURCE >= 200112L) || (_XOPEN_SOURCE >= 600)
    void *ptr = nullptr;
    // posix_memalign returns 0 on success and a positive errno value on
    // failure; it never returns a negative value. The original `< 0` check
    // therefore never fired, and on failure the function returned `ptr`
    // whose contents are unspecified per POSIX.
    if (posix_memalign(&ptr, boundary, size) != 0) {
        return nullptr;
    }
    return ptr;
#else
    // Align stuff ourselves. Kinda ugly, but will work anywhere:
    // over-allocate, round the address up to the boundary, and stash the
    // original malloc pointer just below the returned address so
    // memalign_free can recover it.
    void **place;
    uintptr_t addr = 0;
    void *ptr = malloc(boundary + size + sizeof(uintptr_t));

    if (ptr == nullptr) {
        return nullptr;
    }

    addr = ((uintptr_t)ptr + sizeof(uintptr_t) + boundary) & ~(boundary - 1);
    place = (void **) addr;
    place[-1] = ptr;

    return (void *) addr;
#endif
}
|
||||
|
||||
/// Frees memory previously obtained from memalign_alloc().
/// Safe to call with nullptr on every platform branch.
void memalign_free(void *ptr) {
#if defined(_WIN32)
    _aligned_free(ptr);
#elif !defined(_ISOC11_SOURCE) && !((_POSIX_C_SOURCE >= 200112L) || (_XOPEN_SOURCE >= 600))
    // Manual-alignment fallback: the real malloc pointer was stored one
    // slot below the aligned address by memalign_alloc.
    if (ptr == nullptr) {
        return;
    }
    free(static_cast<void**>(ptr)[-1]);
#else
    free(ptr);
#endif
}
|
148
src/common/object_pool.h
Normal file
148
src/common/object_pool.h
Normal file
@ -0,0 +1,148 @@
|
||||
/* Copyright (c) 2017-2022 Hans-Kristian Arntzen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
|
||||
void *memalign_alloc(size_t boundary, size_t size);
|
||||
void memalign_free(void *ptr);
|
||||
|
||||
template <typename T>
|
||||
struct AlignedAllocation {
|
||||
static void* operator new(size_t size) {
|
||||
void* ret = memalign_alloc(alignof(T), size);
|
||||
if (!ret) throw std::bad_alloc();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void* operator new[](size_t size) {
|
||||
void* ret = memalign_alloc(alignof(T), size);
|
||||
if (!ret) throw std::bad_alloc();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void operator delete(void *ptr) {
|
||||
return memalign_free(ptr);
|
||||
}
|
||||
|
||||
static void operator delete[](void *ptr) {
|
||||
return memalign_free(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Allocates objects of type T in batches of 64 * n where
|
||||
* n is the number of times the pool has grown. So the first
|
||||
* time it will allocate 64, then 128 objects etc.
|
||||
*/
|
||||
template<typename T>
|
||||
class ObjectPool {
|
||||
public:
|
||||
template<typename... P>
|
||||
T* Allocate(P&&... p) {
|
||||
#ifndef OBJECT_POOL_DEBUG
|
||||
if (vacants.empty()) {
|
||||
unsigned num_objects = 64u << memory.size();
|
||||
T *ptr = static_cast<T*>(memalign_alloc(std::max(64, alignof(T)),
|
||||
num_objects * sizeof(T)));
|
||||
if (!ptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < num_objects; i++) {
|
||||
vacants.push_back(&ptr[i]);
|
||||
}
|
||||
|
||||
memory.emplace_back(ptr);
|
||||
}
|
||||
|
||||
T *ptr = vacants.back();
|
||||
vacants.pop_back();
|
||||
new(ptr) T(std::forward<P>(p)...);
|
||||
return ptr;
|
||||
#else
|
||||
return new T(std::forward<P>(p)...);
|
||||
#endif
|
||||
}
|
||||
|
||||
void Free(T *ptr) {
|
||||
#ifndef OBJECT_POOL_DEBUG
|
||||
ptr->~T();
|
||||
vacants.push_back(ptr);
|
||||
#else
|
||||
delete ptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
void Clear() {
|
||||
#ifndef OBJECT_POOL_DEBUG
|
||||
vacants.clear();
|
||||
memory.clear();
|
||||
#endif
|
||||
}
|
||||
|
||||
protected:
|
||||
#ifndef OBJECT_POOL_DEBUG
|
||||
std::vector<T*> vacants;
|
||||
|
||||
struct MallocDeleter {
|
||||
void operator()(T *ptr) {
|
||||
memalign_free(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<std::unique_ptr<T, MallocDeleter>> memory;
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
class ThreadSafeObjectPool : private ObjectPool<T> {
|
||||
public:
|
||||
template<typename... P>
|
||||
T* Allocate(P &&... p) {
|
||||
std::lock_guard<std::mutex> holder{lock};
|
||||
return ObjectPool<T>::Allocate(std::forward<P>(p)...);
|
||||
}
|
||||
|
||||
void Free(T *ptr) {
|
||||
#ifndef OBJECT_POOL_DEBUG
|
||||
ptr->~T();
|
||||
std::lock_guard<std::mutex> holder{lock};
|
||||
this->vacants.push_back(ptr);
|
||||
#else
|
||||
delete ptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
void Clear() {
|
||||
std::lock_guard<std::mutex> holder{lock};
|
||||
ObjectPool<T>::Clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex lock;
|
||||
};
|
@ -158,10 +158,10 @@ struct ABIFrameInfo {
|
||||
|
||||
inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, std::size_t rsp_alignment,
|
||||
std::size_t needed_frame_size) {
|
||||
int count = (regs & ABI_ALL_GPRS).count();
|
||||
std::size_t count = (regs & ABI_ALL_GPRS).count();
|
||||
rsp_alignment -= count * 8;
|
||||
std::size_t subtraction = 0;
|
||||
int xmm_count = (regs & ABI_ALL_XMMS).count();
|
||||
std::size_t xmm_count = (regs & ABI_ALL_XMMS).count();
|
||||
if (xmm_count) {
|
||||
// If we have any XMMs to save, we must align the stack here.
|
||||
subtraction = rsp_alignment & 0xF;
|
||||
|
@ -29,11 +29,9 @@ enum class LayoutOption {
|
||||
SingleScreen,
|
||||
LargeScreen,
|
||||
SideScreen,
|
||||
|
||||
// Similiar to default, but better for mobile devices in portrait mode. Top screen in clamped to
|
||||
// the top of the frame, and the bottom screen is enlarged to match the top screen.
|
||||
MobilePortrait,
|
||||
|
||||
// Similiar to LargeScreen, but better for mobile devices in landscape mode. The screens are
|
||||
// clamped to the top of the frame, and the bottom screen is a bit bigger.
|
||||
MobileLandscape,
|
||||
@ -116,7 +114,6 @@ namespace NativeAnalog {
|
||||
enum Values {
|
||||
CirclePad,
|
||||
CStick,
|
||||
|
||||
NumAnalogs,
|
||||
};
|
||||
|
||||
|
@ -9,12 +9,12 @@ add_library(video_core STATIC
|
||||
pica.cpp
|
||||
pica.h
|
||||
pica_state.h
|
||||
pica_types.h
|
||||
primitive_assembly.cpp
|
||||
primitive_assembly.h
|
||||
rasterizer_interface.h
|
||||
regs.cpp
|
||||
regs.h
|
||||
pica_regs.inc
|
||||
pica.cpp
|
||||
pica.h
|
||||
regs_framebuffer.h
|
||||
regs_lighting.h
|
||||
regs_pipeline.h
|
||||
@ -23,6 +23,15 @@ add_library(video_core STATIC
|
||||
regs_texturing.h
|
||||
renderer_base.cpp
|
||||
renderer_base.h
|
||||
common/backend.h
|
||||
common/buffer.h
|
||||
common/framebuffer.h
|
||||
common/pica_types.h
|
||||
common/shader_gen.cpp
|
||||
common/shader_gen.h
|
||||
common/shader.h
|
||||
common/texture.h
|
||||
common/pipeline.h
|
||||
renderer_opengl/frame_dumper_opengl.cpp
|
||||
renderer_opengl/frame_dumper_opengl.h
|
||||
renderer_opengl/gl_rasterizer.cpp
|
||||
@ -73,16 +82,21 @@ add_library(video_core STATIC
|
||||
renderer_vulkan/pica_to_vulkan.h
|
||||
renderer_vulkan/renderer_vulkan.cpp
|
||||
renderer_vulkan/renderer_vulkan.h
|
||||
renderer_vulkan/vk_backend.cpp
|
||||
renderer_vulkan/vk_backend.h
|
||||
renderer_vulkan/vk_buffer.cpp
|
||||
renderer_vulkan/vk_buffer.h
|
||||
renderer_vulkan/vk_common.cpp
|
||||
renderer_vulkan/vk_common.h
|
||||
renderer_vulkan/vk_format_reinterpreter.cpp
|
||||
renderer_vulkan/vk_format_reinterpreter.h
|
||||
renderer_vulkan/vk_format_util.cpp
|
||||
renderer_vulkan/vk_format_util.h
|
||||
renderer_vulkan/vk_instance.cpp
|
||||
renderer_vulkan/vk_instance.h
|
||||
renderer_vulkan/vk_pipeline_builder.cpp
|
||||
renderer_vulkan/vk_pipeline_builder.h
|
||||
renderer_vulkan/vk_pipeline.cpp
|
||||
renderer_vulkan/vk_pipeline.h
|
||||
renderer_vulkan/vk_platform.h
|
||||
renderer_vulkan/vk_rasterizer_cache.cpp
|
||||
renderer_vulkan/vk_rasterizer_cache.h
|
||||
renderer_vulkan/vk_rasterizer.cpp
|
||||
@ -90,6 +104,8 @@ add_library(video_core STATIC
|
||||
renderer_vulkan/vk_shader_state.h
|
||||
renderer_vulkan/vk_shader_gen.cpp
|
||||
renderer_vulkan/vk_shader_gen.h
|
||||
renderer_vulkan/vk_shader.cpp
|
||||
renderer_vulkan/vk_shader.h
|
||||
renderer_vulkan/vk_state.cpp
|
||||
renderer_vulkan/vk_state.h
|
||||
renderer_vulkan/vk_surface_params.cpp
|
||||
@ -180,7 +196,7 @@ target_link_libraries(video_core PRIVATE glad::glad glm::glm nihstro-headers Boo
|
||||
# Include Vulkan headers
|
||||
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
|
||||
target_include_directories(video_core PRIVATE ../../externals/vma/include)
|
||||
target_link_libraries(video_core PRIVATE glslang SPIRV glslang-default-resource-limits OGLCompiler)
|
||||
target_link_libraries(video_core PRIVATE glslang SPIRV robin_hood::robin_hood)
|
||||
|
||||
if (ARCHITECTURE_x86_64)
|
||||
target_link_libraries(video_core PUBLIC xbyak::xbyak)
|
||||
|
60
src/video_core/common/backend.h
Normal file
60
src/video_core/common/backend.h
Normal file
@ -0,0 +1,60 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/object_pool.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "video_core/common/pipeline.h"
|
||||
#include "video_core/common/framebuffer.h"
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/// Common interface of a video backend
|
||||
class BackendBase {
|
||||
public:
|
||||
BackendBase(Frontend::EmuWindow& window) : window(window) {}
|
||||
virtual ~BackendBase() = default;
|
||||
|
||||
// Triggers a swapchain buffer swap
|
||||
virtual void SwapBuffers();
|
||||
|
||||
// Creates a backend specific texture handle
|
||||
virtual TextureHandle CreateTexture(TextureInfo info) = 0;
|
||||
|
||||
// Creates a backend specific buffer handle
|
||||
virtual BufferHandle CreateBuffer(BufferInfo info) = 0;
|
||||
|
||||
// Creates a backend specific framebuffer handle
|
||||
virtual FramebufferHandle CreateFramebuffer(FramebufferInfo info) = 0;
|
||||
|
||||
// Creates a backend specific pipeline handle
|
||||
virtual PipelineHandle CreatePipeline(PipelineType type, PipelineInfo info) = 0;
|
||||
|
||||
// Creates a backend specific sampler object
|
||||
virtual SamplerHandle CreateSampler(SamplerInfo info) = 0;
|
||||
|
||||
// Start a draw operation
|
||||
virtual void Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
|
||||
BufferHandle vertex_buffer,
|
||||
u32 base_vertex, u32 num_vertices) = 0;
|
||||
|
||||
// Start an indexed draw operation
|
||||
virtual void DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
|
||||
BufferHandle vertex_buffer, BufferHandle index_buffer,
|
||||
u32 base_index, u32 num_indices, u32 base_vertex) = 0;
|
||||
|
||||
// Executes a compute shader
|
||||
virtual void DispatchCompute(PipelineHandle pipeline, Common::Vec3<u32> groupsize,
|
||||
Common::Vec3<u32> groups) = 0;
|
||||
|
||||
private:
|
||||
Frontend::EmuWindow& window;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
102
src/video_core/common/buffer.h
Normal file
102
src/video_core/common/buffer.h
Normal file
@ -0,0 +1,102 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include "common/hash.h"
|
||||
#include "common/intrusive_ptr.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
enum class BufferUsage : u8 {
|
||||
Vertex = 0,
|
||||
Index = 1,
|
||||
Uniform = 2,
|
||||
Texel = 3,
|
||||
Staging = 4,
|
||||
Undefined = 255
|
||||
};
|
||||
|
||||
enum class ViewFormat : u8 {
|
||||
R32Float = 0,
|
||||
R32G32Float = 1,
|
||||
R32G32B32Float = 2,
|
||||
R32G32B32A32Float = 3,
|
||||
Undefined = 255
|
||||
};
|
||||
|
||||
constexpr u32 MAX_BUFFER_VIEWS = 3;
|
||||
|
||||
struct BufferInfo {
|
||||
u32 capacity = 0;
|
||||
BufferUsage usage = BufferUsage::Undefined;
|
||||
std::array<ViewFormat, MAX_BUFFER_VIEWS> views{ViewFormat::Undefined};
|
||||
|
||||
const u64 Hash() const {
|
||||
return Common::ComputeStructHash64(*this);
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(sizeof(BufferInfo) == 8, "BufferInfo not packed!");
|
||||
static_assert(std::is_standard_layout_v<BufferInfo>, "BufferInfo is not a standard layout!");
|
||||
|
||||
/// Common interface of a GPU buffer; backends derive and implement Map/Commit.
class BufferBase : public IntrusivePtrEnabled<BufferBase> {
public:
    BufferBase() = default;
    BufferBase(const BufferInfo& info) : info(info) {}
    virtual ~BufferBase() = default;

    /// Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
    /// and the optional alignment requirement.
    /// The actual used size must be specified on unmapping the chunk.
    /// The base implementation returns an empty span; the original body was
    /// `{};` which falls off the end of a non-void function (undefined behavior).
    virtual std::span<u8> Map(u32 size, u32 alignment = 0) {
        return {};
    }

    /// Flushes writes to buffer memory
    virtual void Commit(u32 size = 0) {}

    /// Returns the size of the buffer in bytes
    u32 GetCapacity() const {
        return info.capacity;
    }

    /// Returns the usage of the buffer
    BufferUsage GetUsage() const {
        return info.usage;
    }

    /// Returns the starting offset of the currently mapped buffer slice.
    /// NOTE(review): declared u64 but backed by a u32 member — confirm which
    /// width is intended.
    u64 GetCurrentOffset() const {
        return buffer_offset;
    }

    /// Returns whether the buffer was invalidated by the most recent Map call
    bool IsInvalid() const {
        return invalid;
    }

    /// Invalidates the buffer, resetting the mapped offset to the start
    void Invalidate() {
        buffer_offset = 0;
        invalid = true;
    }

protected:
    BufferInfo info{};
    u32 buffer_offset = 0;
    bool invalid = false;
};
|
||||
|
||||
using BufferHandle = IntrusivePtr<BufferBase>;
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace std {
// Enables BufferInfo as a key in std::unordered_map-style containers by
// delegating to its byte-wise Hash() implementation.
template <>
struct hash<VideoCore::BufferInfo> {
    std::size_t operator()(const VideoCore::BufferInfo& info) const noexcept {
        return info.Hash();
    }
};
} // namespace std
|
69
src/video_core/common/framebuffer.h
Normal file
69
src/video_core/common/framebuffer.h
Normal file
@ -0,0 +1,69 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/common/texture.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
// Number of MSAA samples per pixel (x1 = no multisampling).
enum class MSAASamples : u32 {
    x1,
    x2,
    x4,
    x8
};

/**
 * Information about a framebuffer
 */
struct FramebufferInfo {
    TextureHandle color;
    TextureHandle depth_stencil;
    MSAASamples samples = MSAASamples::x1;
    Rect2D draw_rect{};

    /// Hashes the framebuffer object and returns a unique identifier
    const u64 Hash() const {
        // The only member IntrusivePtr has is a pointer to the
        // handle so it's fine hash it
        // NOTE(review): byte-wise hashing means two handles to identical
        // attachments hash differently, and any padding inside this struct
        // would make the hash nondeterministic — confirm the layout is packed.
        return Common::ComputeStructHash64(*this);
    }
};
|
||||
|
||||
/**
|
||||
* A framebuffer is a collection of render targets and their configuration
|
||||
*/
|
||||
class FramebufferBase : public IntrusivePtrEnabled<FramebufferBase> {
|
||||
public:
|
||||
FramebufferBase(const FramebufferInfo& info) : info(info) {}
|
||||
virtual ~FramebufferBase() = default;
|
||||
|
||||
/// Returns an immutable reference to the color attachment
|
||||
const TextureHandle& GetColorAttachment() const {
|
||||
return info.color;
|
||||
}
|
||||
|
||||
/// Returns an immutable reference to the depth/stencil attachment
|
||||
const TextureHandle& GetDepthStencilAttachment() const {
|
||||
return info.depth_stencil;
|
||||
}
|
||||
|
||||
/// Returns how many samples the framebuffer takes
|
||||
MSAASamples GetMSAASamples() const {
|
||||
return info.samples;
|
||||
}
|
||||
|
||||
/// Returns the rendering area
|
||||
Rect2D GetDrawRectangle() const {
|
||||
return info.draw_rect;
|
||||
}
|
||||
|
||||
protected:
|
||||
FramebufferInfo info;
|
||||
};
|
||||
|
||||
using FramebufferHandle = IntrusivePtr<FramebufferBase>;
|
||||
|
||||
} // namespace VideoCore
|
157
src/video_core/common/pica_types.h
Normal file
157
src/video_core/common/pica_types.h
Normal file
@ -0,0 +1,157 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <boost/serialization/access.hpp>
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Pica {
|
||||
|
||||
/**
|
||||
* Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision
|
||||
* floating point.
|
||||
*
|
||||
* When decoding, format is as follows:
|
||||
* - The first `M` bits are the mantissa
|
||||
* - The next `E` bits are the exponent
|
||||
* - The last bit is the sign bit
|
||||
*
|
||||
* @todo Verify on HW if this conversion is sufficiently accurate.
|
||||
*/
|
||||
template <u32 M, u32 E>
struct Float {
    // Bit width of the packed representation: M mantissa + E exponent + 1 sign bit.
    static constexpr u32 width = M + E + 1;
    // Exponent rebasing term: 128 - 2^(E-1) == 127 - (2^(E-1) - 1), i.e. the
    // difference between the IEEE 754 single-precision bias (127) and this
    // format's bias (2^(E-1) - 1).
    static constexpr u32 bias = 128 - (1 << (E - 1));
    static constexpr u32 exponent_mask = (1 << E) - 1;
    static constexpr u32 mantissa_mask = (1 << M) - 1;
    static constexpr u32 sign_mask = 1 << (E + M);
public:
    /// Wraps an IEEE float directly; no range/precision adjustment is performed.
    static Float FromFloat32(float val) {
        Float ret;
        ret.value = val;
        return ret;
    }

    /// Decodes a raw PICA-encoded value (bit layout described in the class comment).
    static Float FromRaw(u32 hex) {
        Float res;

        u32 exponent = (hex >> M) & exponent_mask;
        const u32 mantissa = hex & mantissa_mask;
        // Move the sign bit from position M+E up to IEEE bit 31.
        const u32 sign = (hex & sign_mask) << (31 - M - E);

        if (hex & (mantissa_mask | (exponent_mask << M))) {
            if (exponent == exponent_mask) {
                // All-ones exponent maps to IEEE's all-ones exponent (255).
                exponent = 255;
            } else {
                // Rebase the exponent into IEEE range.
                exponent += bias;
            }

            // Assemble the IEEE pattern: mantissa widened to 23 bits.
            hex = sign | (mantissa << (23 - M)) | (exponent << 23);
        } else {
            // Exponent and mantissa all zero: the value is a (signed) zero.
            hex = sign;
        }

        // Reinterpret the assembled IEEE bit pattern as a float.
        std::memcpy(&res.value, &hex, sizeof(float));
        return res;
    }

    static Float Zero() {
        return FromFloat32(0.f);
    }

    // Not recommended for anything but logging
    float ToFloat32() const {
        return value;
    }

    Float operator*(const Float& flt) const {
        float result = value * flt.ToFloat32();
        // PICA gives 0 instead of NaN when multiplying by inf
        if (std::isnan(result) && !std::isnan(value) && !std::isnan(flt.ToFloat32())) {
            result = 0.f;
        }

        return Float::FromFloat32(result);
    }

    Float operator/(const Float& flt) const {
        return Float::FromFloat32(ToFloat32() / flt.ToFloat32());
    }

    Float operator+(const Float& flt) const {
        return Float::FromFloat32(ToFloat32() + flt.ToFloat32());
    }

    Float operator-(const Float& flt) const {
        return Float::FromFloat32(ToFloat32() - flt.ToFloat32());
    }

    // NOTE(review): *= routes through operator* (and thus the PICA inf*0
    // rule) but /=, += and -= operate on the raw float directly.
    Float& operator*=(const Float& flt) {
        value = operator*(flt).value;
        return *this;
    }

    Float& operator/=(const Float& flt) {
        value /= flt.ToFloat32();
        return *this;
    }

    Float& operator+=(const Float& flt) {
        value += flt.ToFloat32();
        return *this;
    }

    Float& operator-=(const Float& flt) {
        value -= flt.ToFloat32();
        return *this;
    }

    Float operator-() const {
        return Float::FromFloat32(-ToFloat32());
    }

    bool operator<(const Float& flt) const {
        return ToFloat32() < flt.ToFloat32();
    }

    bool operator>(const Float& flt) const {
        return ToFloat32() > flt.ToFloat32();
    }

    bool operator>=(const Float& flt) const {
        return ToFloat32() >= flt.ToFloat32();
    }

    bool operator<=(const Float& flt) const {
        return ToFloat32() <= flt.ToFloat32();
    }

    bool operator==(const Float& flt) const {
        return ToFloat32() == flt.ToFloat32();
    }

    bool operator!=(const Float& flt) const {
        return ToFloat32() != flt.ToFloat32();
    }

private:
    // Stored as a regular float, merely for convenience
    // TODO: Perform proper arithmetic on this!
    float value;

    friend class boost::serialization::access;
    template <class Archive>
    void serialize(Archive& ar, const unsigned int file_version) {
        ar& value;
    }
};
|
||||
|
||||
using Float24 = Float<16, 7>;
|
||||
using Float20 = Float<12, 7>;
|
||||
using Float16 = Float<10, 5>;
|
||||
|
||||
} // namespace Pica
|
223
src/video_core/common/pipeline.h
Normal file
223
src/video_core/common/pipeline.h
Normal file
@ -0,0 +1,223 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/bit_field_array.h"
|
||||
#include "common/hash.h"
|
||||
#include "video_core/common/buffer.h"
|
||||
#include "video_core/common/texture.h"
|
||||
#include "video_core/common/shader.h"
|
||||
#include "video_core/regs_framebuffer.h"
|
||||
#include "video_core/regs_rasterizer.h"
|
||||
#include "video_core/regs_pipeline.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
constexpr u32 MAX_SHADER_STAGES = 3;
constexpr u32 MAX_VERTEX_ATTRIBUTES = 8;
constexpr u32 MAX_BINDINGS_IN_GROUP = 7;
constexpr u32 MAX_BINDING_GROUPS = 6;

enum class PipelineType : u8 {
    Compute = 0,
    Graphics = 1
};

// Resource kind occupying a binding slot; stored in 3 bits (values 0-6).
enum class BindingType : u32 {
    None = 0,
    Uniform = 1,
    UniformDynamic = 2,
    TexelBuffer = 3,
    Texture = 4,
    Sampler = 5,
    StorageImage = 6
};

// Packs MAX_BINDINGS_IN_GROUP 3-bit BindingType entries into one bitfield array.
using BindingGroup = BitFieldArray<0, 3, MAX_BINDINGS_IN_GROUP, BindingType>;

/**
 * Describes all the resources used in the pipeline
 */
struct PipelineLayoutInfo {
    u8 group_count = 0;
    std::array<BindingGroup, MAX_BINDING_GROUPS> binding_groups{};
    u8 push_constant_block_size = 0;
};

/**
 * The pipeline state is tightly packed with bitfields to reduce
 * the overhead of hashing as much as possible
 */
union RasterizationState {
    u8 value = 0;
    BitField<0, 2, Pica::TriangleTopology> topology;
    BitField<4, 2, Pica::CullMode> cull_mode;
};

union DepthStencilState {
    u64 value = 0;
    BitField<0, 1, u64> depth_test_enable;
    BitField<1, 1, u64> depth_write_enable;
    BitField<2, 1, u64> stencil_test_enable;
    BitField<3, 3, Pica::CompareFunc> depth_compare_op;
    BitField<6, 3, Pica::StencilAction> stencil_fail_op;
    BitField<9, 3, Pica::StencilAction> stencil_pass_op;
    BitField<12, 3, Pica::StencilAction> stencil_depth_fail_op;
    BitField<15, 3, Pica::CompareFunc> stencil_compare_op;
    BitField<18, 8, u64> stencil_reference;
    BitField<26, 8, u64> stencil_compare_mask;
    BitField<34, 8, u64> stencil_write_mask;
};

union BlendState {
    u32 value = 0;
    BitField<0, 4, Pica::BlendFactor> src_color_blend_factor;
    BitField<4, 4, Pica::BlendFactor> dst_color_blend_factor;
    BitField<8, 3, Pica::BlendEquation> color_blend_eq;
    BitField<11, 4, Pica::BlendFactor> src_alpha_blend_factor;
    BitField<15, 4, Pica::BlendFactor> dst_alpha_blend_factor;
    BitField<19, 3, Pica::BlendEquation> alpha_blend_eq;
    BitField<22, 4, u32> color_write_mask;
};

// Scalar type of a vertex attribute component.
enum class AttribType : u8 {
    Float = 0,
    Int = 1,
    Short = 2
};

union VertexAttribute {
    u8 value = 0;
    BitField<0, 2, AttribType> type;
    BitField<2, 3, u8> components;
};

// Packed so that byte-wise hashing of PipelineInfo sees no padding.
#pragma pack(1)
struct VertexLayout {
    u8 stride = 0;
    std::array<VertexAttribute, MAX_VERTEX_ATTRIBUTES> attributes;
};
#pragma pack()

/**
 * Information about a graphics/compute pipeline
 */
#pragma pack(1)
struct PipelineInfo {
    std::array<ShaderHandle, MAX_SHADER_STAGES> shaders{};
    VertexLayout vertex_layout{};
    PipelineLayoutInfo layout{};
    BlendState blending{};
    DepthStencilState depth_stencil{};
    RasterizationState rasterization{};

    // NOTE(review): this hashes the raw bytes, including the ShaderHandle
    // pointers — two pipelines with equal state but distinct shader objects
    // hash differently. Confirm that is the intended cache-key semantics.
    const u64 Hash() const {
        return Common::ComputeStructHash64(*this);
    }
};
#pragma pack()
|
||||
|
||||
/// Base class for backend pipeline objects. Holds the PipelineInfo it was
/// created from; the Set* helpers below mutate that cached state.
class PipelineBase : public IntrusivePtrEnabled<PipelineBase> {
public:
    PipelineBase(PipelineType type, PipelineInfo info) :
        type(type), info(info) {}
    virtual ~PipelineBase() = default;

    // Disable copy constructor
    PipelineBase(const PipelineBase&) = delete;
    PipelineBase& operator=(const PipelineBase&) = delete;

    // Binds the texture in the specified slot
    virtual void BindTexture(u32 group, u32 slot, TextureHandle handle) = 0;

    // Binds the buffer in the specified slot
    virtual void BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view = 0) = 0;

    // Binds the sampler in the specified slot
    virtual void BindSampler(u32 group, u32 slot, SamplerHandle handle) = 0;

    /// Sets the primitive topology
    void SetTopology(Pica::TriangleTopology topology) {
        info.rasterization.topology.Assign(topology);
    }

    /// Sets the culling mode
    void SetCullMode(Pica::CullMode mode) {
        info.rasterization.cull_mode.Assign(mode);
    }

    /// Configures the color blending function
    void SetColorBlendFunc(Pica::BlendFactor src_color_factor,
                           Pica::BlendFactor dst_color_factor,
                           Pica::BlendEquation color_eq) {
        info.blending.src_color_blend_factor.Assign(src_color_factor);
        info.blending.dst_color_blend_factor.Assign(dst_color_factor);
        info.blending.color_blend_eq.Assign(color_eq);
    }

    /// Configures the alpha blending function
    void SetAlphaBlendFunc(Pica::BlendFactor src_alpha_factor,
                           Pica::BlendFactor dst_alpha_factor,
                           Pica::BlendEquation alpha_eq) {
        info.blending.src_alpha_blend_factor.Assign(src_alpha_factor);
        info.blending.dst_alpha_blend_factor.Assign(dst_alpha_factor);
        info.blending.alpha_blend_eq.Assign(alpha_eq);
    }

    /// Sets the color write mask
    void SetColorWriteMask(u32 mask) {
        info.blending.color_write_mask.Assign(mask);
    }

    /// Configures the depth test
    void SetDepthTest(bool enable, Pica::CompareFunc compare_op) {
        info.depth_stencil.depth_test_enable.Assign(enable);
        info.depth_stencil.depth_compare_op.Assign(compare_op);
    }

    /// Enables or disables depth writes
    void SetDepthWrites(bool enable) {
        info.depth_stencil.depth_write_enable.Assign(enable);
    }

    /// Configures the stencil test
    void SetStencilTest(bool enable, Pica::StencilAction fail, Pica::StencilAction pass,
                        Pica::StencilAction depth_fail, Pica::CompareFunc compare, u32 ref) {
        info.depth_stencil.stencil_test_enable.Assign(enable);
        info.depth_stencil.stencil_fail_op.Assign(fail);
        info.depth_stencil.stencil_pass_op.Assign(pass);
        info.depth_stencil.stencil_depth_fail_op.Assign(depth_fail);
        info.depth_stencil.stencil_compare_op.Assign(compare);
        info.depth_stencil.stencil_reference.Assign(ref);
    }

    /// Selects the bits of the stencil values participating in the stencil test
    void SetStencilCompareMask(u32 mask) {
        info.depth_stencil.stencil_compare_mask.Assign(mask);
    }

    /// Selects the bits of the stencil values updated by the stencil test
    void SetStencilWriteMask(u32 mask) {
        info.depth_stencil.stencil_write_mask.Assign(mask);
    }

protected:
    PipelineType type = PipelineType::Graphics;
    PipelineInfo info{};
};
|
||||
|
||||
using PipelineHandle = IntrusivePtr<PipelineBase>;
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace std {
// Enables PipelineInfo as a key in std::unordered_map-style containers by
// delegating to its byte-wise Hash() implementation.
template <>
struct hash<VideoCore::PipelineInfo> {
    std::size_t operator()(const VideoCore::PipelineInfo& info) const noexcept {
        return info.Hash();
    }
};
} // namespace std
|
62
src/video_core/common/shader.h
Normal file
62
src/video_core/common/shader.h
Normal file
@ -0,0 +1,62 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
#include <string_view>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "common/intrusive_ptr.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
// Pipeline stage a shader module is compiled for.
enum class ShaderStage : u32 {
    Vertex = 0,
    Geometry = 1,
    Fragment = 2,
    Compute = 3,
    Undefined = 4
};

// Tells the module how much to optimize the bytecode
enum class ShaderOptimization : u32 {
    High = 0,
    Debug = 1
};
|
||||
|
||||
/// Compiles shader source to backend representation
|
||||
class ShaderBase : public IntrusivePtrEnabled<ShaderBase> {
|
||||
public:
|
||||
ShaderBase(ShaderStage stage, std::string_view name, std::string&& source) :
|
||||
name(name), stage(stage), source(source) {}
|
||||
virtual ~ShaderBase() = default;
|
||||
|
||||
/// Compiles the shader source code
|
||||
virtual bool Compile(ShaderOptimization level) = 0;
|
||||
|
||||
/// Returns the API specific shader bytecode
|
||||
std::string_view GetSource() const {
|
||||
return source;
|
||||
}
|
||||
|
||||
/// Returns the name given the shader module
|
||||
std::string_view GetName() const {
|
||||
return name;
|
||||
}
|
||||
|
||||
/// Returns the pipeline stage the shader is assigned to
|
||||
ShaderStage GetStage() const {
|
||||
return stage;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::string_view name = "None";
|
||||
ShaderStage stage = ShaderStage::Undefined;
|
||||
std::string source;
|
||||
};
|
||||
|
||||
using ShaderHandle = IntrusivePtr<ShaderBase>;
|
||||
|
||||
} // namespace VideoCore
|
179
src/video_core/common/shader_gen.cpp
Normal file
179
src/video_core/common/shader_gen.cpp
Normal file
@ -0,0 +1,179 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_set.h"
|
||||
#include "video_core/video_core.h"
|
||||
#include "video_core/common/shader_gen.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/**
 * Builds the fragment shader cache key from the current Pica register state.
 * Only state that affects the generated shader code is captured; high-variance
 * values (e.g. tev const_color) are provided through uniforms instead.
 */
PicaFSConfig::PicaFSConfig(const Pica::Regs& regs) {
    scissor_test_mode = regs.rasterizer.scissor_test.mode;
    depthmap_enable = regs.rasterizer.depthmap_enable;
    alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable
                          ? regs.framebuffer.output_merger.alpha_test.func.Value()
                          : Pica::CompareFunc::Always;
    texture0_type = regs.texturing.texture0.type;
    texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;

    // We don't need these otherwise, reset them to avoid unnecessary shader generation
    alphablend_enable = {};
    logic_op = {};

    // Copy relevant tev stages fields.
    // We don't sync const_color here because of the high variance, it is a
    // shader uniform instead.
    const auto stages = regs.texturing.GetTevStages();
    // Fixed: previously asserted on the undefined name 'state.tev_stages'
    DEBUG_ASSERT(stages.size() == tev_stages.size());
    for (std::size_t i = 0; i < stages.size(); i++) {
        const auto& tev_stage = stages[i];
        tev_stages[i].sources_raw = tev_stage.sources_raw;
        tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
        tev_stages[i].ops_raw = tev_stage.ops_raw;
        tev_stages[i].scales_raw = tev_stage.scales_raw;
    }

    fog_mode = regs.texturing.fog_mode;
    fog_flip = regs.texturing.fog_flip != 0;

    // Lower nibble tracks rgb combiner-buffer updates, upper nibble alpha updates
    combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
                            regs.texturing.tev_combiner_buffer_input.update_mask_a.Value()
                                << 4;

    // Fragment lighting
    lighting.enable = !regs.lighting.disable;
    lighting.src_num = regs.lighting.max_light_index + 1;

    for (u32 light_index = 0; light_index < lighting.src_num; ++light_index) {
        u32 num = regs.lighting.light_enable.GetNum(light_index);
        const auto& light = regs.lighting.light[num];
        auto& dst_light = lighting.light[light_index];

        dst_light.num = num;
        dst_light.directional = light.config.directional != 0;
        dst_light.two_sided_diffuse = light.config.two_sided_diffuse != 0;
        dst_light.geometric_factor_0 = light.config.geometric_factor_0 != 0;
        dst_light.geometric_factor_1 = light.config.geometric_factor_1 != 0;
        dst_light.dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
        dst_light.spot_atten_enable = !regs.lighting.IsSpotAttenDisabled(num);
        dst_light.shadow_enable = !regs.lighting.IsShadowDisabled(num);
    }

    lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
    lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
    lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
    lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);

    lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0;
    lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
    lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
    lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);

    // This is a dummy field due to lack of the corresponding register
    lighting.lut_sp.enable = true;
    lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0;
    lighting.lut_sp.type = regs.lighting.lut_input.sp.Value();
    lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp);

    lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0;
    lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
    lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
    lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);

    lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0;
    lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
    lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
    lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);

    lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0;
    lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
    lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
    lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);

    lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0;
    lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
    lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
    lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);

    lighting.config = regs.lighting.config0.config;
    lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha;
    lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha;
    lighting.bump_mode = regs.lighting.config0.bump_mode;
    lighting.bump_selector = regs.lighting.config0.bump_selector;
    lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0;
    lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0;

    lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0;
    lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0;
    lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0;
    lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0;
    lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0;
    lighting.shadow_selector = regs.lighting.config0.shadow_selector;

    // Procedural texture unit: only captured when enabled to keep the key stable
    proctex.enable = regs.texturing.main_config.texture3_enable;
    if (proctex.enable) {
        proctex.coord = regs.texturing.main_config.texture3_coordinates;
        proctex.u_clamp = regs.texturing.proctex.u_clamp;
        proctex.v_clamp = regs.texturing.proctex.v_clamp;
        proctex.color_combiner = regs.texturing.proctex.color_combiner;
        proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner;
        proctex.separate_alpha = regs.texturing.proctex.separate_alpha;
        proctex.noise_enable = regs.texturing.proctex.noise_enable;
        proctex.u_shift = regs.texturing.proctex.u_shift;
        proctex.v_shift = regs.texturing.proctex.v_shift;
        proctex.lut_width = regs.texturing.proctex_lut.width;
        proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0;
        proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1;
        proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2;
        proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3;
        proctex.lod_min = regs.texturing.proctex_lut.lod_min;
        proctex.lod_max = regs.texturing.proctex_lut.lod_max;
        proctex.lut_filter = regs.texturing.proctex_lut.filter;
    }

    shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
                       Pica::FragmentOperationMode::Shadow;

    shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0;
}
|
||||
|
||||
/**
 * Builds the vertex shader cache key from the shader registers and the
 * currently loaded shader program/swizzle data.
 */
PicaVSConfig::PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
    // Identify the program by the hashes of its code and swizzle data
    program_hash = setup.GetProgramCodeHash();
    swizzle_hash = setup.GetSwizzleDataHash();
    main_offset = regs.main_offset;
    sanitize_mul = VideoCore::g_hw_shader_accurate_mul;

    // Map each enabled output register to a consecutive attribute index;
    // registers left out of the output mask keep the invalid slot 16.
    output_map.fill(16);
    num_outputs = 0;
    for (int output_reg : Common::BitSet<u32>(regs.output_mask)) {
        output_map[output_reg] = num_outputs++;
    }
}
|
||||
|
||||
/**
 * Builds the fixed-function geometry shader cache key; it forwards every
 * vertex output attribute unchanged, so only the attribute/semantic mapping
 * needs to be captured.
 */
PicaFixedGSConfig::PicaFixedGSConfig(const Pica::Regs& regs) {
    vs_output_attributes = Common::BitSet<u32>(regs.vs.output_mask).Count();
    gs_output_attributes = vs_output_attributes;

    // Start with every semantic pointing at the invalid attribute slot 16
    semantic_maps.fill({16, 0});
    for (u32 attrib_index = 0; attrib_index < regs.rasterizer.vs_output_total; ++attrib_index) {
        const auto& output_attrib = regs.rasterizer.vs_output_attributes[attrib_index];
        const std::array semantics = {
            output_attrib.map_x.Value(),
            output_attrib.map_y.Value(),
            output_attrib.map_z.Value(),
            output_attrib.map_w.Value(),
        };

        for (u32 component = 0; component < 4; ++component) {
            const std::size_t semantic = static_cast<std::size_t>(semantics[component]);
            if (semantic < 24) {
                semantic_maps[semantic] = {attrib_index, component};
            } else if (semantic != Pica::RasterizerRegs::VSOutputAttributes::INVALID) {
                LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic);
            }
        }
    }
}
|
||||
|
||||
} // namespace VideoCore
|
227
src/video_core/common/shader_gen.h
Normal file
227
src/video_core/common/shader_gen.h
Normal file
@ -0,0 +1,227 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include "common/hash.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/// Vertex attribute slots shared between the generated shader stages.
/// NOTE(review): presumably these double as the attribute locations used by the
/// backend shader generators — confirm against the generator implementations.
enum Attributes {
    ATTRIBUTE_POSITION,
    ATTRIBUTE_COLOR,
    ATTRIBUTE_TEXCOORD0,
    ATTRIBUTE_TEXCOORD1,
    ATTRIBUTE_TEXCOORD2,
    ATTRIBUTE_TEXCOORD0_W,
    ATTRIBUTE_NORMQUAT,
    ATTRIBUTE_VIEW,
};
|
||||
|
||||
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
|
||||
struct TevStageConfigRaw {
|
||||
u32 sources_raw;
|
||||
u32 modifiers_raw;
|
||||
u32 ops_raw;
|
||||
u32 scales_raw;
|
||||
|
||||
explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
|
||||
Pica::TexturingRegs::TevStageConfig stage;
|
||||
stage.sources_raw = sources_raw;
|
||||
stage.modifiers_raw = modifiers_raw;
|
||||
stage.ops_raw = ops_raw;
|
||||
stage.const_color = 0;
|
||||
stage.scales_raw = scales_raw;
|
||||
return stage;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* This struct contains all state used to generate the GLSL fragment shader that emulates the
|
||||
* current Pica register configuration. This struct is used as a cache key for generated GLSL shader
|
||||
* programs. The functions in gl_shader_gen.cpp should retrieve state from this struct only, not by
|
||||
* directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
|
||||
* Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
|
||||
* two separate shaders sharing the same key.
|
||||
*/
|
||||
struct PicaFSConfig {
|
||||
explicit PicaFSConfig(const Pica::Regs& regs);
|
||||
|
||||
/// Returns the hash of the VS config
|
||||
const u64 Hash() const noexcept {
|
||||
return Common::ComputeStructHash64(*this);
|
||||
}
|
||||
|
||||
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
|
||||
return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index));
|
||||
}
|
||||
|
||||
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
|
||||
return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index));
|
||||
}
|
||||
|
||||
Pica::CompareFunc alpha_test_func;
|
||||
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
|
||||
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
|
||||
std::array<TevStageConfigRaw, 6> tev_stages;
|
||||
bool texture2_use_coord1;
|
||||
u8 combiner_buffer_input;
|
||||
|
||||
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
|
||||
Pica::TexturingRegs::FogMode fog_mode;
|
||||
bool fog_flip;
|
||||
bool alphablend_enable;
|
||||
Pica::LogicOp logic_op;
|
||||
|
||||
struct {
|
||||
struct {
|
||||
unsigned num;
|
||||
bool directional;
|
||||
bool two_sided_diffuse;
|
||||
bool dist_atten_enable;
|
||||
bool spot_atten_enable;
|
||||
bool geometric_factor_0;
|
||||
bool geometric_factor_1;
|
||||
bool shadow_enable;
|
||||
} light[8];
|
||||
|
||||
bool enable;
|
||||
unsigned src_num;
|
||||
Pica::LightingRegs::LightingBumpMode bump_mode;
|
||||
unsigned bump_selector;
|
||||
bool bump_renorm;
|
||||
bool clamp_highlights;
|
||||
|
||||
Pica::LightingRegs::LightingConfig config;
|
||||
bool enable_primary_alpha;
|
||||
bool enable_secondary_alpha;
|
||||
|
||||
bool enable_shadow;
|
||||
bool shadow_primary;
|
||||
bool shadow_secondary;
|
||||
bool shadow_invert;
|
||||
bool shadow_alpha;
|
||||
unsigned shadow_selector;
|
||||
|
||||
struct {
|
||||
bool enable;
|
||||
bool abs_input;
|
||||
Pica::LightingRegs::LightingLutInput type;
|
||||
float scale;
|
||||
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
|
||||
} lighting;
|
||||
|
||||
struct {
|
||||
bool enable;
|
||||
u32 coord;
|
||||
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
|
||||
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
|
||||
bool separate_alpha;
|
||||
bool noise_enable;
|
||||
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
|
||||
u32 lut_width;
|
||||
u32 lut_offset0;
|
||||
u32 lut_offset1;
|
||||
u32 lut_offset2;
|
||||
u32 lut_offset3;
|
||||
u32 lod_min;
|
||||
u32 lod_max;
|
||||
Pica::TexturingRegs::ProcTexFilter lut_filter;
|
||||
} proctex;
|
||||
|
||||
bool shadow_rendering;
|
||||
bool shadow_texture_orthographic;
|
||||
};
|
||||
|
||||
/**
|
||||
* This struct contains information to identify a host vertex shader generated from PICA vertex
|
||||
* shader.
|
||||
*/
|
||||
struct PicaVSConfig {
|
||||
explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
|
||||
|
||||
/// Returns the hash of the VS config
|
||||
const u64 Hash() const noexcept {
|
||||
return Common::ComputeStructHash64(*this);
|
||||
}
|
||||
|
||||
u64 program_hash = 0;
|
||||
u64 swizzle_hash = 0;
|
||||
u32 main_offset = 0;
|
||||
bool sanitize_mul = false;
|
||||
|
||||
// output_map[output register index] -> output attribute index
|
||||
u32 num_outputs = 0;
|
||||
std::array<u32, 16> output_map{};
|
||||
};
|
||||
|
||||
/**
|
||||
* This struct contains information to identify a GL geometry shader generated from PICA no-geometry
|
||||
* shader pipeline
|
||||
*/
|
||||
struct PicaFixedGSConfig {
|
||||
explicit PicaFixedGSConfig(const Pica::Regs& regs);
|
||||
|
||||
/// Returns the hash of the GS config
|
||||
const u64 Hash() const noexcept {
|
||||
return Common::ComputeStructHash64(*this);
|
||||
}
|
||||
|
||||
u32 vs_output_attributes = 0;
|
||||
u32 gs_output_attributes = 0;
|
||||
|
||||
struct SemanticMap {
|
||||
u32 attribute_index = 0;
|
||||
u32 component_index = 0;
|
||||
};
|
||||
|
||||
// semantic_maps[semantic name] -> GS output attribute index + component index
|
||||
std::array<SemanticMap, 24> semantic_maps{};
|
||||
};
|
||||
|
||||
/**
 * Generates backend specific shader modules using the Pica state configuration.
 * Each rendering backend implements this interface to produce source in its own
 * shading language.
 * @todo Be replaced with a unified shader compiler
 */
class ShaderGeneratorBase {
public:
    ShaderGeneratorBase() = default;
    virtual ~ShaderGeneratorBase() = default;

    /**
     * Generates the vertex shader program source code that accepts vertices from software shader
     * and directly passes them to the fragment shader.
     * @param separable_shader generates shader that can be used for separate shader object
     * @returns String of the shader source code
     */
    virtual std::string GenerateTrivialVertexShader(bool separable_shader) = 0;

    /**
     * Generates the vertex shader program source code for the given VS program
     * @returns String of the shader source code
     */
    virtual std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config,
                                             bool separable_shader) = 0;

    /**
     * Generates the fixed geometry shader program source code for the non-GS PICA pipeline
     * @returns String of the shader source code
     */
    virtual std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) = 0;

    /**
     * Generates the fragment shader program source code for the current Pica state
     * @param config ShaderCacheKey object generated for the current Pica state, used for the shader
     * configuration (NOTE: Use state in this struct only, not the Pica registers!)
     * @param separable_shader generates shader that can be used for separate shader object
     * @returns String of the shader source code
     */
    virtual std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) = 0;
};
|
||||
|
||||
} // namespace VideoCore
|
171
src/video_core/common/texture.h
Normal file
171
src/video_core/common/texture.h
Normal file
@ -0,0 +1,171 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include "common/hash.h"
|
||||
#include "common/intrusive_ptr.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
constexpr u32 MAX_COLOR_FORMATS = 5;
|
||||
constexpr u32 MAX_DEPTH_FORMATS = 3;
|
||||
|
||||
/// Pixel formats a backend texture can use. RGBA8..RGBA4 are the five color
/// formats (see MAX_COLOR_FORMATS) and D16..D24S8 the three depth/stencil
/// formats (see MAX_DEPTH_FORMATS).
enum class TextureFormat : u8 {
    RGBA8 = 0,
    RGB8 = 1,
    RGB5A1 = 2,
    RGB565 = 3,
    RGBA4 = 4,
    D16 = 5,
    D24 = 6,
    D24S8 = 7,
    Undefined = 255
};
|
||||
|
||||
/// Dimensionality of a texture resource
enum class TextureType : u8 {
    Texture1D = 0,
    Texture2D = 1,
    Texture3D = 2,
    Undefined = 255
};
|
||||
|
||||
/// How a texture is viewed by the shader (dimensionality, cube, arrays)
enum class TextureViewType : u8 {
    View1D = 0,
    View2D = 1,
    View3D = 2,
    ViewCube = 3,
    View1DArray = 4,
    View2DArray = 5,
    ViewCubeArray = 6,
    Undefined = 255
};
|
||||
|
||||
/**
 * A rectangle describing part of a texture
 * @param x, y are the offset from the bottom left corner
 * @param width, height are the extent of the rectangle
 */
struct Rect2D {
    s32 x = 0;      ///< Horizontal offset from the bottom left corner
    s32 y = 0;      ///< Vertical offset from the bottom left corner
    u32 width = 0;  ///< Horizontal extent of the rectangle
    u32 height = 0; ///< Vertical extent of the rectangle
};
|
||||
|
||||
/**
|
||||
* Information about a texture packed to 8 bytes
|
||||
*/
|
||||
struct TextureInfo {
|
||||
u16 width = 0;
|
||||
u16 height = 0;
|
||||
u8 levels = 0;
|
||||
TextureType type = TextureType::Undefined;
|
||||
TextureViewType view_type = TextureViewType::Undefined;
|
||||
TextureFormat format = TextureFormat::Undefined;
|
||||
|
||||
const u64 Hash() const {
|
||||
return Common::ComputeStructHash64(*this);
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(sizeof(TextureInfo) == 8, "TextureInfo not packed!");
|
||||
static_assert(std::is_standard_layout_v<TextureInfo>, "TextureInfo is not a standard layout!");
|
||||
|
||||
class TextureBase;
|
||||
using TextureHandle = IntrusivePtr<TextureBase>;
|
||||
|
||||
class TextureBase : public IntrusivePtrEnabled<TextureBase> {
|
||||
public:
|
||||
TextureBase() = default;
|
||||
TextureBase(const TextureInfo& info) : info(info) {}
|
||||
virtual ~TextureBase() = default;
|
||||
|
||||
/// Uploads pixel data to the GPU memory
|
||||
virtual void Upload(Rect2D rectangle, u32 stride, std::span<const u8> data,
|
||||
u32 level = 0) {};
|
||||
|
||||
/// Downloads pixel data from GPU memory
|
||||
virtual void Download(Rect2D rectangle, u32 stride, std::span<u8> data,
|
||||
u32 level = 0) {};
|
||||
|
||||
/// Copies the rectangle area specified to the destionation texture
|
||||
virtual void BlitTo(TextureHandle dest, Rect2D src_rectangle, Rect2D dest_rect,
|
||||
u32 src_level = 0, u32 dest_level = 0) {};
|
||||
|
||||
/// Returns the unique texture identifier
|
||||
const u64 GetHash() const {
|
||||
return info.Hash();
|
||||
}
|
||||
|
||||
/// Returns the width of the texture
|
||||
u16 GetWidth() const {
|
||||
return info.width;
|
||||
}
|
||||
|
||||
/// Returns the height of the texture
|
||||
u16 GetHeight() const {
|
||||
return info.height;
|
||||
}
|
||||
|
||||
/// Returns the number of mipmap levels allocated
|
||||
u16 GetMipLevels() const {
|
||||
return info.levels;
|
||||
}
|
||||
|
||||
/// Returns the pixel format
|
||||
TextureFormat GetFormat() const {
|
||||
return info.format;
|
||||
}
|
||||
|
||||
protected:
|
||||
TextureInfo info;
|
||||
};
|
||||
|
||||
struct SamplerInfo {
|
||||
Pica::TextureFilter mag_filter;
|
||||
Pica::TextureFilter min_filter;
|
||||
Pica::TextureFilter mip_filter;
|
||||
Pica::WrapMode wrap_s;
|
||||
Pica::WrapMode wrap_t;
|
||||
u32 border_color = 0;
|
||||
u32 lod_min = 0;
|
||||
u32 lod_max = 0;
|
||||
s32 lod_bias = 0;
|
||||
|
||||
const u64 Hash() const {
|
||||
return Common::ComputeStructHash64(*this);
|
||||
}
|
||||
};
|
||||
|
||||
/// Backend-agnostic base class for texture samplers; backends create their
/// concrete sampler objects from the SamplerInfo description.
class SamplerBase : public IntrusivePtrEnabled<SamplerBase> {
public:
    SamplerBase(SamplerInfo info) : info(info) {}
    virtual ~SamplerBase() = default;

protected:
    SamplerInfo info{}; ///< Description this sampler was created from
};
|
||||
|
||||
using SamplerHandle = IntrusivePtr<SamplerBase>;
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace std {
|
||||
/// Allows TextureInfo to be used as a key in std:: unordered containers
template <>
struct hash<VideoCore::TextureInfo> {
    std::size_t operator()(const VideoCore::TextureInfo& info) const noexcept {
        return info.Hash();
    }
};
|
||||
|
||||
/// Allows SamplerInfo to be used as a key in std:: unordered containers
template <>
struct hash<VideoCore::SamplerInfo> {
    std::size_t operator()(const VideoCore::SamplerInfo& info) const noexcept {
        return info.Hash();
    }
};
|
||||
} // namespace std
|
60
src/video_core/gpu.cpp
Normal file
60
src/video_core/gpu.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
#include "core/core.h"
|
||||
#include "video_core/pica.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
|
||||
/**
 * Instantiates the renderer selected in the settings.
 * @returns The renderer, or nullptr for an unknown backend setting
 */
std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::System& system,
                                                        Frontend::EmuWindow& emu_window) {
    // Currently unused; presumably to be forwarded to the renderers once the
    // backend rewrite is complete — marked to silence unused-variable warnings.
    [[maybe_unused]] auto& telemetry_session = system.TelemetrySession();
    [[maybe_unused]] auto& cpu_memory = system.Memory();

    switch (Settings::values.renderer_backend) {
    case Settings::RendererBackend::OpenGL:
        return std::make_unique<OpenGL::RendererOpenGL>(emu_window);
    case Settings::RendererBackend::Vulkan:
        return std::make_unique<Vulkan::RendererVulkan>(emu_window);
    default:
        return nullptr;
    }
}
|
||||
|
||||
namespace Pica {
|
||||
|
||||
GPU::GPU(Core::System& system, Memory::MemorySystem& memory) :
    system(system), memory(memory) {
    //renderer = CreateRenderer(system, )
    // Fixed: renderer is never constructed yet (see the commented-out call
    // above), so unconditionally calling renderer->Rasterizer() dereferenced a
    // null unique_ptr. Guard until renderer creation is wired up.
    if (renderer) {
        rasterizer = renderer->Rasterizer();
    }
}
|
||||
|
||||
// Presents the completed frame through the renderer
void GPU::SwapBuffers() {
    renderer->SwapBuffers();
}
|
||||
|
||||
// Forwards to the rasterizer: write all cached data back to emulated memory
void GPU::FlushAll() {
    rasterizer->FlushAll();
}
|
||||
|
||||
// Forwards to the rasterizer: flush caches overlapping [addr, addr + size)
void GPU::FlushRegion(PAddr addr, u32 size) {
    rasterizer->FlushRegion(addr, size);
}
|
||||
|
||||
// Forwards to the rasterizer: invalidate caches overlapping [addr, addr + size)
void GPU::InvalidateRegion(PAddr addr, u32 size) {
    rasterizer->InvalidateRegion(addr, size);
}
|
||||
|
||||
// Forwards to the rasterizer: flush then invalidate caches in the region
void GPU::FlushAndInvalidateRegion(PAddr addr, u32 size) {
    rasterizer->FlushAndInvalidateRegion(addr, size);
}
|
||||
|
||||
// Drops rasterizer state/caches (optionally flushing first); used in
// preparation for a save/load state (see gpu.h)
void GPU::ClearAll(bool flush) {
    rasterizer->ClearAll(flush);
}
|
||||
|
||||
} // namespace Pica
|
81
src/video_core/gpu.h
Normal file
81
src/video_core/gpu.h
Normal file
@ -0,0 +1,81 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include "core/frontend/framebuffer_layout.h"
|
||||
#include "video_core/maestro.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Memory {
|
||||
class MemorySystem;
|
||||
}
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RendererBase;
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Pica {
|
||||
|
||||
class Maestro;
|
||||
|
||||
/// Status codes — presumably returned by GPU/renderer initialization; confirm
/// once the callers are wired up
enum class ResultStatus {
    Success,
    ErrorGenericDrivers,
    ErrorUnsupportedGL,
};
|
||||
|
||||
/**
 * Interface for the PICA GPU: owns the renderer and forwards cache
 * maintenance requests to its rasterizer.
 */
class GPU {
public:
    GPU(Core::System& system, Memory::MemorySystem& memory);
    // NOTE(review): renderer/maestro are unique_ptrs to forward-declared types;
    // a defaulted inline destructor requires their complete definitions in
    // every TU that destroys a GPU. Consider declaring ~GPU(); here and
    // defaulting it in gpu.cpp — TODO confirm.
    ~GPU() = default;

    /// Swap buffers (render frame)
    void SwapBuffers();

    /// Notify rasterizer that all caches should be flushed to 3DS memory
    void FlushAll();

    /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
    void FlushRegion(PAddr addr, u32 size);

    /// Notify rasterizer that any caches of the specified region should be invalidated
    void InvalidateRegion(PAddr addr, u32 size);

    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
    void FlushAndInvalidateRegion(PAddr addr, u32 size);

    /// Removes as much state as possible from the rasterizer in preparation for a save/load state
    void ClearAll(bool flush);

    /// Request a screenshot of the next frame
    void RequestScreenshot(u8* data, std::function<void()> callback,
                           const Layout::FramebufferLayout& layout);

    /// Returns the resolution scale factor
    u16 GetResolutionScaleFactor();

private:
    Core::System& system;
    Memory::MemorySystem& memory;

    // Renderer
    VideoCore::RasterizerInterface* rasterizer = nullptr; // Non-owning; provided by renderer
    std::unique_ptr<VideoCore::RendererBase> renderer = nullptr;
    std::unique_ptr<Maestro> maestro = nullptr;
};
|
||||
|
||||
} // namespace Pica
|
@ -34,13 +34,13 @@ template <typename T>
|
||||
void Zero(T& o) {
|
||||
static_assert(std::is_trivially_copyable_v<T>,
|
||||
"It's undefined behavior to memset a non-trivially copyable type");
|
||||
std::memset(&o, 0, sizeof(o));
|
||||
memset(&o, 0, sizeof(o));
|
||||
}
|
||||
|
||||
State::State() : geometry_pipeline(*this) {
|
||||
auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) {
|
||||
using Pica::Shader::OutputVertex;
|
||||
auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
|
||||
auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1,
|
||||
const OutputVertex& v2) {
|
||||
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
|
||||
};
|
||||
|
@ -4,7 +4,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/regs_texturing.h"
|
||||
namespace Pica {
|
||||
|
||||
/// Initialize Pica state
|
||||
|
400
src/video_core/pica_regs.inc
Normal file
400
src/video_core/pica_regs.inc
Normal file
@ -0,0 +1,400 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
//#define PICA_REG(name, address)
|
||||
PICA_REG(FINALIZE, 0x010)
|
||||
PICA_REG(FACECULLING_CONFIG, 0x040)
|
||||
PICA_REG(VIEWPORT_WIDTH, 0x041)
|
||||
PICA_REG(VIEWPORT_INVW, 0x042)
|
||||
PICA_REG(VIEWPORT_HEIGHT, 0x043)
|
||||
PICA_REG(VIEWPORT_INVH, 0x044)
|
||||
PICA_REG(FRAGOP_CLIP, 0x047)
|
||||
PICA_REG(FRAGOP_CLIP_DATA0, 0x048)
|
||||
PICA_REG(FRAGOP_CLIP_DATA1, 0x049)
|
||||
PICA_REG(FRAGOP_CLIP_DATA2, 0x04A)
|
||||
PICA_REG(FRAGOP_CLIP_DATA3, 0x04B)
|
||||
PICA_REG(DEPTHMAP_SCALE, 0x04D)
|
||||
PICA_REG(DEPTHMAP_OFFSET, 0x04E)
|
||||
PICA_REG(SH_OUTMAP_TOTAL, 0x04F)
|
||||
PICA_REG(SH_OUTMAP_O0, 0x050)
|
||||
PICA_REG(SH_OUTMAP_O1, 0x051)
|
||||
PICA_REG(SH_OUTMAP_O2, 0x052)
|
||||
PICA_REG(SH_OUTMAP_O3, 0x053)
|
||||
PICA_REG(SH_OUTMAP_O4, 0x054)
|
||||
PICA_REG(SH_OUTMAP_O5, 0x055)
|
||||
PICA_REG(SH_OUTMAP_O6, 0x056)
|
||||
PICA_REG(EARLYDEPTH_FUNC, 0x061)
|
||||
PICA_REG(EARLYDEPTH_TEST1, 0x062)
|
||||
PICA_REG(EARLYDEPTH_CLEAR, 0x063)
|
||||
PICA_REG(SH_OUTATTR_MODE, 0x064)
|
||||
PICA_REG(SCISSORTEST_MODE, 0x065)
|
||||
PICA_REG(SCISSORTEST_POS, 0x066)
|
||||
PICA_REG(SCISSORTEST_DIM, 0x067)
|
||||
PICA_REG(VIEWPORT_XY, 0x068)
|
||||
PICA_REG(EARLYDEPTH_DATA, 0x06A)
|
||||
PICA_REG(DEPTHMAP_ENABLE, 0x06D)
|
||||
PICA_REG(RENDERBUF_DIM, 0x06E)
|
||||
PICA_REG(SH_OUTATTR_CLOCK, 0x06F)
|
||||
PICA_REG(TEXUNIT_CONFIG, 0x080)
|
||||
PICA_REG(TEXUNIT0_BORDER_COLOR, 0x081)
|
||||
PICA_REG(TEXUNIT0_DIM, 0x082)
|
||||
PICA_REG(TEXUNIT0_PARAM, 0x083)
|
||||
PICA_REG(TEXUNIT0_LOD, 0x084)
|
||||
PICA_REG(TEXUNIT0_ADDR1, 0x085)
|
||||
PICA_REG(TEXUNIT0_ADDR2, 0x086)
|
||||
PICA_REG(TEXUNIT0_ADDR3, 0x087)
|
||||
PICA_REG(TEXUNIT0_ADDR4, 0x088)
|
||||
PICA_REG(TEXUNIT0_ADDR5, 0x089)
|
||||
PICA_REG(TEXUNIT0_ADDR6, 0x08A)
|
||||
PICA_REG(TEXUNIT0_SHADOW, 0x08B)
|
||||
PICA_REG(TEXUNIT0_TYPE, 0x08E)
|
||||
PICA_REG(LIGHTING_ENABLE0, 0x08F)
|
||||
PICA_REG(TEXUNIT1_BORDER_COLOR, 0x091)
|
||||
PICA_REG(TEXUNIT1_DIM, 0x092)
|
||||
PICA_REG(TEXUNIT1_PARAM, 0x093)
|
||||
PICA_REG(TEXUNIT1_LOD, 0x094)
|
||||
PICA_REG(TEXUNIT1_ADDR, 0x095)
|
||||
PICA_REG(TEXUNIT1_TYPE, 0x096)
|
||||
PICA_REG(TEXUNIT2_BORDER_COLOR, 0x099)
|
||||
PICA_REG(TEXUNIT2_DIM, 0x09A)
|
||||
PICA_REG(TEXUNIT2_PARAM, 0x09B)
|
||||
PICA_REG(TEXUNIT2_LOD, 0x09C)
|
||||
PICA_REG(TEXUNIT2_ADDR, 0x09D)
|
||||
PICA_REG(TEXUNIT2_TYPE, 0x09E)
|
||||
PICA_REG(TEXUNIT3_PROCTEX0, 0x0A8)
|
||||
PICA_REG(TEXUNIT3_PROCTEX1, 0x0A9)
|
||||
PICA_REG(TEXUNIT3_PROCTEX2, 0x0AA)
|
||||
PICA_REG(TEXUNIT3_PROCTEX3, 0x0AB)
|
||||
PICA_REG(TEXUNIT3_PROCTEX4, 0x0AC)
|
||||
PICA_REG(TEXUNIT3_PROCTEX5, 0x0AD)
|
||||
PICA_REG(PROCTEX_LUT, 0x0AF)
|
||||
PICA_REG(PROCTEX_LUT_DATA0, 0x0B0)
|
||||
PICA_REG(PROCTEX_LUT_DATA1, 0x0B1)
|
||||
PICA_REG(PROCTEX_LUT_DATA2, 0x0B2)
|
||||
PICA_REG(PROCTEX_LUT_DATA3, 0x0B3)
|
||||
PICA_REG(PROCTEX_LUT_DATA4, 0x0B4)
|
||||
PICA_REG(PROCTEX_LUT_DATA5, 0x0B5)
|
||||
PICA_REG(PROCTEX_LUT_DATA6, 0x0B6)
|
||||
PICA_REG(PROCTEX_LUT_DATA7, 0x0B7)
|
||||
PICA_REG(TEXENV0_SOURCE, 0x0C0)
|
||||
PICA_REG(TEXENV0_OPERAND, 0x0C1)
|
||||
PICA_REG(TEXENV0_COMBINER, 0x0C2)
|
||||
PICA_REG(TEXENV0_COLOR, 0x0C3)
|
||||
PICA_REG(TEXENV0_SCALE, 0x0C4)
|
||||
PICA_REG(TEXENV1_SOURCE, 0x0C8)
|
||||
PICA_REG(TEXENV1_OPERAND, 0x0C9)
|
||||
PICA_REG(TEXENV1_COMBINER, 0x0CA)
|
||||
PICA_REG(TEXENV1_COLOR, 0x0CB)
|
||||
PICA_REG(TEXENV1_SCALE, 0x0CC)
|
||||
PICA_REG(TEXENV2_SOURCE, 0x0D0)
|
||||
PICA_REG(TEXENV2_OPERAND, 0x0D1)
|
||||
PICA_REG(TEXENV2_COMBINER, 0x0D2)
|
||||
PICA_REG(TEXENV2_COLOR, 0x0D3)
|
||||
PICA_REG(TEXENV2_SCALE, 0x0D4)
|
||||
PICA_REG(TEXENV3_SOURCE, 0x0D8)
|
||||
PICA_REG(TEXENV3_OPERAND, 0x0D9)
|
||||
PICA_REG(TEXENV3_COMBINER, 0x0DA)
|
||||
PICA_REG(TEXENV3_COLOR, 0x0DB)
|
||||
PICA_REG(TEXENV3_SCALE, 0x0DC)
|
||||
PICA_REG(TEXENV_UPDATE_BUFFER, 0x0E0)
|
||||
PICA_REG(FOG_COLOR, 0x0E1)
|
||||
PICA_REG(GAS_ATTENUATION, 0x0E4)
|
||||
PICA_REG(GAS_ACCMAX, 0x0E5)
|
||||
PICA_REG(FOG_LUT_INDEX, 0x0E6)
|
||||
PICA_REG(FOG_LUT_DATA0, 0x0E8)
|
||||
PICA_REG(FOG_LUT_DATA1, 0x0E9)
|
||||
PICA_REG(FOG_LUT_DATA2, 0x0EA)
|
||||
PICA_REG(FOG_LUT_DATA3, 0x0EB)
|
||||
PICA_REG(FOG_LUT_DATA4, 0x0EC)
|
||||
PICA_REG(FOG_LUT_DATA5, 0x0ED)
|
||||
PICA_REG(FOG_LUT_DATA6, 0x0EE)
|
||||
PICA_REG(FOG_LUT_DATA7, 0x0EF)
|
||||
PICA_REG(TEXENV4_SOURCE, 0x0F0)
|
||||
PICA_REG(TEXENV4_OPERAND, 0x0F1)
|
||||
PICA_REG(TEXENV4_COMBINER, 0x0F2)
|
||||
PICA_REG(TEXENV4_COLOR, 0x0F3)
|
||||
PICA_REG(TEXENV4_SCALE, 0x0F4)
|
||||
PICA_REG(TEXENV5_SOURCE, 0x0F8)
|
||||
PICA_REG(TEXENV5_OPERAND, 0x0F9)
|
||||
PICA_REG(TEXENV5_COMBINER, 0x0FA)
|
||||
PICA_REG(TEXENV5_COLOR, 0x0FB)
|
||||
PICA_REG(TEXENV5_SCALE, 0x0FC)
|
||||
PICA_REG(TEXENV_BUFFER_COLOR, 0x0FD)
|
||||
PICA_REG(COLOR_OPERATION, 0x100)
|
||||
PICA_REG(BLEND_FUNC, 0x101)
|
||||
PICA_REG(LOGIC_OP, 0x102)
|
||||
PICA_REG(BLEND_COLOR, 0x103)
|
||||
PICA_REG(FRAGOP_ALPHA_TEST, 0x104)
|
||||
PICA_REG(STENCIL_TEST, 0x105)
|
||||
PICA_REG(STENCIL_OP, 0x106)
|
||||
PICA_REG(DEPTH_COLOR_MASK, 0x107)
|
||||
PICA_REG(FRAMEBUFFER_INVALIDATE, 0x110)
|
||||
PICA_REG(FRAMEBUFFER_FLUSH, 0x111)
|
||||
PICA_REG(COLORBUFFER_READ, 0x112)
|
||||
PICA_REG(COLORBUFFER_WRITE, 0x113)
|
||||
PICA_REG(DEPTHBUFFER_READ, 0x114)
|
||||
PICA_REG(DEPTHBUFFER_WRITE, 0x115)
|
||||
PICA_REG(DEPTHBUFFER_FORMAT, 0x116)
|
||||
PICA_REG(COLORBUFFER_FORMAT, 0x117)
|
||||
PICA_REG(EARLYDEPTH_TEST2, 0x118)
|
||||
PICA_REG(FRAMEBUFFER_BLOCK32, 0x11B)
|
||||
PICA_REG(DEPTHBUFFER_LOC, 0x11C)
|
||||
PICA_REG(COLORBUFFER_LOC, 0x11D)
|
||||
PICA_REG(FRAMEBUFFER_DIM, 0x11E)
|
||||
PICA_REG(GAS_LIGHT_XY, 0x120)
|
||||
PICA_REG(GAS_LIGHT_Z, 0x121)
|
||||
PICA_REG(GAS_LIGHT_Z_COLOR, 0x122)
|
||||
PICA_REG(GAS_LUT_INDEX, 0x123)
|
||||
PICA_REG(GAS_LUT_DATA, 0x124)
|
||||
PICA_REG(GAS_DELTAZ_DEPTH, 0x126)
|
||||
PICA_REG(FRAGOP_SHADOW, 0x130)
|
||||
PICA_REG(LIGHT0_SPECULAR0, 0x140)
|
||||
PICA_REG(LIGHT0_SPECULAR1, 0x141)
|
||||
PICA_REG(LIGHT0_DIFFUSE, 0x142)
|
||||
PICA_REG(LIGHT0_AMBIENT, 0x143)
|
||||
PICA_REG(LIGHT0_XY, 0x144)
|
||||
PICA_REG(LIGHT0_Z, 0x145)
|
||||
PICA_REG(LIGHT0_SPOTDIR_XY, 0x146)
|
||||
PICA_REG(LIGHT0_SPOTDIR_Z, 0x147)
|
||||
PICA_REG(LIGHT0_CONFIG, 0x149)
|
||||
PICA_REG(LIGHT0_ATTENUATION_BIAS, 0x14A)
|
||||
PICA_REG(LIGHT0_ATTENUATION_SCALE, 0x14B)
|
||||
PICA_REG(LIGHT1_SPECULAR0, 0x150)
|
||||
PICA_REG(LIGHT1_SPECULAR1, 0x151)
|
||||
PICA_REG(LIGHT1_DIFFUSE, 0x152)
|
||||
PICA_REG(LIGHT1_AMBIENT, 0x153)
|
||||
PICA_REG(LIGHT1_XY, 0x154)
|
||||
PICA_REG(LIGHT1_Z, 0x155)
|
||||
PICA_REG(LIGHT1_SPOTDIR_XY, 0x156)
|
||||
PICA_REG(LIGHT1_SPOTDIR_Z, 0x157)
|
||||
PICA_REG(LIGHT1_CONFIG, 0x159)
|
||||
PICA_REG(LIGHT1_ATTENUATION_BIAS, 0x15A)
|
||||
PICA_REG(LIGHT1_ATTENUATION_SCALE, 0x15B)
|
||||
PICA_REG(LIGHT2_SPECULAR0, 0x160)
|
||||
PICA_REG(LIGHT2_SPECULAR1, 0x161)
|
||||
PICA_REG(LIGHT2_DIFFUSE, 0x162)
|
||||
PICA_REG(LIGHT2_AMBIENT, 0x163)
|
||||
PICA_REG(LIGHT2_XY, 0x164)
|
||||
PICA_REG(LIGHT2_Z, 0x165)
|
||||
PICA_REG(LIGHT2_SPOTDIR_XY, 0x166)
|
||||
PICA_REG(LIGHT2_SPOTDIR_Z, 0x167)
|
||||
PICA_REG(LIGHT2_CONFIG, 0x169)
|
||||
PICA_REG(LIGHT2_ATTENUATION_BIAS, 0x16A)
|
||||
PICA_REG(LIGHT2_ATTENUATION_SCALE, 0x16B)
|
||||
PICA_REG(LIGHT3_SPECULAR0, 0x170)
|
||||
PICA_REG(LIGHT3_SPECULAR1, 0x171)
|
||||
PICA_REG(LIGHT3_DIFFUSE, 0x172)
|
||||
PICA_REG(LIGHT3_AMBIENT, 0x173)
|
||||
PICA_REG(LIGHT3_XY, 0x174)
|
||||
PICA_REG(LIGHT3_Z, 0x175)
|
||||
PICA_REG(LIGHT3_SPOTDIR_XY, 0x176)
|
||||
PICA_REG(LIGHT3_SPOTDIR_Z, 0x177)
|
||||
PICA_REG(LIGHT3_CONFIG, 0x179)
|
||||
PICA_REG(LIGHT3_ATTENUATION_BIAS, 0x17A)
|
||||
PICA_REG(LIGHT3_ATTENUATION_SCALE, 0x17B)
|
||||
PICA_REG(LIGHT4_SPECULAR0, 0x180)
|
||||
PICA_REG(LIGHT4_SPECULAR1, 0x181)
|
||||
PICA_REG(LIGHT4_DIFFUSE, 0x182)
|
||||
PICA_REG(LIGHT4_AMBIENT, 0x183)
|
||||
PICA_REG(LIGHT4_XY, 0x184)
|
||||
PICA_REG(LIGHT4_Z, 0x185)
|
||||
PICA_REG(LIGHT4_SPOTDIR_XY, 0x186)
|
||||
PICA_REG(LIGHT4_SPOTDIR_Z, 0x187)
|
||||
PICA_REG(LIGHT4_CONFIG, 0x189)
|
||||
PICA_REG(LIGHT4_ATTENUATION_BIAS, 0x18A)
|
||||
PICA_REG(LIGHT4_ATTENUATION_SCALE, 0x18B)
|
||||
PICA_REG(LIGHT5_SPECULAR0, 0x190)
|
||||
PICA_REG(LIGHT5_SPECULAR1, 0x191)
|
||||
PICA_REG(LIGHT5_DIFFUSE, 0x192)
|
||||
PICA_REG(LIGHT5_AMBIENT, 0x193)
|
||||
PICA_REG(LIGHT5_XY, 0x194)
|
||||
PICA_REG(LIGHT5_Z, 0x195)
|
||||
PICA_REG(LIGHT5_SPOTDIR_XY, 0x196)
|
||||
PICA_REG(LIGHT5_SPOTDIR_Z, 0x197)
|
||||
PICA_REG(LIGHT5_CONFIG, 0x199)
|
||||
PICA_REG(LIGHT5_ATTENUATION_BIAS, 0x19A)
|
||||
PICA_REG(LIGHT5_ATTENUATION_SCALE, 0x19B)
|
||||
PICA_REG(LIGHT6_SPECULAR0, 0x1A0)
|
||||
PICA_REG(LIGHT6_SPECULAR1, 0x1A1)
|
||||
PICA_REG(LIGHT6_DIFFUSE, 0x1A2)
|
||||
PICA_REG(LIGHT6_AMBIENT, 0x1A3)
|
||||
PICA_REG(LIGHT6_XY, 0x1A4)
|
||||
PICA_REG(LIGHT6_Z, 0x1A5)
|
||||
PICA_REG(LIGHT6_SPOTDIR_XY, 0x1A6)
|
||||
PICA_REG(LIGHT6_SPOTDIR_Z, 0x1A7)
|
||||
PICA_REG(LIGHT6_CONFIG, 0x1A9)
|
||||
PICA_REG(LIGHT6_ATTENUATION_BIAS, 0x1AA)
|
||||
PICA_REG(LIGHT6_ATTENUATION_SCALE, 0x1AB)
|
||||
PICA_REG(LIGHT7_SPECULAR0, 0x1B0)
|
||||
PICA_REG(LIGHT7_SPECULAR1, 0x1B1)
|
||||
PICA_REG(LIGHT7_DIFFUSE, 0x1B2)
|
||||
PICA_REG(LIGHT7_AMBIENT, 0x1B3)
|
||||
PICA_REG(LIGHT7_XY, 0x1B4)
|
||||
PICA_REG(LIGHT7_Z, 0x1B5)
|
||||
PICA_REG(LIGHT7_SPOTDIR_XY, 0x1B6)
|
||||
PICA_REG(LIGHT7_SPOTDIR_Z, 0x1B7)
|
||||
PICA_REG(LIGHT7_CONFIG, 0x1B9)
|
||||
PICA_REG(LIGHT7_ATTENUATION_BIAS, 0x1BA)
|
||||
PICA_REG(LIGHT7_ATTENUATION_SCALE, 0x1BB)
|
||||
PICA_REG(LIGHTING_AMBIENT, 0x1C0)
|
||||
PICA_REG(LIGHTING_NUM_LIGHTS, 0x1C2)
|
||||
PICA_REG(LIGHTING_CONFIG0, 0x1C3)
|
||||
PICA_REG(LIGHTING_CONFIG1, 0x1C4)
|
||||
PICA_REG(LIGHTING_LUT_INDEX, 0x1C5)
|
||||
PICA_REG(LIGHTING_ENABLE1, 0x1C6)
|
||||
PICA_REG(LIGHTING_LUT_DATA0, 0x1C8)
|
||||
PICA_REG(LIGHTING_LUT_DATA1, 0x1C9)
|
||||
PICA_REG(LIGHTING_LUT_DATA2, 0x1CA)
|
||||
PICA_REG(LIGHTING_LUT_DATA3, 0x1CB)
|
||||
PICA_REG(LIGHTING_LUT_DATA4, 0x1CC)
|
||||
PICA_REG(LIGHTING_LUT_DATA5, 0x1CD)
|
||||
PICA_REG(LIGHTING_LUT_DATA6, 0x1CE)
|
||||
PICA_REG(LIGHTING_LUT_DATA7, 0x1CF)
|
||||
PICA_REG(LIGHTING_LUTINPUT_ABS, 0x1D0)
|
||||
PICA_REG(LIGHTING_LUTINPUT_SELECT, 0x1D1)
|
||||
PICA_REG(LIGHTING_LUTINPUT_SCALE, 0x1D2)
|
||||
PICA_REG(LIGHTING_LIGHT_PERMUTATION, 0x1D9)
|
||||
PICA_REG(ATTRIBBUFFERS_LOC, 0x200)
|
||||
PICA_REG(ATTRIBBUFFERS_FORMAT_LOW, 0x201)
|
||||
PICA_REG(ATTRIBBUFFERS_FORMAT_HIGH, 0x202)
|
||||
PICA_REG(ATTRIBBUFFER0_OFFSET, 0x203)
|
||||
PICA_REG(ATTRIBBUFFER0_CONFIG1, 0x204)
|
||||
PICA_REG(ATTRIBBUFFER0_CONFIG2, 0x205)
|
||||
PICA_REG(ATTRIBBUFFER1_OFFSET, 0x206)
|
||||
PICA_REG(ATTRIBBUFFER1_CONFIG1, 0x207)
|
||||
PICA_REG(ATTRIBBUFFER1_CONFIG2, 0x208)
|
||||
PICA_REG(ATTRIBBUFFER2_OFFSET, 0x209)
|
||||
PICA_REG(ATTRIBBUFFER2_CONFIG1, 0x20A)
|
||||
PICA_REG(ATTRIBBUFFER2_CONFIG2, 0x20B)
|
||||
PICA_REG(ATTRIBBUFFER3_OFFSET, 0x20C)
|
||||
PICA_REG(ATTRIBBUFFER3_CONFIG1, 0x20D)
|
||||
PICA_REG(ATTRIBBUFFER3_CONFIG2, 0x20E)
|
||||
PICA_REG(ATTRIBBUFFER4_OFFSET, 0x20F)
|
||||
PICA_REG(ATTRIBBUFFER4_CONFIG1, 0x210)
|
||||
PICA_REG(ATTRIBBUFFER4_CONFIG2, 0x211)
|
||||
PICA_REG(ATTRIBBUFFER5_OFFSET, 0x212)
|
||||
PICA_REG(ATTRIBBUFFER5_CONFIG1, 0x213)
|
||||
PICA_REG(ATTRIBBUFFER5_CONFIG2, 0x214)
|
||||
PICA_REG(ATTRIBBUFFER6_OFFSET, 0x215)
|
||||
PICA_REG(ATTRIBBUFFER6_CONFIG1, 0x216)
|
||||
PICA_REG(ATTRIBBUFFER6_CONFIG2, 0x217)
|
||||
PICA_REG(ATTRIBBUFFER7_OFFSET, 0x218)
|
||||
PICA_REG(ATTRIBBUFFER7_CONFIG1, 0x219)
|
||||
PICA_REG(ATTRIBBUFFER7_CONFIG2, 0x21A)
|
||||
PICA_REG(ATTRIBBUFFER8_OFFSET, 0x21B)
|
||||
PICA_REG(ATTRIBBUFFER8_CONFIG1, 0x21C)
|
||||
PICA_REG(ATTRIBBUFFER8_CONFIG2, 0x21D)
|
||||
PICA_REG(ATTRIBBUFFER9_OFFSET, 0x21E)
|
||||
PICA_REG(ATTRIBBUFFER9_CONFIG1, 0x21F)
|
||||
PICA_REG(ATTRIBBUFFER9_CONFIG2, 0x220)
|
||||
PICA_REG(ATTRIBBUFFER10_OFFSET, 0x221)
|
||||
PICA_REG(ATTRIBBUFFER10_CONFIG1, 0x222)
|
||||
PICA_REG(ATTRIBBUFFER10_CONFIG2, 0x223)
|
||||
PICA_REG(ATTRIBBUFFER11_OFFSET, 0x224)
|
||||
PICA_REG(ATTRIBBUFFER11_CONFIG1, 0x225)
|
||||
PICA_REG(ATTRIBBUFFER11_CONFIG2, 0x226)
|
||||
PICA_REG(INDEXBUFFER_CONFIG, 0x227)
|
||||
PICA_REG(NUMVERTICES, 0x228)
|
||||
PICA_REG(GEOSTAGE_CONFIG, 0x229)
|
||||
PICA_REG(VERTEX_OFFSET, 0x22A)
|
||||
PICA_REG(POST_VERTEX_CACHE_NUM, 0x22D)
|
||||
PICA_REG(DRAWARRAYS, 0x22E)
|
||||
PICA_REG(DRAWELEMENTS, 0x22F)
|
||||
PICA_REG(VTX_FUNC, 0x231)
|
||||
PICA_REG(FIXEDATTRIB_INDEX, 0x232)
|
||||
PICA_REG(FIXEDATTRIB_DATA0, 0x233)
|
||||
PICA_REG(FIXEDATTRIB_DATA1, 0x234)
|
||||
PICA_REG(FIXEDATTRIB_DATA2, 0x235)
|
||||
PICA_REG(CMDBUF_SIZE0, 0x238)
|
||||
PICA_REG(CMDBUF_SIZE1, 0x239)
|
||||
PICA_REG(CMDBUF_ADDR0, 0x23A)
|
||||
PICA_REG(CMDBUF_ADDR1, 0x23B)
|
||||
PICA_REG(CMDBUF_JUMP0, 0x23C)
|
||||
PICA_REG(CMDBUF_JUMP1, 0x23D)
|
||||
PICA_REG(VSH_NUM_ATTR, 0x242)
|
||||
PICA_REG(VSH_COM_MODE, 0x244)
|
||||
PICA_REG(START_DRAW_FUNC0, 0x245)
|
||||
PICA_REG(VSH_OUTMAP_TOTAL1, 0x24A)
|
||||
PICA_REG(VSH_OUTMAP_TOTAL2, 0x251)
|
||||
PICA_REG(GSH_MISC0, 0x252)
|
||||
PICA_REG(GEOSTAGE_CONFIG2, 0x253)
|
||||
PICA_REG(GSH_MISC1, 0x254)
|
||||
PICA_REG(PRIMITIVE_CONFIG, 0x25E)
|
||||
PICA_REG(RESTART_PRIMITIVE, 0x25F)
|
||||
PICA_REG(GSH_BOOLUNIFORM, 0x280)
|
||||
PICA_REG(GSH_INTUNIFORM_I0, 0x281)
|
||||
PICA_REG(GSH_INTUNIFORM_I1, 0x282)
|
||||
PICA_REG(GSH_INTUNIFORM_I2, 0x283)
|
||||
PICA_REG(GSH_INTUNIFORM_I3, 0x284)
|
||||
PICA_REG(GSH_INPUTBUFFER_CONFIG, 0x289)
|
||||
PICA_REG(GSH_ENTRYPOINT, 0x28A)
|
||||
PICA_REG(GSH_ATTRIBUTES_PERMUTATION_LOW, 0x28B)
|
||||
PICA_REG(GSH_ATTRIBUTES_PERMUTATION_HIGH, 0x28C)
|
||||
PICA_REG(GSH_OUTMAP_MASK, 0x28D)
|
||||
PICA_REG(GSH_CODETRANSFER_END, 0x28F)
|
||||
PICA_REG(GSH_FLOATUNIFORM_INDEX, 0x290)
|
||||
PICA_REG(GSH_FLOATUNIFORM_DATA0, 0x291)
|
||||
PICA_REG(GSH_FLOATUNIFORM_DATA1, 0x292)
|
||||
PICA_REG(GSH_FLOATUNIFORM_DATA2, 0x293)
|
||||
PICA_REG(GSH_FLOATUNIFORM_DATA3, 0x294)
|
||||
PICA_REG(GSH_FLOATUNIFORM_DATA4, 0x295)
|
||||
PICA_REG(GSH_FLOATUNIFORM_DATA5, 0x296)
|
||||
PICA_REG(GSH_FLOATUNIFORM_DATA6, 0x297)
|
||||
PICA_REG(GSH_FLOATUNIFORM_DATA7, 0x298)
|
||||
PICA_REG(GSH_CODETRANSFER_INDEX, 0x29B)
|
||||
PICA_REG(GSH_CODETRANSFER_DATA0, 0x29C)
|
||||
PICA_REG(GSH_CODETRANSFER_DATA1, 0x29D)
|
||||
PICA_REG(GSH_CODETRANSFER_DATA2, 0x29E)
|
||||
PICA_REG(GSH_CODETRANSFER_DATA3, 0x29F)
|
||||
PICA_REG(GSH_CODETRANSFER_DATA4, 0x2A0)
|
||||
PICA_REG(GSH_CODETRANSFER_DATA5, 0x2A1)
|
||||
PICA_REG(GSH_CODETRANSFER_DATA6, 0x2A2)
|
||||
PICA_REG(GSH_CODETRANSFER_DATA7, 0x2A3)
|
||||
PICA_REG(GSH_OPDESCS_INDEX, 0x2A5)
|
||||
PICA_REG(GSH_OPDESCS_DATA0, 0x2A6)
|
||||
PICA_REG(GSH_OPDESCS_DATA1, 0x2A7)
|
||||
PICA_REG(GSH_OPDESCS_DATA2, 0x2A8)
|
||||
PICA_REG(GSH_OPDESCS_DATA3, 0x2A9)
|
||||
PICA_REG(GSH_OPDESCS_DATA4, 0x2AA)
|
||||
PICA_REG(GSH_OPDESCS_DATA5, 0x2AB)
|
||||
PICA_REG(GSH_OPDESCS_DATA6, 0x2AC)
|
||||
PICA_REG(GSH_OPDESCS_DATA7, 0x2AD)
|
||||
PICA_REG(VSH_BOOLUNIFORM, 0x2B0)
|
||||
PICA_REG(VSH_INTUNIFORM_I0, 0x2B1)
|
||||
PICA_REG(VSH_INTUNIFORM_I1, 0x2B2)
|
||||
PICA_REG(VSH_INTUNIFORM_I2, 0x2B3)
|
||||
PICA_REG(VSH_INTUNIFORM_I3, 0x2B4)
|
||||
PICA_REG(VSH_INPUTBUFFER_CONFIG, 0x2B9)
|
||||
PICA_REG(VSH_ENTRYPOINT, 0x2BA)
|
||||
PICA_REG(VSH_ATTRIBUTES_PERMUTATION_LOW, 0x2BB)
|
||||
PICA_REG(VSH_ATTRIBUTES_PERMUTATION_HIGH, 0x2BC)
|
||||
PICA_REG(VSH_OUTMAP_MASK, 0x2BD)
|
||||
PICA_REG(VSH_CODETRANSFER_END, 0x2BF)
|
||||
PICA_REG(VSH_FLOATUNIFORM_INDEX, 0x2C0)
|
||||
PICA_REG(VSH_FLOATUNIFORM_DATA0, 0x2C1)
|
||||
PICA_REG(VSH_FLOATUNIFORM_DATA1, 0x2C2)
|
||||
PICA_REG(VSH_FLOATUNIFORM_DATA2, 0x2C3)
|
||||
PICA_REG(VSH_FLOATUNIFORM_DATA3, 0x2C4)
|
||||
PICA_REG(VSH_FLOATUNIFORM_DATA4, 0x2C5)
|
||||
PICA_REG(VSH_FLOATUNIFORM_DATA5, 0x2C6)
|
||||
PICA_REG(VSH_FLOATUNIFORM_DATA6, 0x2C7)
|
||||
PICA_REG(VSH_FLOATUNIFORM_DATA7, 0x2C8)
|
||||
PICA_REG(VSH_CODETRANSFER_INDEX, 0x2CB)
|
||||
PICA_REG(VSH_CODETRANSFER_DATA0, 0x2CC)
|
||||
PICA_REG(VSH_CODETRANSFER_DATA1, 0x2CD)
|
||||
PICA_REG(VSH_CODETRANSFER_DATA2, 0x2CE)
|
||||
PICA_REG(VSH_CODETRANSFER_DATA3, 0x2CF)
|
||||
PICA_REG(VSH_CODETRANSFER_DATA4, 0x2D0)
|
||||
PICA_REG(VSH_CODETRANSFER_DATA5, 0x2D1)
|
||||
PICA_REG(VSH_CODETRANSFER_DATA6, 0x2D2)
|
||||
PICA_REG(VSH_CODETRANSFER_DATA7, 0x2D3)
|
||||
PICA_REG(VSH_OPDESCS_INDEX, 0x2D5)
|
||||
PICA_REG(VSH_OPDESCS_DATA0, 0x2D6)
|
||||
PICA_REG(VSH_OPDESCS_DATA1, 0x2D7)
|
||||
PICA_REG(VSH_OPDESCS_DATA2, 0x2D8)
|
||||
PICA_REG(VSH_OPDESCS_DATA3, 0x2D9)
|
||||
PICA_REG(VSH_OPDESCS_DATA4, 0x2DA)
|
||||
PICA_REG(VSH_OPDESCS_DATA5, 0x2DB)
|
||||
PICA_REG(VSH_OPDESCS_DATA6, 0x2DC)
|
||||
PICA_REG(VSH_OPDESCS_DATA7, 0x2DD)
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2015 Citra Emulator Project
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
@ -22,40 +22,44 @@ namespace Pica {
|
||||
*
|
||||
* @todo Verify on HW if this conversion is sufficiently accurate.
|
||||
*/
|
||||
template <unsigned M, unsigned E>
|
||||
template <u32 M, u32 E>
|
||||
struct Float {
|
||||
static constexpr u32 width = M + E + 1;
|
||||
static constexpr u32 bias = 128 - (1 << (E - 1));
|
||||
static constexpr u32 exponent_mask = (1 << E) - 1;
|
||||
static constexpr u32 mantissa_mask = (1 << M) - 1;
|
||||
static constexpr u32 sign_mask = 1 << (E + M);
|
||||
public:
|
||||
static Float<M, E> FromFloat32(float val) {
|
||||
Float<M, E> ret;
|
||||
static Float FromFloat32(float val) {
|
||||
Float ret;
|
||||
ret.value = val;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static Float<M, E> FromRaw(u32 hex) {
|
||||
Float<M, E> res;
|
||||
static Float FromRaw(u32 hex) {
|
||||
Float res;
|
||||
|
||||
const int width = M + E + 1;
|
||||
const int bias = 128 - (1 << (E - 1));
|
||||
int exponent = (hex >> M) & ((1 << E) - 1);
|
||||
const unsigned mantissa = hex & ((1 << M) - 1);
|
||||
const unsigned sign = (hex >> (E + M)) << 31;
|
||||
u32 exponent = (hex >> M) & exponent_mask;
|
||||
const u32 mantissa = hex & mantissa_mask;
|
||||
const u32 sign = (hex & sign_mask) << (31 - M - E);
|
||||
|
||||
if (hex & ((1 << (width - 1)) - 1)) {
|
||||
if (exponent == (1 << E) - 1)
|
||||
if (hex & (mantissa_mask | (exponent_mask << M))) {
|
||||
if (exponent == exponent_mask) {
|
||||
exponent = 255;
|
||||
else
|
||||
} else {
|
||||
exponent += bias;
|
||||
}
|
||||
|
||||
hex = sign | (mantissa << (23 - M)) | (exponent << 23);
|
||||
} else {
|
||||
hex = sign;
|
||||
}
|
||||
|
||||
std::memcpy(&res.value, &hex, sizeof(float));
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static Float<M, E> Zero() {
|
||||
static Float Zero() {
|
||||
return FromFloat32(0.f);
|
||||
}
|
||||
|
||||
@ -64,80 +68,77 @@ public:
|
||||
return value;
|
||||
}
|
||||
|
||||
Float<M, E> operator*(const Float<M, E>& flt) const {
|
||||
Float operator*(const Float& flt) const {
|
||||
float result = value * flt.ToFloat32();
|
||||
// PICA gives 0 instead of NaN when multiplying by inf
|
||||
if (std::isnan(result))
|
||||
if (!std::isnan(value) && !std::isnan(flt.ToFloat32()))
|
||||
result = 0.f;
|
||||
return Float<M, E>::FromFloat32(result);
|
||||
if (std::isnan(result) && !std::isnan(value) && !std::isnan(flt.ToFloat32())) {
|
||||
result = 0.f;
|
||||
}
|
||||
|
||||
return Float::FromFloat32(result);
|
||||
}
|
||||
|
||||
Float<M, E> operator/(const Float<M, E>& flt) const {
|
||||
return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32());
|
||||
Float operator/(const Float& flt) const {
|
||||
return Float::FromFloat32(ToFloat32() / flt.ToFloat32());
|
||||
}
|
||||
|
||||
Float<M, E> operator+(const Float<M, E>& flt) const {
|
||||
return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32());
|
||||
Float operator+(const Float& flt) const {
|
||||
return Float::FromFloat32(ToFloat32() + flt.ToFloat32());
|
||||
}
|
||||
|
||||
Float<M, E> operator-(const Float<M, E>& flt) const {
|
||||
return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32());
|
||||
Float operator-(const Float& flt) const {
|
||||
return Float::FromFloat32(ToFloat32() - flt.ToFloat32());
|
||||
}
|
||||
|
||||
Float<M, E>& operator*=(const Float<M, E>& flt) {
|
||||
Float& operator*=(const Float& flt) {
|
||||
value = operator*(flt).value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Float<M, E>& operator/=(const Float<M, E>& flt) {
|
||||
Float& operator/=(const Float& flt) {
|
||||
value /= flt.ToFloat32();
|
||||
return *this;
|
||||
}
|
||||
|
||||
Float<M, E>& operator+=(const Float<M, E>& flt) {
|
||||
Float& operator+=(const Float& flt) {
|
||||
value += flt.ToFloat32();
|
||||
return *this;
|
||||
}
|
||||
|
||||
Float<M, E>& operator-=(const Float<M, E>& flt) {
|
||||
Float& operator-=(const Float& flt) {
|
||||
value -= flt.ToFloat32();
|
||||
return *this;
|
||||
}
|
||||
|
||||
Float<M, E> operator-() const {
|
||||
return Float<M, E>::FromFloat32(-ToFloat32());
|
||||
Float operator-() const {
|
||||
return Float::FromFloat32(-ToFloat32());
|
||||
}
|
||||
|
||||
bool operator<(const Float<M, E>& flt) const {
|
||||
bool operator<(const Float& flt) const {
|
||||
return ToFloat32() < flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator>(const Float<M, E>& flt) const {
|
||||
bool operator>(const Float& flt) const {
|
||||
return ToFloat32() > flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator>=(const Float<M, E>& flt) const {
|
||||
bool operator>=(const Float& flt) const {
|
||||
return ToFloat32() >= flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator<=(const Float<M, E>& flt) const {
|
||||
bool operator<=(const Float& flt) const {
|
||||
return ToFloat32() <= flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator==(const Float<M, E>& flt) const {
|
||||
bool operator==(const Float& flt) const {
|
||||
return ToFloat32() == flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator!=(const Float<M, E>& flt) const {
|
||||
bool operator!=(const Float& flt) const {
|
||||
return ToFloat32() != flt.ToFloat32();
|
||||
}
|
||||
|
||||
private:
|
||||
static const unsigned MASK = (1 << (M + E + 1)) - 1;
|
||||
static const unsigned MANTISSA_MASK = (1 << M) - 1;
|
||||
static const unsigned EXPONENT_MASK = (1 << E) - 1;
|
||||
|
||||
// Stored as a regular float, merely for convenience
|
||||
// TODO: Perform proper arithmetic on this!
|
||||
float value;
|
||||
|
@ -13,10 +13,6 @@ namespace OpenGL {
|
||||
struct ScreenInfo;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
struct ScreenInfo;
|
||||
}
|
||||
|
||||
namespace Pica::Shader {
|
||||
struct OutputVertex;
|
||||
} // namespace Pica::Shader
|
||||
@ -84,13 +80,6 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Attempt to use a faster method to display the framebuffer to screen
|
||||
virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
|
||||
PAddr framebuffer_addr, u32 pixel_stride,
|
||||
Vulkan::ScreenInfo& screen_info) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Attempt to draw using hardware shaders
|
||||
virtual bool AccelerateDrawBatch(bool is_indexed) {
|
||||
return false;
|
||||
|
@ -14,80 +14,80 @@
|
||||
|
||||
namespace Pica {
|
||||
|
||||
enum class FragmentOperationMode : u32 {
|
||||
Default = 0,
|
||||
Gas = 1,
|
||||
Shadow = 3,
|
||||
};
|
||||
|
||||
enum class LogicOp : u32 {
|
||||
Clear = 0,
|
||||
And = 1,
|
||||
AndReverse = 2,
|
||||
Copy = 3,
|
||||
Set = 4,
|
||||
CopyInverted = 5,
|
||||
NoOp = 6,
|
||||
Invert = 7,
|
||||
Nand = 8,
|
||||
Or = 9,
|
||||
Nor = 10,
|
||||
Xor = 11,
|
||||
Equiv = 12,
|
||||
AndInverted = 13,
|
||||
OrReverse = 14,
|
||||
OrInverted = 15,
|
||||
};
|
||||
|
||||
enum class BlendEquation : u32 {
|
||||
Add = 0,
|
||||
Subtract = 1,
|
||||
ReverseSubtract = 2,
|
||||
Min = 3,
|
||||
Max = 4,
|
||||
};
|
||||
|
||||
enum class BlendFactor : u32 {
|
||||
Zero = 0,
|
||||
One = 1,
|
||||
SourceColor = 2,
|
||||
OneMinusSourceColor = 3,
|
||||
DestColor = 4,
|
||||
OneMinusDestColor = 5,
|
||||
SourceAlpha = 6,
|
||||
OneMinusSourceAlpha = 7,
|
||||
DestAlpha = 8,
|
||||
OneMinusDestAlpha = 9,
|
||||
ConstantColor = 10,
|
||||
OneMinusConstantColor = 11,
|
||||
ConstantAlpha = 12,
|
||||
OneMinusConstantAlpha = 13,
|
||||
SourceAlphaSaturate = 14,
|
||||
};
|
||||
|
||||
enum class CompareFunc : u32 {
|
||||
Never = 0,
|
||||
Always = 1,
|
||||
Equal = 2,
|
||||
NotEqual = 3,
|
||||
LessThan = 4,
|
||||
LessThanOrEqual = 5,
|
||||
GreaterThan = 6,
|
||||
GreaterThanOrEqual = 7,
|
||||
};
|
||||
|
||||
enum class StencilAction : u32 {
|
||||
Keep = 0,
|
||||
Zero = 1,
|
||||
Replace = 2,
|
||||
Increment = 3,
|
||||
Decrement = 4,
|
||||
Invert = 5,
|
||||
IncrementWrap = 6,
|
||||
DecrementWrap = 7,
|
||||
};
|
||||
|
||||
struct FramebufferRegs {
|
||||
enum class FragmentOperationMode : u32 {
|
||||
Default = 0,
|
||||
Gas = 1,
|
||||
Shadow = 3,
|
||||
};
|
||||
|
||||
enum class LogicOp : u32 {
|
||||
Clear = 0,
|
||||
And = 1,
|
||||
AndReverse = 2,
|
||||
Copy = 3,
|
||||
Set = 4,
|
||||
CopyInverted = 5,
|
||||
NoOp = 6,
|
||||
Invert = 7,
|
||||
Nand = 8,
|
||||
Or = 9,
|
||||
Nor = 10,
|
||||
Xor = 11,
|
||||
Equiv = 12,
|
||||
AndInverted = 13,
|
||||
OrReverse = 14,
|
||||
OrInverted = 15,
|
||||
};
|
||||
|
||||
enum class BlendEquation : u32 {
|
||||
Add = 0,
|
||||
Subtract = 1,
|
||||
ReverseSubtract = 2,
|
||||
Min = 3,
|
||||
Max = 4,
|
||||
};
|
||||
|
||||
enum class BlendFactor : u32 {
|
||||
Zero = 0,
|
||||
One = 1,
|
||||
SourceColor = 2,
|
||||
OneMinusSourceColor = 3,
|
||||
DestColor = 4,
|
||||
OneMinusDestColor = 5,
|
||||
SourceAlpha = 6,
|
||||
OneMinusSourceAlpha = 7,
|
||||
DestAlpha = 8,
|
||||
OneMinusDestAlpha = 9,
|
||||
ConstantColor = 10,
|
||||
OneMinusConstantColor = 11,
|
||||
ConstantAlpha = 12,
|
||||
OneMinusConstantAlpha = 13,
|
||||
SourceAlphaSaturate = 14,
|
||||
};
|
||||
|
||||
enum class CompareFunc : u32 {
|
||||
Never = 0,
|
||||
Always = 1,
|
||||
Equal = 2,
|
||||
NotEqual = 3,
|
||||
LessThan = 4,
|
||||
LessThanOrEqual = 5,
|
||||
GreaterThan = 6,
|
||||
GreaterThanOrEqual = 7,
|
||||
};
|
||||
|
||||
enum class StencilAction : u32 {
|
||||
Keep = 0,
|
||||
Zero = 1,
|
||||
Replace = 2,
|
||||
Increment = 3,
|
||||
Decrement = 4,
|
||||
Invert = 5,
|
||||
IncrementWrap = 6,
|
||||
DecrementWrap = 7,
|
||||
};
|
||||
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 2, FragmentOperationMode> fragment_operation_mode;
|
||||
|
@ -12,6 +12,13 @@
|
||||
|
||||
namespace Pica {
|
||||
|
||||
enum class TriangleTopology : u32 {
|
||||
List = 0,
|
||||
Strip = 1,
|
||||
Fan = 2,
|
||||
Shader = 3, // Programmable setup unit implemented in a geometry shader
|
||||
};
|
||||
|
||||
struct PipelineRegs {
|
||||
enum class VertexAttributeFormat : u32 {
|
||||
BYTE = 0,
|
||||
@ -250,13 +257,6 @@ struct PipelineRegs {
|
||||
|
||||
INSERT_PADDING_WORDS(0x9);
|
||||
|
||||
enum class TriangleTopology : u32 {
|
||||
List = 0,
|
||||
Strip = 1,
|
||||
Fan = 2,
|
||||
Shader = 3, // Programmable setup unit implemented in a geometry shader
|
||||
};
|
||||
|
||||
BitField<8, 2, TriangleTopology> triangle_topology;
|
||||
|
||||
u32 restart_primitive;
|
||||
|
@ -6,21 +6,20 @@
|
||||
|
||||
#include <array>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "video_core/pica_types.h"
|
||||
|
||||
namespace Pica {
|
||||
|
||||
struct RasterizerRegs {
|
||||
enum class CullMode : u32 {
|
||||
// Select which polygons are considered to be "frontfacing".
|
||||
KeepAll = 0,
|
||||
KeepClockWise = 1,
|
||||
KeepCounterClockWise = 2,
|
||||
// TODO: What does the third value imply?
|
||||
};
|
||||
// Select which polygons are considered to be "frontfacing".
|
||||
enum class CullMode : u32 {
|
||||
KeepAll = 0,
|
||||
KeepClockWise = 1,
|
||||
KeepCounterClockWise = 2,
|
||||
KeepAll2 = 3 // Same as KeepAll
|
||||
};
|
||||
|
||||
struct RasterizerRegs {
|
||||
union {
|
||||
BitField<0, 2, CullMode> cull_mode;
|
||||
};
|
||||
|
@ -13,6 +13,23 @@
|
||||
|
||||
namespace Pica {
|
||||
|
||||
enum WrapMode : u32 {
|
||||
ClampToEdge = 0,
|
||||
ClampToBorder = 1,
|
||||
Repeat = 2,
|
||||
MirroredRepeat = 3,
|
||||
// Mode 4-7 produces some weird result and may be just invalid:
|
||||
ClampToEdge2 = 4, // Positive coord: clamp to edge; negative coord: repeat
|
||||
ClampToBorder2 = 5, // Positive coord: clamp to border; negative coord: repeat
|
||||
Repeat2 = 6, // Same as Repeat
|
||||
Repeat3 = 7, // Same as Repeat
|
||||
};
|
||||
|
||||
enum TextureFilter : u32 {
|
||||
Nearest = 0,
|
||||
Linear = 1,
|
||||
};
|
||||
|
||||
struct TexturingRegs {
|
||||
struct TextureConfig {
|
||||
enum TextureType : u32 {
|
||||
@ -24,23 +41,6 @@ struct TexturingRegs {
|
||||
Disabled = 5,
|
||||
};
|
||||
|
||||
enum WrapMode : u32 {
|
||||
ClampToEdge = 0,
|
||||
ClampToBorder = 1,
|
||||
Repeat = 2,
|
||||
MirroredRepeat = 3,
|
||||
// Mode 4-7 produces some weird result and may be just invalid:
|
||||
ClampToEdge2 = 4, // Positive coord: clamp to edge; negative coord: repeat
|
||||
ClampToBorder2 = 5, // Positive coord: clamp to border; negative coord: repeat
|
||||
Repeat2 = 6, // Same as Repeat
|
||||
Repeat3 = 7, // Same as Repeat
|
||||
};
|
||||
|
||||
enum TextureFilter : u32 {
|
||||
Nearest = 0,
|
||||
Linear = 1,
|
||||
};
|
||||
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 8, u32> r;
|
||||
|
@ -5,8 +5,8 @@
|
||||
#include <memory>
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/swrasterizer/swrasterizer.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
|
@ -414,7 +414,7 @@ bool RasterizerOpenGL::SetupGeometryShader() {
|
||||
MICROPROFILE_SCOPE(OpenGL_GS);
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
|
||||
if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
|
||||
if (regs.pipeline.use_gs != Pica::UseGS::No) {
|
||||
LOG_ERROR(Render_OpenGL, "Accelerate draw doesn't support geometry shader");
|
||||
return false;
|
||||
}
|
||||
|
@ -9,18 +9,17 @@
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <fmt/format.h>
|
||||
#include <nihstro/shader_bytecode.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/shader_compiler/frontend/opcode.h"
|
||||
#include "video_core/shader_compiler/frontned/instruction.h"
|
||||
#include "video_core/shader_compiler/frontend/register.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
|
||||
namespace OpenGL::ShaderDecompiler {
|
||||
using Pica::Shader::OpCode;
|
||||
using Pica::Shader::DestRegister;
|
||||
|
||||
using nihstro::Instruction;
|
||||
using nihstro::OpCode;
|
||||
using nihstro::RegisterType;
|
||||
using nihstro::SourceRegister;
|
||||
using nihstro::SwizzlePattern;
|
||||
namespace OpenGL::ShaderDecompiler {
|
||||
|
||||
constexpr u32 PROGRAM_END = Pica::Shader::MAX_PROGRAM_CODE_LENGTH;
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
#include "video_core/shader_compiler/shader.h"
|
||||
|
||||
namespace OpenGL::ShaderDecompiler {
|
||||
|
||||
|
@ -175,11 +175,11 @@ public:
|
||||
|
||||
void Create(const char* source, GLenum type) {
|
||||
if (shader_or_program.which() == 0) {
|
||||
boost::get<OGLShader>(shader_or_program).Create(source, type);
|
||||
std::get<OGLShader>(shader_or_program).Create(source, type);
|
||||
} else {
|
||||
OGLShader shader;
|
||||
shader.Create(source, type);
|
||||
OGLProgram& program = boost::get<OGLProgram>(shader_or_program);
|
||||
OGLProgram& program = std::get<OGLProgram>(shader_or_program);
|
||||
program.Create(true, {shader.handle});
|
||||
SetShaderUniformBlockBindings(program.handle);
|
||||
|
||||
@ -191,9 +191,9 @@ public:
|
||||
|
||||
GLuint GetHandle() const {
|
||||
if (shader_or_program.which() == 0) {
|
||||
return boost::get<OGLShader>(shader_or_program).handle;
|
||||
return std::get<OGLShader>(shader_or_program).handle;
|
||||
} else {
|
||||
return boost::get<OGLProgram>(shader_or_program).handle;
|
||||
return std::get<OGLProgram>(shader_or_program).handle;
|
||||
}
|
||||
}
|
||||
|
||||
@ -204,7 +204,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
boost::variant<OGLShader, OGLProgram> shader_or_program;
|
||||
std::variant<OGLShader, OGLProgram> shader_or_program;
|
||||
};
|
||||
|
||||
class TrivialVertexShader {
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2015 Citra Emulator Project
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
@ -8,28 +8,32 @@
|
||||
#include <glm/glm.hpp>
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/regs_framebuffer.h"
|
||||
#include "video_core/regs_lighting.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace PicaToVK {
|
||||
|
||||
using TextureFilter = Pica::TexturingRegs::TextureConfig::TextureFilter;
|
||||
|
||||
struct FilterInfo {
|
||||
vk::Filter mag_filter, min_filter;
|
||||
vk::SamplerMipmapMode mip_mode;
|
||||
};
|
||||
|
||||
inline FilterInfo TextureFilterMode(TextureFilter mag, TextureFilter min, TextureFilter mip) {
|
||||
std::array<vk::Filter, 2> filter_table = { vk::Filter::eNearest, vk::Filter::eLinear };
|
||||
std::array<vk::SamplerMipmapMode, 2> mipmap_table = { vk::SamplerMipmapMode::eNearest, vk::SamplerMipmapMode::eLinear };
|
||||
inline FilterInfo TextureFilterMode(Pica::TextureFilter mag, Pica::TextureFilter min,
|
||||
Pica::TextureFilter mip) {
|
||||
constexpr std::array filter_table = {
|
||||
vk::Filter::eNearest,
|
||||
vk::Filter::eLinear
|
||||
};
|
||||
|
||||
return FilterInfo{filter_table[mag], filter_table[min], mipmap_table[mip]};
|
||||
constexpr std::array mipmap_table = {
|
||||
vk::SamplerMipmapMode::eNearest,
|
||||
vk::SamplerMipmapMode::eLinear
|
||||
};
|
||||
|
||||
return FilterInfo{filter_table.at(mag), filter_table.at(min), mipmap_table.at(mip)};
|
||||
}
|
||||
|
||||
inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
|
||||
inline vk::SamplerAddressMode WrapMode(Pica::WrapMode mode) {
|
||||
static constexpr std::array<vk::SamplerAddressMode, 8> wrap_mode_table{{
|
||||
vk::SamplerAddressMode::eClampToEdge,
|
||||
vk::SamplerAddressMode::eClampToBorder,
|
||||
@ -63,7 +67,7 @@ inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapM
|
||||
return wrap_mode_table[index];
|
||||
}
|
||||
|
||||
inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation) {
|
||||
inline vk::BlendOp BlendEquation(Pica::BlendEquation equation) {
|
||||
static constexpr std::array<vk::BlendOp, 5> blend_equation_table{{
|
||||
vk::BlendOp::eAdd,
|
||||
vk::BlendOp::eSubtract,
|
||||
@ -85,7 +89,7 @@ inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation)
|
||||
return blend_equation_table[index];
|
||||
}
|
||||
|
||||
inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {
|
||||
inline vk::BlendFactor BlendFunc(Pica::BlendFactor factor) {
|
||||
static constexpr std::array<vk::BlendFactor, 15> blend_func_table{{
|
||||
vk::BlendFactor::eZero, // BlendFactor::Zero
|
||||
vk::BlendFactor::eOne, // BlendFactor::One
|
||||
@ -117,7 +121,7 @@ inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {
|
||||
return blend_func_table[index];
|
||||
}
|
||||
|
||||
inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) {
|
||||
inline vk::LogicOp LogicOp(Pica::LogicOp op) {
|
||||
static constexpr std::array<vk::LogicOp, 16> logic_op_table{{
|
||||
vk::LogicOp::eClear, // Clear
|
||||
vk::LogicOp::eAnd, // And
|
||||
@ -150,7 +154,7 @@ inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) {
|
||||
return logic_op_table[index];
|
||||
}
|
||||
|
||||
inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) {
|
||||
inline vk::CompareOp CompareFunc(Pica::CompareFunc func) {
|
||||
static constexpr std::array<vk::CompareOp, 8> compare_func_table{{
|
||||
vk::CompareOp::eNever, // CompareFunc::Never
|
||||
vk::CompareOp::eAlways, // CompareFunc::Always
|
||||
@ -175,7 +179,7 @@ inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) {
|
||||
return compare_func_table[index];
|
||||
}
|
||||
|
||||
inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) {
|
||||
inline vk::StencilOp StencilOp(Pica::StencilAction action) {
|
||||
static constexpr std::array<vk::StencilOp, 8> stencil_op_table{{
|
||||
vk::StencilOp::eKeep, // StencilAction::Keep
|
||||
vk::StencilOp::eZero, // StencilAction::Zero
|
||||
@ -200,6 +204,30 @@ inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) {
|
||||
return stencil_op_table[index];
|
||||
}
|
||||
|
||||
inline vk::PrimitiveTopology PrimitiveTopology(Pica::TriangleTopology topology) {
|
||||
switch (topology) {
|
||||
case Pica::TriangleTopology::Fan:
|
||||
return vk::PrimitiveTopology::eTriangleFan;
|
||||
case Pica::TriangleTopology::List:
|
||||
case Pica::TriangleTopology::Shader:
|
||||
return vk::PrimitiveTopology::eTriangleList;
|
||||
case Pica::TriangleTopology::Strip:
|
||||
return vk::PrimitiveTopology::eTriangleStrip;
|
||||
}
|
||||
}
|
||||
|
||||
inline vk::CullModeFlags CullMode(Pica::CullMode mode) {
|
||||
switch (mode) {
|
||||
case Pica::CullMode::KeepAll:
|
||||
case Pica::CullMode::KeepAll2:
|
||||
return vk::CullModeFlagBits::eNone;
|
||||
case Pica::CullMode::KeepClockWise:
|
||||
return vk::CullModeFlagBits::eBack;
|
||||
case Pica::CullMode::KeepCounterClockWise:
|
||||
return vk::CullModeFlagBits::eFront;
|
||||
}
|
||||
}
|
||||
|
||||
inline glm::vec4 ColorRGBA8(const u32 color) {
|
||||
return glm::vec4{
|
||||
(color >> 0 & 0xFF) / 255.0f,
|
||||
|
@ -2,22 +2,6 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// Enable vulkan platforms
|
||||
#if defined(ANDROID) || defined (__ANDROID__)
|
||||
#define VK_USE_PLATFORM_ANDROID_KHR 1
|
||||
#elif defined(_WIN32)
|
||||
#define VK_USE_PLATFORM_WIN32_KHR 1
|
||||
#elif defined(__APPLE__)
|
||||
#define VK_USE_PLATFORM_MACOS_MVK 1
|
||||
#define VK_USE_PLATFORM_METAL_EXT 1
|
||||
#else
|
||||
#ifdef WAYLAND_DISPLAY
|
||||
#define VK_USE_PLATFORM_WAYLAND_KHR 1
|
||||
#else // wayland
|
||||
#define VK_USE_PLATFORM_XLIB_KHR 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <glm/gtc/matrix_transform.hpp>
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
@ -56,83 +40,6 @@
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
vk::SurfaceKHR CreateSurface(const vk::Instance& instance,
|
||||
const Frontend::EmuWindow& emu_window) {
|
||||
const auto& window_info = emu_window.GetWindowInfo();
|
||||
vk::SurfaceKHR surface;
|
||||
|
||||
#if VK_USE_PLATFORM_WIN32_KHR
|
||||
if (window_info.type == Frontend::WindowSystemType::Windows) {
|
||||
const HWND hWnd = static_cast<HWND>(window_info.render_surface);
|
||||
const vk::Win32SurfaceCreateInfoKHR win32_ci{{}, nullptr, hWnd};
|
||||
if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
#elif VK_USE_PLATFORM_XLIB_KHR
|
||||
if (window_info.type == Frontend::WindowSystemType::X11) {
|
||||
const vk::XlibSurfaceCreateInfoKHR xlib_ci{{},
|
||||
static_cast<Display*>(window_info.display_connection),
|
||||
reinterpret_cast<Window>(window_info.render_surface)};
|
||||
if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
#elif VK_USE_PLATFORM_WAYLAND_KHR
|
||||
if (window_info.type == Frontend::WindowSystemType::Wayland) {
|
||||
const vk::WaylandSurfaceCreateInfoKHR wayland_ci{{},
|
||||
static_cast<wl_display*>(window_info.display_connection),
|
||||
static_cast<wl_surface*>(window_info.render_surface)};
|
||||
if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (!surface) {
|
||||
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return surface;
|
||||
}
|
||||
|
||||
std::vector<const char*> RequiredExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) {
|
||||
std::vector<const char*> extensions;
|
||||
extensions.reserve(6);
|
||||
switch (window_type) {
|
||||
case Frontend::WindowSystemType::Headless:
|
||||
break;
|
||||
#ifdef _WIN32
|
||||
case Frontend::WindowSystemType::Windows:
|
||||
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
|
||||
break;
|
||||
#endif
|
||||
#if !defined(_WIN32) && !defined(__APPLE__)
|
||||
case Frontend::WindowSystemType::X11:
|
||||
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
|
||||
break;
|
||||
case Frontend::WindowSystemType::Wayland:
|
||||
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
|
||||
break;
|
||||
}
|
||||
if (window_type != Frontend::WindowSystemType::Headless) {
|
||||
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
|
||||
}
|
||||
if (enable_debug_utils) {
|
||||
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
|
||||
}
|
||||
extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
|
||||
return extensions;
|
||||
}
|
||||
|
||||
RendererVulkan::RendererVulkan(Frontend::EmuWindow& window)
|
||||
: RendererBase{window} {
|
||||
|
||||
|
178
src/video_core/renderer_vulkan/vk_backend.cpp
Normal file
178
src/video_core/renderer_vulkan/vk_backend.cpp
Normal file
@ -0,0 +1,178 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "core/core.h"
|
||||
#include "common/object_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_backend.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture.h"
|
||||
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
Backend::Backend(Frontend::EmuWindow& window) : BackendBase(window),
|
||||
instance(window), swapchain(instance, instance.GetSurface()),
|
||||
scheduler(instance) {
|
||||
|
||||
// TODO: Properly report GPU hardware
|
||||
auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
|
||||
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
|
||||
telemetry_session.AddField(user_system, "GPU_Vendor", "NVIDIA");
|
||||
telemetry_session.AddField(user_system, "GPU_Model", "GTX 1650");
|
||||
telemetry_session.AddField(user_system, "GPU_Vulkan_Version", "Vulkan 1.3");
|
||||
|
||||
// Pre-create all needed renderpasses by the renderer
|
||||
constexpr std::array color_formats = {
|
||||
vk::Format::eR8G8B8A8Unorm,
|
||||
vk::Format::eR8G8B8Unorm,
|
||||
vk::Format::eR5G5B5A1UnormPack16,
|
||||
vk::Format::eR5G6B5UnormPack16,
|
||||
vk::Format::eR4G4B4A4UnormPack16
|
||||
};
|
||||
|
||||
constexpr std::array depth_stencil_formats = {
|
||||
vk::Format::eD16Unorm,
|
||||
vk::Format::eX8D24UnormPack32,
|
||||
vk::Format::eD24UnormS8Uint,
|
||||
};
|
||||
|
||||
// Create all required renderpasses
|
||||
for (u32 color = 0; color < MAX_COLOR_FORMATS; color++) {
|
||||
for (u32 depth = 0; depth < MAX_DEPTH_FORMATS; depth++) {
|
||||
u32 index = color * MAX_COLOR_FORMATS + depth;
|
||||
renderpass_cache[index] = CreateRenderPass(color_formats[color], depth_stencil_formats[depth]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Backend::~Backend() {
|
||||
vk::Device device = instance.GetDevice();
|
||||
for (auto& renderpass : renderpass_cache) {
|
||||
device.destroyRenderPass(renderpass);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* To avoid many small heap allocations during handle creation, each resource has a dedicated pool
|
||||
* associated with it that batch allocates memory.
|
||||
*/
|
||||
BufferHandle Backend::CreateBuffer(BufferInfo info) {
|
||||
static ObjectPool<Buffer> buffer_pool;
|
||||
return IntrusivePtr<Buffer>{buffer_pool.Allocate(info)};
|
||||
}
|
||||
|
||||
FramebufferHandle Backend::CreateFramebuffer(FramebufferInfo info) {
|
||||
}
|
||||
|
||||
TextureHandle Backend::CreateTexture(TextureInfo info) {
|
||||
static ObjectPool<Texture> texture_pool;
|
||||
return IntrusivePtr<Texture>{texture_pool.Allocate(info)};
|
||||
}
|
||||
|
||||
PipelineHandle Backend::CreatePipeline(PipelineType type, PipelineInfo info) {
|
||||
static ObjectPool<Pipeline> pipeline_pool;
|
||||
|
||||
// Find a pipeline layout first
|
||||
if (auto iter = pipeline_layouts.find(info.layout); iter != pipeline_layouts.end()) {
|
||||
PipelineLayout& layout = iter->second;
|
||||
|
||||
return IntrusivePtr<Pipeline>{pipeline_pool.Allocate(instance, layout, type, info, cache)};
|
||||
}
|
||||
|
||||
// Create the layout
|
||||
auto result = pipeline_layouts.emplace(info.layout, PipelineLayout{instance, info.layout});
|
||||
return IntrusivePtr<Pipeline>{pipeline_pool.Allocate(instance, result.first->second, type, info, cache)};
|
||||
}
|
||||
|
||||
SamplerHandle Backend::CreateSampler(SamplerInfo info) {
|
||||
static ObjectPool<Sampler> sampler_pool;
|
||||
return IntrusivePtr<Sampler>{sampler_pool.Allocate(info)};
|
||||
}
|
||||
|
||||
void Backend::Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
|
||||
BufferHandle vertex_buffer,
|
||||
u32 base_vertex, u32 num_vertices) {
|
||||
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
|
||||
|
||||
Buffer* vertex = static_cast<Buffer*>(vertex_buffer.Get());
|
||||
command_buffer.bindVertexBuffers(0, vertex->GetHandle(), {0});
|
||||
|
||||
// Submit draw
|
||||
command_buffer.draw(num_vertices, 1, base_vertex, 0);
|
||||
}
|
||||
|
||||
void Backend::DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
|
||||
BufferHandle vertex_buffer, BufferHandle index_buffer,
|
||||
u32 base_index, u32 num_indices, u32 base_vertex) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
vk::RenderPass Backend::CreateRenderPass(vk::Format color, vk::Format depth) const {
|
||||
// Define attachments
|
||||
const std::array attachments = {
|
||||
vk::AttachmentDescription{
|
||||
.format = color,
|
||||
.stencilLoadOp = vk::AttachmentLoadOp::eDontCare,
|
||||
.stencilStoreOp = vk::AttachmentStoreOp::eDontCare,
|
||||
.initialLayout = vk::ImageLayout::eShaderReadOnlyOptimal,
|
||||
.finalLayout = vk::ImageLayout::eColorAttachmentOptimal
|
||||
},
|
||||
vk::AttachmentDescription{
|
||||
.format = depth,
|
||||
.initialLayout = vk::ImageLayout::eShaderReadOnlyOptimal,
|
||||
.finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal
|
||||
}
|
||||
};
|
||||
|
||||
// Our renderpasses only defines one color and depth attachment
|
||||
const vk::AttachmentReference color_attachment_ref = {
|
||||
.attachment = 0,
|
||||
.layout = vk::ImageLayout::eColorAttachmentOptimal
|
||||
};
|
||||
|
||||
const vk::AttachmentReference depth_attachment_ref = {
|
||||
.attachment = 1,
|
||||
.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal
|
||||
};
|
||||
|
||||
const vk::SubpassDependency subpass_dependency = {
|
||||
.srcSubpass = VK_SUBPASS_EXTERNAL,
|
||||
.dstSubpass = 0,
|
||||
.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput |
|
||||
vk::PipelineStageFlagBits::eEarlyFragmentTests,
|
||||
.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput |
|
||||
vk::PipelineStageFlagBits::eEarlyFragmentTests,
|
||||
.srcAccessMask = vk::AccessFlagBits::eNone,
|
||||
.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite |
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentWrite,
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion
|
||||
};
|
||||
|
||||
// We also require only one subpass
|
||||
const vk::SubpassDescription subpass = {
|
||||
.pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
|
||||
.inputAttachmentCount = 0,
|
||||
.pInputAttachments = nullptr,
|
||||
.colorAttachmentCount = 1,
|
||||
.pColorAttachments = &color_attachment_ref,
|
||||
.pResolveAttachments = 0,
|
||||
.pDepthStencilAttachment = &depth_attachment_ref
|
||||
};
|
||||
|
||||
const vk::RenderPassCreateInfo renderpass_info = {
|
||||
.attachmentCount = 2,
|
||||
.pAttachments = attachments.data(),
|
||||
.subpassCount = 1,
|
||||
.pSubpasses = &subpass,
|
||||
.dependencyCount = 1,
|
||||
.pDependencies = &subpass_dependency
|
||||
};
|
||||
|
||||
// Create the renderpass
|
||||
vk::Device device = instance.GetDevice();
|
||||
return device.createRenderPass(renderpass_info);
|
||||
}
|
||||
|
||||
} // namespace VideoCore::Vulkan
|
75
src/video_core/renderer_vulkan/vk_backend.h
Normal file
75
src/video_core/renderer_vulkan/vk_backend.h
Normal file
@ -0,0 +1,75 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include "video_core/common/backend.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline.h"
|
||||
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
class Texture;
|
||||
|
||||
constexpr u32 RENDERPASS_COUNT = MAX_COLOR_FORMATS * MAX_DEPTH_FORMATS;
|
||||
|
||||
class Backend : public VideoCore::BackendBase {
|
||||
public:
|
||||
Backend(Frontend::EmuWindow& window);
|
||||
~Backend();
|
||||
|
||||
void SwapBuffers() override;
|
||||
|
||||
BufferHandle CreateBuffer(BufferInfo info) override;
|
||||
|
||||
FramebufferHandle CreateFramebuffer(FramebufferInfo info) override;
|
||||
|
||||
TextureHandle CreateTexture(TextureInfo info) override;
|
||||
|
||||
PipelineHandle CreatePipeline(PipelineType type, PipelineInfo info) override;
|
||||
|
||||
SamplerHandle CreateSampler(SamplerInfo info) override;
|
||||
|
||||
void Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
|
||||
BufferHandle vertex_buffer,
|
||||
u32 base_vertex, u32 num_vertices) override;
|
||||
|
||||
void DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
|
||||
BufferHandle vertex_buffer, BufferHandle index_buffer,
|
||||
u32 base_index, u32 num_indices, u32 base_vertex) override;
|
||||
|
||||
void DispatchCompute(PipelineHandle pipeline, Common::Vec3<u32> groupsize,
|
||||
Common::Vec3<u32> groups) override;
|
||||
|
||||
// Returns the vulkan instance
|
||||
inline const Instance& GetInstance() const {
|
||||
return instance;
|
||||
}
|
||||
|
||||
// Returns the vulkan command buffer scheduler
|
||||
inline CommandScheduler& GetScheduler() {
|
||||
return scheduler;
|
||||
}
|
||||
|
||||
private:
|
||||
vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth) const;
|
||||
|
||||
private:
|
||||
Instance instance;
|
||||
Swapchain swapchain;
|
||||
CommandScheduler scheduler;
|
||||
|
||||
// The formats Citra uses are limited so we can pre-create
|
||||
// all the renderpasses we will need
|
||||
std::array<vk::RenderPass, RENDERPASS_COUNT> renderpass_cache;
|
||||
vk::PipelineCache cache;
|
||||
|
||||
// Pipeline layout cache
|
||||
std::unordered_map<PipelineLayoutInfo, PipelineLayout> pipeline_layouts;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
@ -2,165 +2,181 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include <cstring>
|
||||
|
||||
namespace Vulkan {
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
inline vk::BufferUsageFlags ToVkBufferUsage(BufferUsage usage) {
|
||||
constexpr std::array vk_buffer_usages = {
|
||||
vk::BufferUsageFlagBits::eVertexBuffer,
|
||||
vk::BufferUsageFlagBits::eIndexBuffer,
|
||||
vk::BufferUsageFlagBits::eUniformBuffer,
|
||||
vk::BufferUsageFlagBits::eUniformTexelBuffer,
|
||||
vk::BufferUsageFlagBits::eTransferSrc
|
||||
};
|
||||
|
||||
return vk::BufferUsageFlagBits::eTransferDst |
|
||||
vk_buffer_usages.at(static_cast<u32>(usage));
|
||||
}
|
||||
|
||||
inline vk::Format ToVkViewFormat(ViewFormat format) {
|
||||
constexpr std::array vk_view_formats = {
|
||||
vk::Format::eR32Sfloat,
|
||||
vk::Format::eR32G32Sfloat,
|
||||
vk::Format::eR32G32B32Sfloat,
|
||||
vk::Format::eR32G32B32A32Sfloat
|
||||
};
|
||||
|
||||
return vk_view_formats.at(static_cast<u32>(format));
|
||||
}
|
||||
|
||||
Buffer::Buffer(Instance& instance, CommandScheduler& scheduler, const BufferInfo& info) :
|
||||
BufferBase(info), instance(instance), scheduler(scheduler) {
|
||||
|
||||
vk::BufferCreateInfo buffer_info = {
|
||||
.size = info.capacity,
|
||||
.usage = ToVkBufferUsage(info.usage)
|
||||
};
|
||||
|
||||
VmaAllocationCreateInfo alloc_create_info = {
|
||||
.flags = info.usage == BufferUsage::Staging ?
|
||||
(VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
|
||||
VMA_ALLOCATION_CREATE_MAPPED_BIT) :
|
||||
VmaAllocationCreateFlags{},
|
||||
.usage = VMA_MEMORY_USAGE_AUTO
|
||||
};
|
||||
|
||||
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
|
||||
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
|
||||
VmaAllocationInfo alloc_info;
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
|
||||
// Allocate texture memory
|
||||
vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info,
|
||||
&unsafe_buffer, &allocation, &alloc_info);
|
||||
buffer = vk::Buffer{unsafe_buffer};
|
||||
|
||||
u32 view = 0;
|
||||
vk::Device device = instance.GetDevice();
|
||||
while (info.views[view] != ViewFormat::Undefined) {
|
||||
const vk::BufferViewCreateInfo view_info = {
|
||||
.buffer = buffer,
|
||||
.format = ToVkViewFormat(info.views[view]),
|
||||
.range = info.capacity
|
||||
};
|
||||
|
||||
views[view++] = device.createBufferView(view_info);
|
||||
}
|
||||
|
||||
// Map memory
|
||||
if (info.usage == BufferUsage::Staging) {
|
||||
mapped_ptr = alloc_info.pMappedData;
|
||||
}
|
||||
}
|
||||
|
||||
Buffer::~Buffer() {
|
||||
Destroy();
|
||||
}
|
||||
|
||||
void Buffer::Create(const Buffer::Info& info) {
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
buffer_info = info;
|
||||
|
||||
vk::BufferCreateInfo bufferInfo({}, info.size, info.usage);
|
||||
buffer = device.createBuffer(bufferInfo);
|
||||
|
||||
auto mem_requirements = device.getBufferMemoryRequirements(buffer);
|
||||
|
||||
auto memory_type_index = FindMemoryType(mem_requirements.memoryTypeBits, info.properties);
|
||||
vk::MemoryAllocateInfo alloc_info(mem_requirements.size, memory_type_index);
|
||||
|
||||
memory = device.allocateMemory(alloc_info);
|
||||
device.bindBufferMemory(buffer, memory, 0);
|
||||
|
||||
// Optionally map the buffer to CPU memory
|
||||
if (info.properties & vk::MemoryPropertyFlagBits::eHostVisible) {
|
||||
host_ptr = device.mapMemory(memory, 0, info.size);
|
||||
}
|
||||
|
||||
for (auto& format : info.view_formats) {
|
||||
if (format != vk::Format::eUndefined) {
|
||||
views[view_count++] = device.createBufferView({{}, buffer, format, 0, info.size});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Buffer::Recreate() {
|
||||
Destroy();
|
||||
Create(buffer_info);
|
||||
}
|
||||
|
||||
void Buffer::Destroy() {
|
||||
if (buffer) {
|
||||
if (host_ptr != nullptr) {
|
||||
g_vk_instace->GetDevice().unmapMemory(memory);
|
||||
}
|
||||
auto deleter = [allocation = allocation,
|
||||
buffer = buffer,
|
||||
views = views](vk::Device device, VmaAllocator allocator) {
|
||||
vmaDestroyBuffer(allocator, static_cast<VkBuffer>(buffer), allocation);
|
||||
|
||||
auto deleter = [buffer = buffer,
|
||||
memory = memory,
|
||||
view_count = view_count,
|
||||
views = views]() {
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
device.destroyBuffer(buffer);
|
||||
device.freeMemory(memory);
|
||||
|
||||
for (u32 i = 0; i < view_count; i++) {
|
||||
device.destroyBufferView(views[i]);
|
||||
u32 view_index = 0;
|
||||
while (views[view_index]) {
|
||||
device.destroyBufferView(views[view_index++]);
|
||||
}
|
||||
};
|
||||
|
||||
g_vk_task_scheduler->Schedule(deleter);
|
||||
}
|
||||
}
|
||||
|
||||
u32 Buffer::FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties) {
|
||||
vk::PhysicalDeviceMemoryProperties mem_properties = g_vk_instace->GetPhysicalDevice().getMemoryProperties();
|
||||
|
||||
for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++)
|
||||
{
|
||||
auto flags = mem_properties.memoryTypes[i].propertyFlags;
|
||||
if ((type_filter & (1 << i)) && (flags & properties) == properties)
|
||||
return i;
|
||||
}
|
||||
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to find suitable memory type.");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void Buffer::Upload(std::span<const std::byte> data, u32 offset,
|
||||
vk::AccessFlags access_to_block,
|
||||
vk::PipelineStageFlags stage_to_block) {
|
||||
auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer();
|
||||
// For small data uploads use vkCmdUpdateBuffer
|
||||
if (data.size_bytes() < 1024) {
|
||||
cmdbuffer.updateBuffer(buffer, 0, data.size_bytes(), data.data());
|
||||
}
|
||||
else {
|
||||
auto [ptr, staging_offset] = g_vk_task_scheduler->RequestStaging(data.size());
|
||||
if (!ptr) {
|
||||
LOG_ERROR(Render_Vulkan, "Cannot upload data without staging buffer!");
|
||||
// Delete the buffer immediately if it's allocated in host memory
|
||||
if (info.usage == BufferUsage::Staging) {
|
||||
vk::Device device = instance.GetDevice();
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
deleter(device, allocator);
|
||||
} else {
|
||||
scheduler.Schedule(deleter);
|
||||
}
|
||||
|
||||
// Copy pixels to staging buffer
|
||||
std::memcpy(ptr, data.data(), data.size_bytes());
|
||||
|
||||
auto region = vk::BufferCopy{staging_offset, offset, data.size_bytes()};
|
||||
auto& staging = g_vk_task_scheduler->GetStaging();
|
||||
cmdbuffer.copyBuffer(staging.GetBuffer(), buffer, region);
|
||||
}
|
||||
|
||||
vk::BufferMemoryBarrier barrier{
|
||||
vk::AccessFlagBits::eTransferWrite, access_to_block,
|
||||
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
|
||||
buffer, offset, data.size_bytes()
|
||||
};
|
||||
|
||||
// Add a pipeline barrier for the region modified
|
||||
cmdbuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_to_block,
|
||||
vk::DependencyFlagBits::eByRegion,
|
||||
0, nullptr, 1, &barrier, 0, nullptr);
|
||||
}
|
||||
|
||||
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
|
||||
ASSERT(size <= buffer_info.size);
|
||||
ASSERT(alignment <= buffer_info.size);
|
||||
std::span<u8> Buffer::Map(u32 size, u32 alignment) {
|
||||
ASSERT(size <= info.capacity && alignment <= info.capacity);
|
||||
|
||||
if (alignment > 0) {
|
||||
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
|
||||
buffer_offset = Common::AlignUp<std::size_t>(buffer_offset, alignment);
|
||||
}
|
||||
|
||||
bool invalidate = false;
|
||||
if (buffer_pos + size > buffer_info.size) {
|
||||
buffer_pos = 0;
|
||||
invalidate = true;
|
||||
// If the buffer is full, invalidate it
|
||||
if (buffer_offset + size > info.capacity) {
|
||||
Invalidate();
|
||||
}
|
||||
|
||||
auto [staging_ptr, staging_offset] = g_vk_task_scheduler->RequestStaging(size);
|
||||
mapped_chunk = vk::BufferCopy{staging_offset, buffer_pos, size};
|
||||
|
||||
return std::make_tuple(staging_ptr, buffer_pos, invalidate);
|
||||
if (info.usage == BufferUsage::Staging) {
|
||||
return std::span<u8>{reinterpret_cast<u8*>(mapped_ptr) + buffer_offset, size};
|
||||
} else {
|
||||
Buffer& staging = scheduler.GetCommandUploadBuffer();
|
||||
return staging.Map(size, alignment);
|
||||
}
|
||||
}
|
||||
|
||||
void StreamBuffer::Commit(u32 size, vk::AccessFlags access_to_block,
|
||||
vk::PipelineStageFlags stage_to_block) {
|
||||
if (size > 0) {
|
||||
mapped_chunk.size = size;
|
||||
void Buffer::Commit(u32 size) {
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
if (info.usage == BufferUsage::Staging && size > 0) {
|
||||
vmaFlushAllocation(allocator, allocation, buffer_offset, size);
|
||||
} else {
|
||||
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
|
||||
Buffer& staging = scheduler.GetCommandUploadBuffer();
|
||||
|
||||
auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer();
|
||||
auto& staging = g_vk_task_scheduler->GetStaging();
|
||||
cmdbuffer.copyBuffer(staging.GetBuffer(), buffer, mapped_chunk);
|
||||
const vk::BufferCopy copy_region = {
|
||||
.srcOffset = staging.GetCurrentOffset(),
|
||||
.dstOffset = buffer_offset,
|
||||
.size = size
|
||||
};
|
||||
|
||||
vk::BufferMemoryBarrier barrier{
|
||||
vk::AccessFlagBits::eTransferWrite, access_to_block,
|
||||
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
|
||||
buffer, mapped_chunk.dstOffset, mapped_chunk.size
|
||||
// Copy staging buffer to device local buffer
|
||||
command_buffer.copyBuffer(staging.GetHandle(), buffer, copy_region);
|
||||
|
||||
vk::AccessFlags access_mask;
|
||||
vk::PipelineStageFlags stage_mask;
|
||||
switch (info.usage) {
|
||||
case BufferUsage::Vertex:
|
||||
access_mask = vk::AccessFlagBits::eVertexAttributeRead;
|
||||
stage_mask = vk::PipelineStageFlagBits::eVertexInput;
|
||||
break;
|
||||
case BufferUsage::Index:
|
||||
access_mask = vk::AccessFlagBits::eIndexRead;
|
||||
stage_mask = vk::PipelineStageFlagBits::eVertexInput;
|
||||
break;
|
||||
case BufferUsage::Uniform:
|
||||
case BufferUsage::Texel:
|
||||
access_mask = vk::AccessFlagBits::eUniformRead;
|
||||
stage_mask = vk::PipelineStageFlagBits::eVertexShader |
|
||||
vk::PipelineStageFlagBits::eFragmentShader;
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown BufferUsage flag!");
|
||||
}
|
||||
|
||||
const vk::BufferMemoryBarrier buffer_barrier = {
|
||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||
.dstAccessMask = access_mask,
|
||||
.buffer = buffer,
|
||||
.offset = buffer_offset,
|
||||
.size = size
|
||||
};
|
||||
|
||||
// Add a pipeline barrier for the region modified
|
||||
cmdbuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_to_block,
|
||||
vk::DependencyFlagBits::eByRegion,
|
||||
0, nullptr, 1, &barrier, 0, nullptr);
|
||||
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
|
||||
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
|
||||
|
||||
buffer_pos += size;
|
||||
}
|
||||
|
||||
buffer_offset += size;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -4,80 +4,47 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <deque>
|
||||
#include <span>
|
||||
#include "common/common_types.h"
|
||||
#include <array>
|
||||
#include "common/assert.h"
|
||||
#include "video_core/common/buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
constexpr u32 MAX_BUFFER_VIEWS = 5;
|
||||
constexpr u32 MAX_COMMIT_CHUNKS = 6;
|
||||
class Instance;
|
||||
class CommandScheduler;
|
||||
|
||||
/// Generic Vulkan buffer object used by almost every resource
|
||||
class Buffer : public NonCopyable {
|
||||
class Buffer : public VideoCore::BufferBase {
|
||||
public:
|
||||
struct Info {
|
||||
u32 size;
|
||||
vk::MemoryPropertyFlags properties;
|
||||
vk::BufferUsageFlags usage;
|
||||
std::array<vk::Format, MAX_BUFFER_VIEWS> view_formats{};
|
||||
};
|
||||
Buffer(Instance& instance, CommandScheduler& scheduler, const BufferInfo& info);
|
||||
~Buffer() override;
|
||||
|
||||
Buffer() = default;
|
||||
~Buffer();
|
||||
std::span<u8> Map(u32 size, u32 alignment = 0) override;
|
||||
|
||||
/// Enable move operations
|
||||
Buffer(Buffer&&) = default;
|
||||
Buffer& operator=(Buffer&&) = default;
|
||||
/// Flushes write to buffer memory
|
||||
void Commit(u32 size = 0) override;
|
||||
|
||||
/// Create a new Vulkan buffer object
|
||||
void Create(const Info& info);
|
||||
void Recreate();
|
||||
void Destroy();
|
||||
/// Returns the Vulkan buffer handle
|
||||
vk::Buffer GetHandle() const {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/// Global utility functions used by other objects
|
||||
static u32 FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties);
|
||||
|
||||
/// Return a pointer to the mapped memory if the buffer is host mapped
|
||||
u8* GetHostPointer() const { return reinterpret_cast<u8*>(host_ptr); }
|
||||
const vk::BufferView& GetView(u32 i = 0) const { return views[i]; }
|
||||
const vk::Buffer& GetBuffer() const { return buffer; }
|
||||
u32 GetSize() const { return buffer_info.size; }
|
||||
|
||||
void Upload(std::span<const std::byte> data, u32 offset,
|
||||
vk::AccessFlags access_to_block = vk::AccessFlagBits::eVertexAttributeRead,
|
||||
vk::PipelineStageFlags stage_to_block = vk::PipelineStageFlagBits::eVertexInput);
|
||||
/// Returns an immutable reference to the requested buffer view
|
||||
const vk::BufferView& GetView(u32 index = 0) const {
|
||||
ASSERT(index < view_count);
|
||||
return views[index];
|
||||
}
|
||||
|
||||
protected:
|
||||
Info buffer_info;
|
||||
vk::Buffer buffer;
|
||||
vk::DeviceMemory memory;
|
||||
void* host_ptr = nullptr;
|
||||
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views;
|
||||
u32 view_count{};
|
||||
};
|
||||
Instance& instance;
|
||||
CommandScheduler& scheduler;
|
||||
|
||||
class StreamBuffer : public Buffer {
|
||||
public:
|
||||
/*
|
||||
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
|
||||
* and the optional alignment requirement.
|
||||
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
|
||||
* The return values are the pointer to the new chunk, the offset within the buffer,
|
||||
* and the invalidation flag for previous chunks.
|
||||
* The actual used size must be specified on unmapping the chunk.
|
||||
*/
|
||||
std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);
|
||||
void Commit(u32 size, vk::AccessFlags access_to_block = vk::AccessFlagBits::eUniformRead,
|
||||
vk::PipelineStageFlags stage_to_block = vk::PipelineStageFlagBits::eVertexShader |
|
||||
vk::PipelineStageFlagBits::eFragmentShader);
|
||||
|
||||
private:
|
||||
u32 buffer_pos{};
|
||||
vk::BufferCopy mapped_chunk;
|
||||
// Vulkan buffer handle
|
||||
void* mapped_ptr = nullptr;
|
||||
vk::Buffer buffer = VK_NULL_HANDLE;
|
||||
VmaAllocation allocation = VK_NULL_HANDLE;
|
||||
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
|
||||
u32 view_count = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -14,3 +14,74 @@
|
||||
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
|
||||
#define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1
|
||||
#include <vk_mem_alloc.h>
|
||||
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
/// Returns the aligned byte size of each pixel in the specified format
|
||||
constexpr float GetFormatSize(vk::Format format) {
|
||||
switch (format) {
|
||||
case vk::Format::eR8G8B8A8Unorm:
|
||||
case vk::Format::eD24UnormS8Uint:
|
||||
return 4;
|
||||
case vk::Format::eR8G8B8Unorm:
|
||||
return 3;
|
||||
case vk::Format::eR5G5B5A1UnormPack16:
|
||||
case vk::Format::eR5G6B5UnormPack16:
|
||||
case vk::Format::eR4G4B4A4UnormPack16:
|
||||
case vk::Format::eD16Unorm:
|
||||
return 2;
|
||||
default:
|
||||
return 0;
|
||||
};
|
||||
}
|
||||
|
||||
/// Return the image aspect associated on the provided format
|
||||
constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) {
|
||||
vk::ImageAspectFlags flags;
|
||||
switch (format) {
|
||||
case vk::Format::eD16UnormS8Uint:
|
||||
case vk::Format::eD24UnormS8Uint:
|
||||
case vk::Format::eX8D24UnormPack32:
|
||||
case vk::Format::eD32SfloatS8Uint:
|
||||
flags = vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth;
|
||||
break;
|
||||
case vk::Format::eD16Unorm:
|
||||
case vk::Format::eD32Sfloat:
|
||||
flags = vk::ImageAspectFlagBits::eDepth;
|
||||
break;
|
||||
default:
|
||||
flags = vk::ImageAspectFlagBits::eColor;
|
||||
}
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
/// Returns a bit mask with the required usage of a format with a particular aspect
|
||||
constexpr vk::ImageUsageFlags GetImageUsage(vk::ImageAspectFlags aspect) {
|
||||
auto usage = vk::ImageUsageFlagBits::eSampled |
|
||||
vk::ImageUsageFlagBits::eTransferDst |
|
||||
vk::ImageUsageFlagBits::eTransferSrc;
|
||||
|
||||
if (aspect & vk::ImageAspectFlagBits::eDepth) {
|
||||
return usage | vk::ImageUsageFlagBits::eDepthStencilAttachment;
|
||||
} else {
|
||||
return usage | vk::ImageUsageFlagBits::eColorAttachment;
|
||||
}
|
||||
};
|
||||
|
||||
/// Returns a bit mask with the required features of a format with a particular aspect
|
||||
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::ImageAspectFlags aspect) {
|
||||
auto usage = vk::FormatFeatureFlagBits::eSampledImage |
|
||||
vk::FormatFeatureFlagBits::eTransferDst |
|
||||
vk::FormatFeatureFlagBits::eTransferSrc |
|
||||
vk::FormatFeatureFlagBits::eBlitSrc |
|
||||
vk::FormatFeatureFlagBits::eBlitDst;
|
||||
|
||||
if (aspect & vk::ImageAspectFlagBits::eDepth) {
|
||||
return usage | vk::FormatFeatureFlagBits::eDepthStencilAttachment;
|
||||
} else {
|
||||
return usage | vk::FormatFeatureFlagBits::eColorAttachment;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
6
src/video_core/renderer_vulkan/vk_format_util.cpp
Normal file
6
src/video_core/renderer_vulkan/vk_format_util.cpp
Normal file
@ -0,0 +1,6 @@
|
||||
#include "vk_format_util.h"
|
||||
|
||||
vk_format_util::vk_format_util()
|
||||
{
|
||||
|
||||
}
|
436
src/video_core/renderer_vulkan/vk_format_util.h
Normal file
436
src/video_core/renderer_vulkan/vk_format_util.h
Normal file
@ -0,0 +1,436 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <bit>
|
||||
#include <string_view>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <bitset>
|
||||
#include <type_traits>
|
||||
#include <vulkan/vulkan_format_traits.hpp>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
enum class SIMD : u8 {
|
||||
None = 0,
|
||||
SSE4 = 1,
|
||||
AVX2 = 2,
|
||||
NEON = 3
|
||||
};
|
||||
|
||||
/**
|
||||
* A Pixel holds a pixel value or a SIMD lane holding multiple "real" pixels
|
||||
*/
|
||||
#pragma pack(1)
|
||||
template <u8 bytes, SIMD simd = SIMD::None>
|
||||
struct Pixel {
|
||||
using StorageType = std::conditional_t<bytes <= 1, u8,
|
||||
std::conditional_t<bytes <= 2, u16,
|
||||
std::conditional_t<bytes <= 4, u32, u64>>>;
|
||||
Pixel() = default;
|
||||
|
||||
// Memory load/store
|
||||
constexpr void Load(u8* memory) {
|
||||
std::memcpy(&storage, memory, bytes);
|
||||
}
|
||||
|
||||
constexpr void Store(u8* memory) const {
|
||||
std::memcpy(memory, &storage, bytes);
|
||||
}
|
||||
|
||||
// Returns the number of bytes until the next pixel
|
||||
constexpr u8 GetStride() const {
|
||||
return bytes;
|
||||
}
|
||||
|
||||
// Bitwise operators
|
||||
constexpr Pixel RotateRight(int n) const {
|
||||
return std::rotr(storage, n);
|
||||
}
|
||||
|
||||
constexpr StorageType operator & (const StorageType mask) const {
|
||||
return storage & mask;
|
||||
}
|
||||
|
||||
constexpr StorageType operator | (const StorageType mask) const {
|
||||
return storage | mask;
|
||||
}
|
||||
|
||||
constexpr StorageType operator >>(const int n) const {
|
||||
return storage >> n;
|
||||
}
|
||||
|
||||
constexpr StorageType operator <<(const int n) const {
|
||||
return storage << n;
|
||||
}
|
||||
|
||||
private:
|
||||
StorageType storage;
|
||||
};
|
||||
#pragma pack()
|
||||
|
||||
/**
|
||||
* Information about a pixel format
|
||||
*/
|
||||
template <u8 Components>
|
||||
struct FormatInfo {
|
||||
constexpr FormatInfo(vk::Format format) {
|
||||
for (int i = 0; i < components; i++) {
|
||||
name[i] = vk::componentName(format, i)[0];
|
||||
is_float[i] = std::string_view{vk::componentNumericFormat(format, i)}
|
||||
== "SFLOAT";
|
||||
bits[i] = vk::componentBits(format, i);
|
||||
bit_offset[i] = (i > 0 ? bit_offset[i - 1] + bits[i - 1] : 0);
|
||||
}
|
||||
|
||||
bytes = (format == vk::Format::eD32SfloatS8Uint ? 8 :
|
||||
vk::blockSize(format));
|
||||
}
|
||||
|
||||
static constexpr u32 components = Components;
|
||||
std::array<char, components> name;
|
||||
std::array<bool, components> is_float;
|
||||
std::array<u8, components> bit_offset;
|
||||
std::array<u8, components> bits;
|
||||
u8 bytes; // This includes the padding in D32S8
|
||||
};
|
||||
|
||||
/**
|
||||
* Represents a mapping of components from one format to another
|
||||
*/
|
||||
template <FormatInfo source, FormatInfo dest>
|
||||
struct Mapping {
|
||||
static constexpr u32 component_map_bits = 4;
|
||||
static constexpr u32 component_map_mask = (1 << component_map_bits) - 1;
|
||||
|
||||
constexpr Mapping() {
|
||||
for (int i = 0; i < source.names.size(); i++) {
|
||||
constexpr char source_name = source.names[i];
|
||||
for (u8 j = 0; j < dest.names.size(); j++) {
|
||||
constexpr char dest_name = dest.names[j];
|
||||
if constexpr (source_name == dest_name) {
|
||||
storage |= ((j & component_map_mask) << component_map_bits * i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
constexpr u8 GetMapping(const int component) {
|
||||
return (storage >> (component * component_map_bits)) & component_map_mask;
|
||||
}
|
||||
|
||||
// Returns the number of bits to rotate a pixel to the right
|
||||
// to match the mapping of the destiation format. If it's not
|
||||
// possible returns -1
|
||||
constexpr s32 TestMappingRotation() {
|
||||
constexpr u16 identity = 0x3210;
|
||||
|
||||
u32 total_bits_rotated = 0;
|
||||
auto test_rotation = [&](s32 i) -> bool {
|
||||
return (storage == std::rotr(identity, i * component_map_bits));
|
||||
};
|
||||
|
||||
for (s32 rot = 0; rot < 4; rot++) {
|
||||
if (test_rotation(rot)) {
|
||||
return total_bits_rotated;
|
||||
}
|
||||
|
||||
total_bits_rotated += source.bits[rot];
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Returns true if the each component of the source format has the
|
||||
// same bit-width as the mapped destination format component
|
||||
constexpr bool AreBitwiseEqual() {
|
||||
bool result = source.bytes == dest.bytes;
|
||||
for (int i = 0; i < source.components; i++) {
|
||||
result &= (source.bits[i] == dest.bits[GetMapping(i)]);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
// Since there are at most 4 components we can use 4 bits for each component
|
||||
u16 storage = 0xFFFF;
|
||||
};
|
||||
|
||||
// Allows for loop like iteration at compile time
|
||||
template <auto Start, auto End, class F>
|
||||
constexpr void ForEach(F&& f) {
|
||||
if constexpr (Start < End) {
|
||||
f(std::integral_constant<decltype(Start), Start>());
|
||||
ForEach<Start + 1, End>(f);
|
||||
}
|
||||
}
|
||||
|
||||
// Copies pixel data from a source to a destionation buffer, performing
|
||||
// format conversion at the same time
|
||||
template <vk::Format source_format, vk::Format dest_format, SIMD simd>
|
||||
constexpr void Convert2(std::span<const u8> source, std::span<u8> dest) {
|
||||
constexpr u32 source_components = vk::componentCount(source_format);
|
||||
constexpr u32 dest_components = vk::componentCount(dest_format);
|
||||
|
||||
// Query vulkan hpp format traits for the info we need
|
||||
constexpr FormatInfo<source_components> source_info{source_format};
|
||||
constexpr FormatInfo<dest_components> dest_info{dest_format};
|
||||
|
||||
// Create a table with the required component mapping
|
||||
constexpr Mapping<source_info, dest_info> mapping{};
|
||||
|
||||
// Begin conversion
|
||||
u32 source_offset = 0;
|
||||
u32 dest_offset = 0;
|
||||
while (source_offset < source.size()) {
|
||||
// Load source pixel
|
||||
Pixel<source_info.bytes, simd> source_pixel;
|
||||
Pixel<dest_info.bytes, simd> dest_pixel{};
|
||||
|
||||
// Load data into the pixel
|
||||
source_pixel.Load(source.data() + source_offset);
|
||||
|
||||
// OPTIMIZATION: Some formats (RGB5A1, A1RGB5) are simply rotations
|
||||
// of one another. We can use a faster path for these
|
||||
if constexpr (s32 rot = mapping.TestMappingRotation();
|
||||
rot > -1 && mapping.AreBitwiseEqual()) {
|
||||
dest_pixel = source_pixel.RotateRight(rot);
|
||||
// RGB8 <-> RGBA8 is extrenely common on desktop GPUs
|
||||
// so it deserves a special path
|
||||
} else if constexpr (true) {
|
||||
} else {
|
||||
ForEach<0, source_components>([&](auto comp) {
|
||||
constexpr u8 dest_comp = (mapping >> (2 * comp)) & 0x3;
|
||||
|
||||
// If the component is not mapped skip it
|
||||
if constexpr (dest_comp == 0xFF) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Retrieve component
|
||||
u32 component = GetComponent<source_format, source_bytes, comp>(source_pixel);
|
||||
|
||||
constexpr bool is_source_float = IsFloat<source_format>(comp);
|
||||
constexpr bool is_dest_float = IsFloat<dest_format>(dest_comp);
|
||||
|
||||
// Perform float <-> int conversion (normalization)
|
||||
if constexpr (is_source_float && !is_dest_float) {
|
||||
float temp;
|
||||
std::memcpy(&temp, &component, sizeof(float));
|
||||
|
||||
constexpr u64 mask = (1ull << vk::componentBits(dest_format, dest_comp)) - 1;
|
||||
component = static_cast<u32>(temp * mask);
|
||||
} else if constexpr (!is_source_float && is_dest_float) {
|
||||
constexpr u64 mask = (1ull << vk::componentBits(source_format, comp)) - 1;
|
||||
float temp = static_cast<float>(component) / mask;
|
||||
std::memcpy(&component, &temp, sizeof(float));
|
||||
}
|
||||
|
||||
SetComponent<dest_format, dest_bytes, dest_comp>(dest_pixel, component);
|
||||
});
|
||||
}
|
||||
|
||||
// Write destination pixel (dest_bytes includes the padding so we cannot use it here)
|
||||
std::memcpy(dest.data() + dest_offset, DataPtr<dest_bytes>(dest_pixel),
|
||||
vk::blockSize(dest_format));
|
||||
|
||||
// Copy next pixel
|
||||
source_offset += source_pixel.GetStride();
|
||||
dest_offset += dest_pixel.GetStride();
|
||||
}
|
||||
}
|
||||
|
||||
// Asign the byte count with an integral type
|
||||
template <u8 bytes>
|
||||
struct PackedInt { using type = typename std::array<u8, bytes>; };
|
||||
|
||||
template <>
|
||||
struct PackedInt<1> { using type = u8; };
|
||||
|
||||
template <>
|
||||
struct PackedInt<2> { using type = u16; };
|
||||
|
||||
template <>
|
||||
struct PackedInt<4> { using type = u32; };
|
||||
|
||||
template <>
|
||||
struct PackedInt<8> { using type = u64; };
|
||||
|
||||
template <u8 bytes>
|
||||
using PackedType = typename PackedInt<bytes>::type;
|
||||
|
||||
// Returns the pointer to the raw bytes respecting the underlying type
|
||||
template <u8 bytes>
|
||||
constexpr u8* DataPtr(PackedType<bytes>& data) {
|
||||
if constexpr (std::is_integral_v<PackedType<bytes>>) {
|
||||
return reinterpret_cast<u8*>(&data);
|
||||
} else {
|
||||
return data.data();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true when the specified component is of float type
|
||||
template <vk::Format format>
|
||||
constexpr bool IsFloat(u8 component) {
|
||||
return std::string_view{vk::componentNumericFormat(format, component)} == "SFLOAT";
|
||||
}
|
||||
|
||||
// Returns the offset in bits of the component from the start of the pixel
|
||||
template <vk::Format format, u8 component, u8 i = 0>
|
||||
constexpr u32 GetComponentBitOffset() {
|
||||
if constexpr (i == component) {
|
||||
return 0;
|
||||
} else {
|
||||
return vk::componentBits(format, i) +
|
||||
GetComponentBitOffset<format, component, i + 1>();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the data located at the specified component
|
||||
template <vk::Format format, u8 bytes, u8 component>
|
||||
constexpr u32 GetComponent(PackedType<bytes>& pixel) {
|
||||
constexpr u64 bit_offset = GetComponentBitOffset<format, component>();
|
||||
constexpr u64 component_bits = vk::componentBits(format, component);
|
||||
constexpr u64 mask = (1 << component_bits) - 1;
|
||||
|
||||
// First process packed formats which are easy to extract from
|
||||
if constexpr (std::is_integral_v<PackedType<bytes>>) {
|
||||
return (pixel >> bit_offset) & mask;
|
||||
} else {
|
||||
// Assume component_bits and offset are byte aligned. Otherwise
|
||||
// this would be extremely complicated
|
||||
using ComponentType = PackedType<(component_bits >> 3)>;
|
||||
static_assert(component_bits % 8 == 0 && bit_offset % 8 == 0);
|
||||
static_assert(std::is_integral_v<ComponentType>);
|
||||
|
||||
constexpr u64 byte_offset = bit_offset >> 3;
|
||||
return *reinterpret_cast<ComponentType*>(DataPtr<bytes>(pixel) + byte_offset);
|
||||
}
|
||||
}
|
||||
|
||||
template <vk::Format format, u8 bytes, u8 component>
|
||||
constexpr void SetComponent(PackedType<bytes>& pixel, u32 data) {
|
||||
constexpr u64 bit_offset = GetComponentBitOffset<format, component>();
|
||||
constexpr u64 component_bits = vk::componentBits(format, component);
|
||||
constexpr u64 mask = (1ull << component_bits) - 1;
|
||||
|
||||
// First process packed formats which are easy to write
|
||||
if constexpr (std::is_integral_v<PackedType<bytes>>) {
|
||||
pixel |= (data & mask) << bit_offset;
|
||||
} else {
|
||||
// Assume component_bits and offset are byte aligned. Otherwise
|
||||
// this would be extremely complicated
|
||||
using ComponentType = PackedType<(component_bits >> 3)>;
|
||||
static_assert(component_bits % 8 == 0 && bit_offset % 8 == 0);
|
||||
static_assert(std::is_integral_v<ComponentType>);
|
||||
|
||||
constexpr u64 byte_offset = bit_offset >> 3;
|
||||
*reinterpret_cast<ComponentType*>(DataPtr(pixel) + byte_offset) = data;
|
||||
}
|
||||
}
|
||||
|
||||
constexpr bool CanUseRotation();
|
||||
|
||||
// Lookup table that maps component i of source format
|
||||
// to component mapping[i] of the destination format
|
||||
template <vk::Format source_format, u8 source_components,
|
||||
vk::Format dest_format, u8 dest_components>
|
||||
constexpr auto ComponentMapping() {
|
||||
// Since there are at most 4 components we can use 2 bits for each index
|
||||
u8 mapping = 0xFF;
|
||||
for (u8 i = 0; i < source_components; i++) {
|
||||
auto source_name = vk::componentName(source_format, i);
|
||||
for (u8 j = 0; j < dest_components; j++) {
|
||||
auto dest_name = vk::componentName(dest_format, j);
|
||||
if (std::string_view{source_name} == std::string_view{dest_name}) {
|
||||
mapping |= ((j & 0x3) << 2 * i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mapping;
|
||||
}
|
||||
|
||||
// Allows for loop like iteration at compile time
|
||||
template <auto Start, auto End, class F>
|
||||
constexpr void ConstexprFor(F&& f) {
|
||||
if constexpr (Start < End) {
|
||||
f(std::integral_constant<decltype(Start), Start>());
|
||||
ConstexprFor<Start + 1, End>(f);
|
||||
}
|
||||
}
|
||||
|
||||
// Copies pixel data from a source to a destionation buffer, performing
|
||||
// format conversion at the same time
|
||||
template <vk::Format source_format, u8 source_bytes,
|
||||
vk::Format dest_format, u8 dest_bytes>
|
||||
constexpr void Convert(std::span<const u8> source, std::span<u8> dest) {
|
||||
constexpr u32 source_components = vk::componentCount(source_format);
|
||||
constexpr u32 dest_components = vk::componentCount(dest_format);
|
||||
|
||||
// Create a table with the required component mapping
|
||||
constexpr auto mapping = ComponentMapping<source_format, source_components,
|
||||
dest_format, dest_components>();
|
||||
u32 source_offset = 0;
|
||||
u32 dest_offset = 0;
|
||||
while (source_offset < source.size()) {
|
||||
// Load source pixel
|
||||
PackedType<source_bytes> source_pixel;
|
||||
std::memcpy(DataPtr<source_bytes>(source_pixel),
|
||||
source.data() + source_offset, source_bytes);
|
||||
|
||||
PackedType<dest_bytes> dest_pixel{};
|
||||
|
||||
// OPTIMIZATION: Some formats (RGB5A1, A1RGB5) are simply rotations
|
||||
// of one another. We can use a faster path for these
|
||||
|
||||
ConstexprFor<0, source_components>([&](auto comp) {
|
||||
constexpr u8 dest_comp = (mapping >> (2 * comp)) & 0x3;
|
||||
|
||||
// If the component is not mapped skip it
|
||||
if constexpr (dest_comp == 0xFF) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Retrieve component
|
||||
u32 component = GetComponent<source_format, source_bytes, comp>(source_pixel);
|
||||
|
||||
constexpr bool is_source_float = IsFloat<source_format>(comp);
|
||||
constexpr bool is_dest_float = IsFloat<dest_format>(dest_comp);
|
||||
|
||||
// Perform float <-> int conversion (normalization)
|
||||
if constexpr (is_source_float && !is_dest_float) {
|
||||
float temp;
|
||||
std::memcpy(&temp, &component, sizeof(float));
|
||||
|
||||
constexpr u64 mask = (1ull << vk::componentBits(dest_format, dest_comp)) - 1;
|
||||
component = static_cast<u32>(temp * mask);
|
||||
} else if constexpr (!is_source_float && is_dest_float) {
|
||||
constexpr u64 mask = (1ull << vk::componentBits(source_format, comp)) - 1;
|
||||
float temp = static_cast<float>(component) / mask;
|
||||
std::memcpy(&component, &temp, sizeof(float));
|
||||
}
|
||||
|
||||
SetComponent<dest_format, dest_bytes, dest_comp>(dest_pixel, component);
|
||||
});
|
||||
|
||||
// Write destination pixel (dest_bytes includes the padding so we cannot use it here)
|
||||
std::memcpy(dest.data() + dest_offset, DataPtr<dest_bytes>(dest_pixel),
|
||||
vk::blockSize(dest_format));
|
||||
|
||||
// Copy next pixel
|
||||
source_offset += source_bytes;
|
||||
dest_offset += dest_bytes;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCore::Vulkan
|
@ -2,52 +2,148 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <fstream>
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include <span>
|
||||
#include <array>
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_platform.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
std::unique_ptr<Instance> g_vk_instace;
|
||||
Instance::Instance(Frontend::EmuWindow& window) {
|
||||
auto window_info = window.GetWindowInfo();
|
||||
|
||||
// Enable the instance extensions the backend uses
|
||||
auto extensions = GetInstanceExtensions(window_info.type, true);
|
||||
|
||||
// We require a Vulkan 1.1 driver
|
||||
const u32 available_version = vk::enumerateInstanceVersion();
|
||||
if (available_version < VK_API_VERSION_1_1) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Vulkan 1.0 is not supported, 1.1 is required!");
|
||||
}
|
||||
|
||||
const vk::ApplicationInfo application_info = {
|
||||
.pApplicationName = "Citra",
|
||||
.applicationVersion = VK_MAKE_VERSION(1, 0, 0),
|
||||
.pEngineName = "Citra Vulkan",
|
||||
.engineVersion = VK_MAKE_VERSION(1, 0, 0),
|
||||
.apiVersion = available_version
|
||||
};
|
||||
|
||||
const std::array layers = {"VK_LAYER_KHRONOS_validation"};
|
||||
const vk::InstanceCreateInfo instance_info = {
|
||||
.pApplicationInfo = &application_info,
|
||||
.enabledLayerCount = static_cast<u32>(layers.size()),
|
||||
.ppEnabledLayerNames = layers.data(),
|
||||
.enabledExtensionCount = static_cast<u32>(extensions.size()),
|
||||
.ppEnabledExtensionNames = extensions.data()
|
||||
};
|
||||
|
||||
// Create VkInstance
|
||||
instance = vk::createInstance(instance_info);
|
||||
surface = CreateSurface(instance, window);
|
||||
|
||||
// TODO: GPU select dialog
|
||||
physical_device = instance.enumeratePhysicalDevices()[0];
|
||||
device_limits = physical_device.getProperties().limits;
|
||||
|
||||
// Create logical device
|
||||
CreateDevice(true);
|
||||
}
|
||||
|
||||
Instance::~Instance() {
|
||||
device.waitIdle();
|
||||
|
||||
device.destroy();
|
||||
instance.destroy();
|
||||
}
|
||||
|
||||
bool Instance::Create(vk::Instance new_instance, vk::PhysicalDevice gpu,
|
||||
vk::SurfaceKHR surface, bool enable_validation_layer) {
|
||||
instance = new_instance;
|
||||
physical_device = gpu;
|
||||
|
||||
// Get physical device limits
|
||||
device_limits = physical_device.getProperties().limits;
|
||||
|
||||
bool Instance::CreateDevice(bool validation_enabled) {
|
||||
// Determine required extensions and features
|
||||
if (!FindExtensions() || !FindFeatures())
|
||||
return false;
|
||||
auto feature_chain = physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
|
||||
vk::PhysicalDeviceDynamicRenderingFeaturesKHR,
|
||||
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
|
||||
vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>();
|
||||
|
||||
// Create logical device
|
||||
return CreateDevice(surface, enable_validation_layer);
|
||||
}
|
||||
// Not having geometry shaders or wide lines will cause issues with rendering.
|
||||
const vk::PhysicalDeviceFeatures available = feature_chain.get().features;
|
||||
if (!available.geometryShader && !available.wideLines) {
|
||||
LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Accelerated rendering not possible!");
|
||||
}
|
||||
|
||||
bool Instance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) {
|
||||
// Can't create an instance without a valid surface
|
||||
if (!surface) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Invalid surface provided during instance creation!");
|
||||
// Enable some common features other emulators like Dolphin use
|
||||
const vk::PhysicalDeviceFeatures2 features = {
|
||||
.features = {
|
||||
.robustBufferAccess = available.robustBufferAccess,
|
||||
.geometryShader = available.geometryShader,
|
||||
.sampleRateShading = available.sampleRateShading,
|
||||
.dualSrcBlend = available.dualSrcBlend,
|
||||
.logicOp = available.logicOp,
|
||||
.depthClamp = available.depthClamp,
|
||||
.largePoints = available.largePoints,
|
||||
.samplerAnisotropy = available.samplerAnisotropy,
|
||||
.occlusionQueryPrecise = available.occlusionQueryPrecise,
|
||||
.fragmentStoresAndAtomics = available.fragmentStoresAndAtomics,
|
||||
.shaderStorageImageMultisample = available.shaderStorageImageMultisample,
|
||||
.shaderClipDistance = available.shaderClipDistance
|
||||
}
|
||||
};
|
||||
|
||||
// Enable newer Vulkan features
|
||||
auto enabled_features = vk::StructureChain{
|
||||
features,
|
||||
feature_chain.get<vk::PhysicalDeviceDynamicRenderingFeaturesKHR>(),
|
||||
feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
|
||||
feature_chain.get<vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>()
|
||||
};
|
||||
|
||||
auto extension_list = physical_device.enumerateDeviceExtensionProperties();
|
||||
if (extension_list.empty()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "No extensions supported by device.");
|
||||
return false;
|
||||
}
|
||||
|
||||
// List available device extensions
|
||||
for (const auto& extension : extension_list) {
|
||||
LOG_INFO(Render_Vulkan, "Vulkan extension: {}", extension.extensionName);
|
||||
}
|
||||
|
||||
// Helper lambda for adding extensions
|
||||
std::array<const char*, 6> enabled_extensions;
|
||||
u32 enabled_extension_count = 0;
|
||||
|
||||
auto AddExtension = [&](std::string_view name, bool required) -> bool {
|
||||
auto result = std::find_if(extension_list.begin(), extension_list.end(), [&](const auto& prop) {
|
||||
return name.compare(prop.extensionName.data());
|
||||
});
|
||||
|
||||
if (result != extension_list.end()) {
|
||||
LOG_INFO(Render_Vulkan, "Enabling extension: {}", name);
|
||||
enabled_extensions[enabled_extension_count++] = name.data();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (required) {
|
||||
LOG_ERROR(Render_Vulkan, "Unable to find required extension {}.", name);
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
// Add required extensions
|
||||
AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true);
|
||||
|
||||
// Check for optional features
|
||||
dynamic_rendering = AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false);
|
||||
extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
|
||||
push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false);
|
||||
|
||||
// Search queue families for graphics and present queues
|
||||
auto family_properties = physical_device.getQueueFamilyProperties();
|
||||
if (family_properties.empty()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Vulkan physical device reported no queues.");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Search queue families for graphics and present queues
|
||||
graphics_queue_family_index = -1;
|
||||
present_queue_family_index = -1;
|
||||
for (int i = 0; i < family_properties.size(); i++) {
|
||||
@ -68,24 +164,35 @@ bool Instance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) {
|
||||
}
|
||||
}
|
||||
|
||||
if (graphics_queue_family_index == -1 ||
|
||||
present_queue_family_index == -1) {
|
||||
if (graphics_queue_family_index == -1 || present_queue_family_index == -1) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unable to find graphics and/or present queues.");
|
||||
return false;
|
||||
}
|
||||
|
||||
static constexpr float queue_priorities[] = {1.0f};
|
||||
|
||||
const std::array layers{"VK_LAYER_KHRONOS_validation"};
|
||||
const std::array queue_infos{
|
||||
vk::DeviceQueueCreateInfo{{}, graphics_queue_family_index, 1, queue_priorities},
|
||||
vk::DeviceQueueCreateInfo{{}, present_queue_family_index, 1, queue_priorities}
|
||||
const std::array layers = {"VK_LAYER_KHRONOS_validation"};
|
||||
const std::array queue_infos = {
|
||||
vk::DeviceQueueCreateInfo{
|
||||
.queueFamilyIndex = graphics_queue_family_index,
|
||||
.queueCount = 1,
|
||||
.pQueuePriorities = queue_priorities
|
||||
},
|
||||
vk::DeviceQueueCreateInfo{
|
||||
.queueFamilyIndex = present_queue_family_index,
|
||||
.queueCount = 1,
|
||||
.pQueuePriorities = queue_priorities
|
||||
}
|
||||
};
|
||||
|
||||
vk::DeviceCreateInfo device_info({}, 1, queue_infos.data(), 0, nullptr,
|
||||
extensions.size(), extensions.data(), nullptr, &features);
|
||||
vk::DeviceCreateInfo device_info = {
|
||||
.pNext = &enabled_features,
|
||||
.queueCreateInfoCount = 1,
|
||||
.pQueueCreateInfos = queue_infos.data(),
|
||||
.enabledExtensionCount = enabled_extension_count,
|
||||
.ppEnabledExtensionNames = enabled_extensions.data(),
|
||||
};
|
||||
|
||||
// Create queue create info structs
|
||||
if (graphics_queue_family_index != present_queue_family_index) {
|
||||
device_info.queueCreateInfoCount = 2;
|
||||
}
|
||||
@ -104,87 +211,67 @@ bool Instance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) {
|
||||
graphics_queue = device.getQueue(graphics_queue_family_index, 0);
|
||||
present_queue = device.getQueue(present_queue_family_index, 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Instance::FindFeatures() {
|
||||
auto available = physical_device.getFeatures();
|
||||
|
||||
// Not having geometry shaders or wide lines will cause issues with rendering.
|
||||
if (!available.geometryShader && !available.wideLines) {
|
||||
LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Rendering will be limited");
|
||||
}
|
||||
|
||||
// Enable some common features other emulators like Dolphin use
|
||||
vk_features.dualSrcBlend = available.dualSrcBlend;
|
||||
vk_features.geometryShader = available.geometryShader;
|
||||
vk_features.samplerAnisotropy = available.samplerAnisotropy;
|
||||
vk_features.logicOp = available.logicOp;
|
||||
vk_features.fragmentStoresAndAtomics = available.fragmentStoresAndAtomics;
|
||||
vk_features.sampleRateShading = available.sampleRateShading;
|
||||
vk_features.largePoints = available.largePoints;
|
||||
vk_features.shaderStorageImageMultisample = available.shaderStorageImageMultisample;
|
||||
vk_features.occlusionQueryPrecise = available.occlusionQueryPrecise;
|
||||
vk_features.shaderClipDistance = available.shaderClipDistance;
|
||||
vk_features.depthClamp = available.depthClamp;
|
||||
vk_features.textureCompressionBC = available.textureCompressionBC;
|
||||
|
||||
// Enable newer Vulkan features
|
||||
vk12_features.timelineSemaphore = true;
|
||||
vk13_features.dynamicRendering = true;
|
||||
dynamic_state_features.extendedDynamicState = true;
|
||||
dynamic_state2_features.extendedDynamicState2 = true;
|
||||
|
||||
// Include features in device creation
|
||||
vk12_features.pNext = &vk13_features;
|
||||
vk13_features.pNext = &dynamic_state_features;
|
||||
dynamic_state_features.pNext = &dynamic_state2_features;
|
||||
features = vk::PhysicalDeviceFeatures2{vk_features, &vk12_features};
|
||||
// Create the VMA allocator
|
||||
CreateAllocator();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Instance::FindExtensions() {
|
||||
auto available = physical_device.enumerateDeviceExtensionProperties();
|
||||
if (available.empty()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "No extensions supported by device.");
|
||||
return false;
|
||||
}
|
||||
|
||||
// List available device extensions
|
||||
for (const auto& prop : available) {
|
||||
LOG_INFO(Render_Vulkan, "Vulkan extension: {}", prop.extensionName);
|
||||
}
|
||||
|
||||
// Helper lambda for adding extensions
|
||||
auto AddExtension = [&](const char* name, bool required) {
|
||||
auto result = std::find_if(available.begin(), available.end(), [&](const auto& prop) {
|
||||
return !std::strcmp(name, prop.extensionName);
|
||||
});
|
||||
|
||||
if (result != available.end()) {
|
||||
LOG_INFO(Render_Vulkan, "Enabling extension: {}", name);
|
||||
extensions.push_back(name);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (required) {
|
||||
LOG_ERROR(Render_Vulkan, "Unable to find required extension {}.", name);
|
||||
}
|
||||
|
||||
return false;
|
||||
void Instance::CreateAllocator() {
|
||||
VmaVulkanFunctions functions = {
|
||||
.vkGetInstanceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr,
|
||||
.vkGetDeviceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr
|
||||
};
|
||||
|
||||
// Add required extensions
|
||||
if (!AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true) ||
|
||||
!AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, true) ||
|
||||
!AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, true) ||
|
||||
!AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME, true) ||
|
||||
!AddExtension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME, true)) {
|
||||
return false;
|
||||
VmaAllocatorCreateInfo allocator_info = {
|
||||
.physicalDevice = physical_device,
|
||||
.device = device,
|
||||
.pVulkanFunctions = &functions,
|
||||
.instance = instance,
|
||||
.vulkanApiVersion = VK_API_VERSION_1_1
|
||||
};
|
||||
|
||||
vmaCreateAllocator(&allocator_info, &allocator);
|
||||
}
|
||||
|
||||
bool Instance::IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const {
|
||||
static std::unordered_map<vk::Format, vk::FormatProperties> supported;
|
||||
if (auto iter = supported.find(format); iter != supported.end()) {
|
||||
return (iter->second.optimalTilingFeatures & usage) == usage;
|
||||
}
|
||||
|
||||
return true;
|
||||
// Cache format properties so we don't have to query the driver all the time
|
||||
const vk::FormatProperties properties = physical_device.getFormatProperties(format);
|
||||
supported.insert(std::make_pair(format, properties));
|
||||
|
||||
return (properties.optimalTilingFeatures & usage) == usage;
|
||||
}
|
||||
|
||||
vk::Format Instance::GetFormatAlternative(vk::Format format) const {
|
||||
vk::FormatFeatureFlags features = GetFormatFeatures(GetImageAspect(format));
|
||||
if (IsFormatSupported(format, features)) {
|
||||
return format;
|
||||
}
|
||||
|
||||
// Return the most supported alternative format preferably with the
|
||||
// same block size according to the Vulkan spec.
|
||||
// See 43.3. Required Format Support of the Vulkan spec
|
||||
switch (format) {
|
||||
case vk::Format::eD24UnormS8Uint:
|
||||
return vk::Format::eD32SfloatS8Uint;
|
||||
case vk::Format::eX8D24UnormPack32:
|
||||
return vk::Format::eD32Sfloat;
|
||||
case vk::Format::eR5G5B5A1UnormPack16:
|
||||
return vk::Format::eA1R5G5B5UnormPack16;
|
||||
case vk::Format::eR4G4B4A4UnormPack16:
|
||||
return vk::Format::eB4G4R4A4UnormPack16;
|
||||
case vk::Format::eR8G8B8Unorm:
|
||||
return vk::Format::eR8G8B8A8Unorm;
|
||||
default:
|
||||
LOG_WARNING(Render_Vulkan, "Unable to find compatible alternative to format = {} with usage {}",
|
||||
vk::to_string(format), vk::to_string(features));
|
||||
return vk::Format::eR8G8B8A8Unorm;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -8,61 +8,104 @@
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
/// The global Vulkan instance
|
||||
class Instance {
|
||||
public:
|
||||
Instance() = default;
|
||||
Instance(Frontend::EmuWindow& window);
|
||||
~Instance();
|
||||
|
||||
/// Construct global Vulkan context
|
||||
bool Create(vk::Instance instance, vk::PhysicalDevice gpu,
|
||||
vk::SurfaceKHR surface, bool enable_validation_layer);
|
||||
/// Returns the Vulkan instance
|
||||
vk::Instance GetInstance() const {
|
||||
return instance;
|
||||
}
|
||||
|
||||
vk::Device GetDevice() const { return device; }
|
||||
vk::PhysicalDevice GetPhysicalDevice() const { return physical_device; }
|
||||
vk::Instance GetInstance() const { return instance; }
|
||||
/// Returns the Vulkan surface
|
||||
vk::SurfaceKHR GetSurface() const {
|
||||
return surface;
|
||||
}
|
||||
|
||||
/// Returns the current physical device
|
||||
vk::PhysicalDevice GetPhysicalDevice() const {
|
||||
return physical_device;
|
||||
}
|
||||
|
||||
/// Returns the Vulkan device
|
||||
vk::Device GetDevice() const {
|
||||
return device;
|
||||
}
|
||||
|
||||
VmaAllocator GetAllocator() const {
|
||||
return allocator;
|
||||
}
|
||||
|
||||
/// Retrieve queue information
|
||||
u32 GetGraphicsQueueFamilyIndex() const { return graphics_queue_family_index; }
|
||||
u32 GetPresentQueueFamilyIndex() const { return present_queue_family_index; }
|
||||
vk::Queue GetGraphicsQueue() const { return graphics_queue; }
|
||||
vk::Queue GetPresentQueue() const { return present_queue; }
|
||||
u32 GetGraphicsQueueFamilyIndex() const {
|
||||
return graphics_queue_family_index;
|
||||
}
|
||||
|
||||
u32 GetPresentQueueFamilyIndex() const {
|
||||
return present_queue_family_index;
|
||||
}
|
||||
|
||||
vk::Queue GetGraphicsQueue() const {
|
||||
return graphics_queue;
|
||||
}
|
||||
|
||||
vk::Queue GetPresentQueue() const {
|
||||
return present_queue;
|
||||
}
|
||||
|
||||
/// Feature support
|
||||
bool SupportsAnisotropicFiltering() const;
|
||||
u32 UniformMinAlignment() const { return static_cast<u32>(device_limits.minUniformBufferOffsetAlignment); }
|
||||
bool IsDynamicRenderingSupported() const {
|
||||
return dynamic_rendering;
|
||||
}
|
||||
|
||||
bool IsExtendedDynamicStateSupported() const {
|
||||
return extended_dynamic_state;
|
||||
}
|
||||
|
||||
bool IsPushDescriptorsSupported() const {
|
||||
return push_descriptors;
|
||||
}
|
||||
|
||||
/// Returns the minimum required alignment for uniforms
|
||||
vk::DeviceSize UniformMinAlignment() const {
|
||||
return device_limits.minUniformBufferOffsetAlignment;
|
||||
}
|
||||
|
||||
/// Returns true when the format supports the provided feature flags
|
||||
bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const;
|
||||
|
||||
/// Returns the most compatible format that supports the provided feature flags
|
||||
vk::Format GetFormatAlternative(vk::Format format) const;
|
||||
|
||||
private:
|
||||
bool CreateDevice(vk::SurfaceKHR surface, bool validation_enabled);
|
||||
bool FindExtensions();
|
||||
bool FindFeatures();
|
||||
bool CreateDevice(bool validation_enabled);
|
||||
void CreateAllocator();
|
||||
|
||||
public:
|
||||
private:
|
||||
// Queue family indexes
|
||||
u32 present_queue_family_index{}, graphics_queue_family_index{};
|
||||
u32 present_queue_family_index = 0, graphics_queue_family_index = 0;
|
||||
vk::Queue present_queue, graphics_queue;
|
||||
|
||||
// Core vulkan objects
|
||||
vk::Device device;
|
||||
vk::PhysicalDevice physical_device;
|
||||
vk::Instance instance;
|
||||
vk::Device device;
|
||||
|
||||
// Extensions and features
|
||||
std::vector<const char*> extensions;
|
||||
vk::PhysicalDeviceFeatures2 features{};
|
||||
vk::SurfaceKHR surface;
|
||||
vk::PhysicalDeviceLimits device_limits;
|
||||
VmaAllocator allocator;
|
||||
|
||||
// Features per vulkan version
|
||||
vk::PhysicalDeviceFeatures vk_features{};
|
||||
vk::PhysicalDeviceVulkan13Features vk13_features{};
|
||||
vk::PhysicalDeviceVulkan12Features vk12_features{};
|
||||
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state_features{};
|
||||
vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT dynamic_state2_features{};
|
||||
vk::PhysicalDeviceColorWriteEnableFeaturesEXT color_write_features{};
|
||||
bool dynamic_rendering = false;
|
||||
bool extended_dynamic_state = false;
|
||||
bool push_descriptors = false;
|
||||
};
|
||||
|
||||
extern std::unique_ptr<Instance> g_vk_instace;
|
||||
|
||||
} // namespace Vulkan
|
||||
} // namespace VideoCore::Vulkan
|
||||
|
414
src/video_core/renderer_vulkan/vk_pipeline.cpp
Normal file
414
src/video_core/renderer_vulkan/vk_pipeline.cpp
Normal file
@ -0,0 +1,414 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/pica_to_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
// Maximum binding per descriptor set
|
||||
constexpr u32 MAX_BINDING_SLOTS = 7;
|
||||
|
||||
vk::ShaderStageFlags ToVkStageFlags(BindingType type) {
|
||||
vk::ShaderStageFlags flags;
|
||||
switch (type) {
|
||||
case BindingType::Sampler:
|
||||
case BindingType::Texture:
|
||||
case BindingType::TexelBuffer:
|
||||
flags = vk::ShaderStageFlagBits::eFragment;
|
||||
break;
|
||||
case BindingType::StorageImage:
|
||||
case BindingType::Uniform:
|
||||
case BindingType::UniformDynamic:
|
||||
flags = vk::ShaderStageFlagBits::eFragment |
|
||||
vk::ShaderStageFlagBits::eVertex |
|
||||
vk::ShaderStageFlagBits::eGeometry |
|
||||
vk::ShaderStageFlagBits::eCompute;
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR(Render_Vulkan, "Unknown descriptor type!");
|
||||
}
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
vk::DescriptorType ToVkDescriptorType(BindingType type) {
|
||||
switch (type) {
|
||||
case BindingType::Uniform:
|
||||
return vk::DescriptorType::eUniformBuffer;
|
||||
case BindingType::UniformDynamic:
|
||||
return vk::DescriptorType::eUniformBufferDynamic;
|
||||
case BindingType::TexelBuffer:
|
||||
return vk::DescriptorType::eUniformTexelBuffer;
|
||||
case BindingType::Texture:
|
||||
return vk::DescriptorType::eSampledImage;
|
||||
case BindingType::Sampler:
|
||||
return vk::DescriptorType::eSampler;
|
||||
case BindingType::StorageImage:
|
||||
return vk::DescriptorType::eStorageImage;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown descriptor type!");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
u32 AttribBytes(VertexAttribute attrib) {
|
||||
switch (attrib.type) {
|
||||
case AttribType::Float:
|
||||
return sizeof(float) * attrib.components;
|
||||
case AttribType::Int:
|
||||
return sizeof(u32) * attrib.components;
|
||||
case AttribType::Short:
|
||||
return sizeof(u16) * attrib.components;
|
||||
}
|
||||
}
|
||||
|
||||
vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
|
||||
switch (attrib.type) {
|
||||
case AttribType::Float:
|
||||
switch (attrib.components) {
|
||||
case 1: return vk::Format::eR32Sfloat;
|
||||
case 2: return vk::Format::eR32G32Sfloat;
|
||||
case 3: return vk::Format::eR32G32B32Sfloat;
|
||||
case 4: return vk::Format::eR32G32B32A32Sfloat;
|
||||
}
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unimplemented vertex attribute format!");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
vk::ShaderStageFlagBits ToVkShaderStage(ShaderStage stage) {
|
||||
switch (stage) {
|
||||
case ShaderStage::Vertex:
|
||||
return vk::ShaderStageFlagBits::eVertex;
|
||||
case ShaderStage::Fragment:
|
||||
return vk::ShaderStageFlagBits::eFragment;
|
||||
case ShaderStage::Geometry:
|
||||
return vk::ShaderStageFlagBits::eGeometry;
|
||||
case ShaderStage::Compute:
|
||||
return vk::ShaderStageFlagBits::eCompute;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Undefined shader stage!");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
PipelineLayout::PipelineLayout(Instance& instance, PipelineLayoutInfo info) :
|
||||
instance(instance), set_layout_count(info.group_count) {
|
||||
|
||||
// Used as temp storage for CreateDescriptorSet
|
||||
std::array<vk::DescriptorSetLayoutBinding, MAX_BINDING_SLOTS> set_bindings;
|
||||
std::array<vk::DescriptorUpdateTemplateEntry, MAX_BINDING_SLOTS> update_entries;
|
||||
|
||||
vk::Device device = instance.GetDevice();
|
||||
for (u32 set = 0; set < set_layout_count; set++) {
|
||||
auto& group = info.binding_groups[set];
|
||||
|
||||
u32 binding = 0;
|
||||
while (group[binding] != BindingType::None) {
|
||||
const BindingType type = group[binding];
|
||||
set_bindings[binding] = vk::DescriptorSetLayoutBinding{
|
||||
.binding = binding,
|
||||
.descriptorType = ToVkDescriptorType(type),
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = ToVkStageFlags(type)
|
||||
};
|
||||
|
||||
// Also create update template to speed up descriptor writes
|
||||
update_entries[binding] = vk::DescriptorUpdateTemplateEntry{
|
||||
.dstBinding = binding,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = ToVkDescriptorType(type),
|
||||
.offset = binding * sizeof(DescriptorData),
|
||||
.stride = sizeof(DescriptorData)
|
||||
};
|
||||
|
||||
binding++;
|
||||
}
|
||||
|
||||
const vk::DescriptorSetLayoutCreateInfo layout_info = {
|
||||
.bindingCount = binding,
|
||||
.pBindings = set_bindings.data()
|
||||
};
|
||||
|
||||
// Create descriptor set layout
|
||||
set_layouts[set] = device.createDescriptorSetLayout(layout_info);
|
||||
|
||||
const vk::DescriptorUpdateTemplateCreateInfo template_info = {
|
||||
.descriptorUpdateEntryCount = binding,
|
||||
.pDescriptorUpdateEntries = update_entries.data(),
|
||||
.descriptorSetLayout = set_layouts[set]
|
||||
};
|
||||
|
||||
// Create descriptor set update template
|
||||
update_templates[set] = device.createDescriptorUpdateTemplate(template_info);
|
||||
}
|
||||
|
||||
// Create pipeline layout
|
||||
const vk::PushConstantRange range = {
|
||||
.offset = 0,
|
||||
.size = info.push_constant_block_size
|
||||
};
|
||||
|
||||
bool push_constants = info.push_constant_block_size > 0;
|
||||
const u32 range_count = push_constants ? 1u : 0u;
|
||||
|
||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||
.setLayoutCount = set_layout_count,
|
||||
.pSetLayouts = set_layouts.data(),
|
||||
.pushConstantRangeCount = range_count,
|
||||
.pPushConstantRanges = &range
|
||||
};
|
||||
|
||||
pipeline_layout = device.createPipelineLayout(layout_info);
|
||||
}
|
||||
|
||||
PipelineLayout::~PipelineLayout() {
|
||||
vk::Device device = instance.GetDevice();
|
||||
device.destroyPipelineLayout(pipeline_layout);
|
||||
|
||||
u32 i = 0;
|
||||
while (set_layouts[i] && update_templates[i]) {
|
||||
device.destroyDescriptorSetLayout(set_layouts[i]);
|
||||
device.destroyDescriptorUpdateTemplate(update_templates[i]);
|
||||
}
|
||||
}
|
||||
|
||||
Pipeline::Pipeline(Instance& instance, PipelineLayout& owner, PipelineType type,
|
||||
PipelineInfo info, vk::PipelineCache cache) : PipelineBase(type, info),
|
||||
instance(instance), owner(owner) {
|
||||
|
||||
vk::Device device = instance.GetDevice();
|
||||
|
||||
u32 shader_count = 0;
|
||||
std::array<vk::PipelineShaderStageCreateInfo, MAX_SHADER_STAGES> shader_stages;
|
||||
for (int i = 0; i < info.shaders.size(); i++) {
|
||||
auto& shader = info.shaders[i];
|
||||
if (!shader.IsValid()) {
|
||||
shader_count = i;
|
||||
break;
|
||||
}
|
||||
|
||||
Shader* vk_shader = static_cast<Shader*>(shader.Get());
|
||||
shader_stages[i] = vk::PipelineShaderStageCreateInfo{
|
||||
.stage = ToVkShaderStage(shader->GetStage()),
|
||||
.module = vk_shader->GetHandle(),
|
||||
.pName = shader->GetName().data(),
|
||||
};
|
||||
}
|
||||
|
||||
// Create a graphics pipeline
|
||||
if (type == PipelineType::Graphics) {
|
||||
const vk::VertexInputBindingDescription binding_desc = {
|
||||
.binding = 0,
|
||||
.stride = info.vertex_layout.stride
|
||||
};
|
||||
|
||||
// Populate vertex attribute structures
|
||||
u32 attribute_count = 0;
|
||||
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attribute_desc;
|
||||
for (u32 i = 0; i < MAX_VERTEX_ATTRIBUTES; i++) {
|
||||
auto& attr = info.vertex_layout.attributes[i];
|
||||
if (attr.components == 0) {
|
||||
attribute_count = i;
|
||||
break;
|
||||
}
|
||||
|
||||
attribute_desc[i] = vk::VertexInputAttributeDescription{
|
||||
.location = i,
|
||||
.binding = 0,
|
||||
.format = ToVkAttributeFormat(attr),
|
||||
.offset = (i > 0 ? attribute_desc[i - 1].offset +
|
||||
AttribBytes(info.vertex_layout.attributes[i - 1]) : 0)
|
||||
};
|
||||
}
|
||||
|
||||
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
|
||||
.vertexBindingDescriptionCount = 1,
|
||||
.pVertexBindingDescriptions = &binding_desc,
|
||||
.vertexAttributeDescriptionCount = attribute_count,
|
||||
.pVertexAttributeDescriptions = attribute_desc.data()
|
||||
};
|
||||
|
||||
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
|
||||
.topology = PicaToVK::PrimitiveTopology(info.rasterization.topology),
|
||||
.primitiveRestartEnable = false
|
||||
};
|
||||
|
||||
const vk::PipelineRasterizationStateCreateInfo raster_state = {
|
||||
.depthClampEnable = false,
|
||||
.rasterizerDiscardEnable = false,
|
||||
.cullMode = PicaToVK::CullMode(info.rasterization.cull_mode),
|
||||
.frontFace = vk::FrontFace::eClockwise,
|
||||
.depthBiasEnable = false,
|
||||
.lineWidth = 1.0f
|
||||
};
|
||||
|
||||
const vk::PipelineMultisampleStateCreateInfo multisampling = {
|
||||
.rasterizationSamples = vk::SampleCountFlagBits::e1,
|
||||
.sampleShadingEnable = false
|
||||
};
|
||||
|
||||
const vk::PipelineColorBlendAttachmentState colorblend_attachment = {
|
||||
.blendEnable = true,
|
||||
.srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor),
|
||||
.dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor),
|
||||
.colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq),
|
||||
.srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor),
|
||||
.dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor),
|
||||
.alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq),
|
||||
.colorWriteMask = static_cast<vk::ColorComponentFlags>(info.blending.color_write_mask)
|
||||
};
|
||||
|
||||
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
||||
.logicOpEnable = true,
|
||||
.logicOp = vk::LogicOp::eCopy, // TODO
|
||||
.attachmentCount = 1,
|
||||
.pAttachments = &colorblend_attachment,
|
||||
};
|
||||
|
||||
const bool extended_dynamic_states = instance.IsExtendedDynamicStateSupported();
|
||||
const std::array dynamic_states = {
|
||||
vk::DynamicState::eViewport,
|
||||
vk::DynamicState::eScissor,
|
||||
vk::DynamicState::eLineWidth,
|
||||
vk::DynamicState::eStencilCompareMask,
|
||||
vk::DynamicState::eStencilWriteMask,
|
||||
vk::DynamicState::eStencilReference,
|
||||
// VK_EXT_extended_dynamic_state
|
||||
vk::DynamicState::eCullModeEXT,
|
||||
vk::DynamicState::eDepthCompareOpEXT,
|
||||
vk::DynamicState::eDepthTestEnableEXT,
|
||||
vk::DynamicState::eDepthWriteEnableEXT,
|
||||
vk::DynamicState::eFrontFaceEXT,
|
||||
vk::DynamicState::ePrimitiveTopologyEXT,
|
||||
vk::DynamicState::eStencilOpEXT,
|
||||
vk::DynamicState::eStencilTestEnableEXT,
|
||||
};
|
||||
|
||||
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
|
||||
.dynamicStateCount = extended_dynamic_states ? 14u : 6u,
|
||||
.pDynamicStates = dynamic_states.data()
|
||||
};
|
||||
|
||||
const vk::StencilOpState stencil_op_state = {
|
||||
.failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
|
||||
.passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
|
||||
.depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
|
||||
.compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op),
|
||||
.compareMask = static_cast<u32>(info.depth_stencil.stencil_compare_mask.Value()),
|
||||
.writeMask = static_cast<u32>(info.depth_stencil.stencil_write_mask.Value()),
|
||||
.reference = static_cast<u32>(info.depth_stencil.stencil_reference.Value())
|
||||
};
|
||||
|
||||
const vk::PipelineDepthStencilStateCreateInfo depth_info = {
|
||||
.depthTestEnable = static_cast<u32>(info.depth_stencil.depth_test_enable.Value()),
|
||||
.depthWriteEnable = static_cast<u32>(info.depth_stencil.depth_write_enable.Value()),
|
||||
.depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op),
|
||||
.depthBoundsTestEnable = false,
|
||||
.stencilTestEnable = static_cast<u32>(info.depth_stencil.stencil_test_enable.Value()),
|
||||
.front = stencil_op_state,
|
||||
.back = stencil_op_state
|
||||
};
|
||||
|
||||
const vk::GraphicsPipelineCreateInfo pipeline_info = {
|
||||
.stageCount = shader_count,
|
||||
.pStages = shader_stages.data(),
|
||||
.pVertexInputState = &vertex_input_info,
|
||||
.pInputAssemblyState = &input_assembly,
|
||||
.pRasterizationState = &raster_state,
|
||||
.pMultisampleState = &multisampling,
|
||||
.pDepthStencilState = &depth_info,
|
||||
.pColorBlendState = &color_blending,
|
||||
.pDynamicState = &dynamic_info,
|
||||
.layout = owner.GetLayout(),
|
||||
.renderPass = {}
|
||||
};
|
||||
|
||||
if (auto result = device.createGraphicsPipeline(cache, pipeline_info); result.result == vk::Result::eSuccess) {
|
||||
pipeline = result.value;
|
||||
} else {
|
||||
LOG_CRITICAL(Render_Vulkan, "Graphics pipeline creation failed!");
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else { // Compute pipeline
|
||||
ASSERT(shader_count == 1);
|
||||
const vk::ComputePipelineCreateInfo pipeline_info = {
|
||||
.stage = shader_stages[0],
|
||||
.layout = owner.GetLayout()
|
||||
};
|
||||
|
||||
if (auto result = device.createComputePipeline(cache, pipeline_info); result.result == vk::Result::eSuccess) {
|
||||
pipeline = result.value;
|
||||
} else {
|
||||
LOG_CRITICAL(Render_Vulkan, "Compute pipeline creation failed!");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
Pipeline::~Pipeline() {
|
||||
vk::Device device = instance.GetDevice();
|
||||
device.destroyPipeline(pipeline);
|
||||
}
|
||||
|
||||
|
||||
void Pipeline::BindTexture(u32 group, u32 slot, TextureHandle handle) {
|
||||
Texture* texture = static_cast<Texture*>(handle.Get());
|
||||
|
||||
const DescriptorData data = {
|
||||
.image_info = vk::DescriptorImageInfo{
|
||||
.imageView = texture->GetView(),
|
||||
.imageLayout = texture->GetLayout()
|
||||
}
|
||||
};
|
||||
|
||||
owner.SetBinding(group, slot, data);
|
||||
}
|
||||
|
||||
void Pipeline::BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view) {
|
||||
Buffer* buffer = static_cast<Buffer*>(handle.Get());
|
||||
|
||||
// Texel buffers are bound with their views
|
||||
if (buffer->GetUsage() == BufferUsage::Texel) {
|
||||
const DescriptorData data = {
|
||||
.buffer_view = buffer->GetView(view)
|
||||
};
|
||||
|
||||
owner.SetBinding(group, slot, data);
|
||||
} else {
|
||||
const DescriptorData data = {
|
||||
.buffer_info = vk::DescriptorBufferInfo{
|
||||
.buffer = buffer->GetHandle(),
|
||||
.offset = 0,
|
||||
.range = buffer->GetCapacity()
|
||||
}
|
||||
};
|
||||
|
||||
owner.SetBinding(group, slot, data);
|
||||
}
|
||||
}
|
||||
|
||||
void Pipeline::BindSampler(u32 group, u32 slot, SamplerHandle handle) {
|
||||
Sampler* sampler = static_cast<Sampler*>(handle.Get());
|
||||
|
||||
const DescriptorData data = {
|
||||
.image_info = vk::DescriptorImageInfo{
|
||||
.sampler = sampler->GetHandle()
|
||||
}
|
||||
};
|
||||
|
||||
owner.SetBinding(group, slot, data);
|
||||
}
|
||||
|
||||
} // namespace VideoCore::Vulkan
|
96
src/video_core/renderer_vulkan/vk_pipeline.h
Normal file
96
src/video_core/renderer_vulkan/vk_pipeline.h
Normal file
@ -0,0 +1,96 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include "video_core/common/pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
class Instance;
|
||||
class CommandScheduler;
|
||||
|
||||
union DescriptorData {
|
||||
vk::DescriptorImageInfo image_info{};
|
||||
vk::DescriptorBufferInfo buffer_info;
|
||||
vk::BufferView buffer_view;
|
||||
};
|
||||
|
||||
/**
|
||||
* Stores the pipeline layout as well as the descriptor set layouts
|
||||
* and update templates associated with those layouts.
|
||||
* Functions as the "parent" to a group of pipelines that share the same layout
|
||||
*/
|
||||
class PipelineLayout {
|
||||
public:
|
||||
PipelineLayout(Instance& instance, PipelineLayoutInfo info);
|
||||
~PipelineLayout();
|
||||
|
||||
// Disable copy constructor
|
||||
PipelineLayout(const PipelineLayout&) = delete;
|
||||
PipelineLayout& operator=(const PipelineLayout&) = delete;
|
||||
|
||||
// Assigns data to a particular binding
|
||||
void SetBinding(u32 set, u32 binding, DescriptorData data) {
|
||||
update_data[set][binding] = data;
|
||||
}
|
||||
|
||||
// Returns the most current descriptor update data
|
||||
std::span<DescriptorData> GetData(u32 set) {
|
||||
return std::span{update_data.at(set).data(), set_layout_count};
|
||||
}
|
||||
|
||||
// Returns the underlying vulkan pipeline layout handle
|
||||
vk::PipelineLayout GetLayout() const {
|
||||
return pipeline_layout;
|
||||
}
|
||||
|
||||
// Returns the descriptor set update template handle associated with the provided set index
|
||||
vk::DescriptorUpdateTemplate GetUpdateTemplate(u32 set) const {
|
||||
return update_templates.at(set);
|
||||
}
|
||||
|
||||
private:
|
||||
Instance& instance;
|
||||
vk::PipelineLayout pipeline_layout = VK_NULL_HANDLE;
|
||||
u32 set_layout_count = 0;
|
||||
std::array<vk::DescriptorSetLayout, MAX_BINDING_GROUPS> set_layouts;
|
||||
std::array<vk::DescriptorUpdateTemplate, MAX_BINDING_GROUPS> update_templates;
|
||||
|
||||
// Update data for the descriptor sets
|
||||
using SetData = std::array<DescriptorData, MAX_BINDINGS_IN_GROUP>;
|
||||
std::array<SetData, MAX_BINDING_GROUPS> update_data;
|
||||
};
|
||||
|
||||
class Pipeline : public VideoCore::PipelineBase {
|
||||
public:
|
||||
Pipeline(Instance& instance, PipelineLayout& owner,
|
||||
PipelineType type, PipelineInfo info, vk::PipelineCache cache);
|
||||
~Pipeline() override;
|
||||
|
||||
void BindTexture(u32 group, u32 slot, TextureHandle handle) override;
|
||||
|
||||
void BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view = 0) override;
|
||||
|
||||
void BindSampler(u32 group, u32 slot, SamplerHandle handle) override;
|
||||
|
||||
/// Returns the layout tracker that owns this pipeline
|
||||
PipelineLayout& GetOwner() const {
|
||||
return owner;
|
||||
}
|
||||
|
||||
/// Returns the underlying vulkan pipeline handle
|
||||
vk::Pipeline GetHandle() const {
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
private:
|
||||
Instance& instance;
|
||||
PipelineLayout& owner;
|
||||
vk::Pipeline pipeline;
|
||||
};
|
||||
|
||||
} // namespace VideoCore::Vulkan
|
@ -1,267 +0,0 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_builder.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_state.h"
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
PipelineLayoutBuilder::PipelineLayoutBuilder() {
|
||||
Clear();
|
||||
}
|
||||
|
||||
void PipelineLayoutBuilder::Clear() {
|
||||
pipeline_layout_info = vk::PipelineLayoutCreateInfo{};
|
||||
}
|
||||
|
||||
vk::PipelineLayout PipelineLayoutBuilder::Build() {
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
|
||||
auto result = device.createPipelineLayout(pipeline_layout_info);
|
||||
if (!result) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create pipeline layout");
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void PipelineLayoutBuilder::AddDescriptorSet(vk::DescriptorSetLayout layout) {
|
||||
assert(pipeline_layout_info.setLayoutCount < MAX_SETS);
|
||||
|
||||
sets[pipeline_layout_info.setLayoutCount++] = layout;
|
||||
pipeline_layout_info.pSetLayouts = sets.data();
|
||||
}
|
||||
|
||||
void PipelineLayoutBuilder::AddPushConstants(vk::ShaderStageFlags stages, u32 offset, u32 size) {
|
||||
assert(pipeline_layout_info.pushConstantRangeCount < MAX_PUSH_CONSTANTS);
|
||||
|
||||
push_constants[pipeline_layout_info.pushConstantRangeCount++] = {stages, offset, size};
|
||||
pipeline_layout_info.pPushConstantRanges = push_constants.data();
|
||||
}
|
||||
|
||||
PipelineBuilder::PipelineBuilder() {
|
||||
Clear();
|
||||
}
|
||||
|
||||
void PipelineBuilder::Clear() {
|
||||
pipeline_info = vk::GraphicsPipelineCreateInfo{};
|
||||
shader_stages.clear();
|
||||
|
||||
vertex_input_state = vk::PipelineVertexInputStateCreateInfo{};
|
||||
input_assembly = vk::PipelineInputAssemblyStateCreateInfo{};
|
||||
rasterization_state = vk::PipelineRasterizationStateCreateInfo{};
|
||||
depth_state = vk::PipelineDepthStencilStateCreateInfo{};
|
||||
|
||||
blend_state = vk::PipelineColorBlendStateCreateInfo{};
|
||||
blend_attachment = vk::PipelineColorBlendAttachmentState{};
|
||||
dynamic_info = vk::PipelineDynamicStateCreateInfo{};
|
||||
dynamic_states.fill({});
|
||||
|
||||
viewport_state = vk::PipelineViewportStateCreateInfo{};
|
||||
multisample_info = vk::PipelineMultisampleStateCreateInfo{};
|
||||
|
||||
// Set defaults
|
||||
SetNoCullRasterizationState();
|
||||
SetNoDepthTestState();
|
||||
SetNoBlendingState();
|
||||
SetPrimitiveTopology(vk::PrimitiveTopology::eTriangleList);
|
||||
|
||||
// Have to be specified even if dynamic
|
||||
SetViewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f);
|
||||
SetScissorRect(0, 0, 1, 1);
|
||||
SetBlendConstants(1.0f, 1.0f, 1.0f, 1.0f);
|
||||
SetMultisamples(vk::SampleCountFlagBits::e1, false);
|
||||
}
|
||||
|
||||
vk::Pipeline PipelineBuilder::Build() {
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
|
||||
auto result = device.createGraphicsPipeline({}, pipeline_info);
|
||||
if (result.result != vk::Result::eSuccess) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to build vulkan pipeline!");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return result.value;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetPipelineLayout(vk::PipelineLayout layout) {
|
||||
pipeline_info.layout = layout;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetShaderStage(vk::ShaderStageFlagBits stage, vk::ShaderModule module) {
|
||||
auto result = std::ranges::find_if(shader_stages.begin(), shader_stages.end(), [stage](const auto& info) {
|
||||
return info.stage == stage;
|
||||
});
|
||||
|
||||
/* If the stage already exists, just replace the module */
|
||||
if (result != shader_stages.end()) {
|
||||
result->module = module;
|
||||
}
|
||||
else {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags(), stage, module, "main");
|
||||
pipeline_info.stageCount++;
|
||||
}
|
||||
|
||||
pipeline_info.pStages = shader_stages.data();
|
||||
}
|
||||
|
||||
void PipelineBuilder::AddVertexBuffer(u32 binding, u32 stride, vk::VertexInputRate input_rate,
|
||||
std::span<vk::VertexInputAttributeDescription> attributes) {
|
||||
// Copy attributes to private array
|
||||
auto loc = vertex_attributes.begin() + vertex_input_state.vertexAttributeDescriptionCount;
|
||||
std::copy(attributes.begin(), attributes.end(), loc);
|
||||
|
||||
vertex_buffers[vertex_input_state.vertexBindingDescriptionCount++] = {binding, stride, input_rate};
|
||||
vertex_input_state.vertexAttributeDescriptionCount += attributes.size();
|
||||
|
||||
vertex_input_state.pVertexBindingDescriptions = vertex_buffers.data();
|
||||
vertex_input_state.pVertexAttributeDescriptions = vertex_attributes.data();
|
||||
|
||||
pipeline_info.pVertexInputState = &vertex_input_state;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetPrimitiveTopology(vk::PrimitiveTopology topology, bool enable_primitive_restart) {
|
||||
input_assembly.topology = topology;
|
||||
input_assembly.primitiveRestartEnable = enable_primitive_restart;
|
||||
pipeline_info.pInputAssemblyState = &input_assembly;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetRasterizationState(vk::PolygonMode polygon_mode, vk::CullModeFlags cull_mode,
|
||||
vk::FrontFace front_face) {
|
||||
rasterization_state.polygonMode = polygon_mode;
|
||||
rasterization_state.cullMode = cull_mode;
|
||||
rasterization_state.frontFace = front_face;
|
||||
pipeline_info.pRasterizationState = &rasterization_state;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetLineWidth(float width) {
|
||||
rasterization_state.lineWidth = width;
|
||||
pipeline_info.pRasterizationState = &rasterization_state;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetMultisamples(vk::SampleCountFlagBits samples, bool per_sample_shading) {
|
||||
multisample_info.rasterizationSamples = samples;
|
||||
multisample_info.sampleShadingEnable = per_sample_shading;
|
||||
multisample_info.minSampleShading = (static_cast<u32>(samples) > 1) ? 1.0f : 0.0f;
|
||||
pipeline_info.pMultisampleState = &multisample_info;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetNoCullRasterizationState() {
|
||||
SetRasterizationState(vk::PolygonMode::eFill, vk::CullModeFlagBits::eNone, vk::FrontFace::eClockwise);
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetDepthState(bool depth_test, bool depth_write, vk::CompareOp compare_op) {
|
||||
depth_state.depthTestEnable = depth_test;
|
||||
depth_state.depthWriteEnable = depth_write;
|
||||
depth_state.depthCompareOp = compare_op;
|
||||
pipeline_info.pDepthStencilState = &depth_state;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetStencilState(bool stencil_test, vk::StencilOpState front, vk::StencilOpState back) {
|
||||
depth_state.stencilTestEnable = stencil_test;
|
||||
depth_state.front = front;
|
||||
depth_state.back = back;
|
||||
pipeline_info.pDepthStencilState = &depth_state;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetNoStencilState() {
|
||||
depth_state.stencilTestEnable = VK_FALSE;
|
||||
depth_state.front = vk::StencilOpState{};
|
||||
depth_state.back = vk::StencilOpState{};
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetNoDepthTestState() {
|
||||
SetDepthState(false, false, vk::CompareOp::eAlways);
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetBlendConstants(float r, float g, float b, float a) {
|
||||
blend_state.blendConstants = std::array<float, 4>{r, g, b, a};
|
||||
pipeline_info.pColorBlendState = &blend_state;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetBlendLogicOp(vk::LogicOp logic_op) {
|
||||
blend_state.logicOp = logic_op;
|
||||
blend_state.logicOpEnable = false;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetBlendAttachment(bool blend_enable, vk::BlendFactor src_factor, vk::BlendFactor dst_factor,
|
||||
vk::BlendOp op, vk::BlendFactor alpha_src_factor,
|
||||
vk::BlendFactor alpha_dst_factor, vk::BlendOp alpha_op,
|
||||
vk::ColorComponentFlags write_mask) {
|
||||
blend_attachment.blendEnable = blend_enable;
|
||||
blend_attachment.srcColorBlendFactor = src_factor;
|
||||
blend_attachment.dstColorBlendFactor = dst_factor;
|
||||
blend_attachment.colorBlendOp = op;
|
||||
blend_attachment.srcAlphaBlendFactor = alpha_src_factor;
|
||||
blend_attachment.dstAlphaBlendFactor = alpha_dst_factor;
|
||||
blend_attachment.alphaBlendOp = alpha_op;
|
||||
blend_attachment.colorWriteMask = write_mask;
|
||||
|
||||
blend_state.attachmentCount = 1;
|
||||
blend_state.pAttachments = &blend_attachment;
|
||||
pipeline_info.pColorBlendState = &blend_state;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetNoBlendingState() {
|
||||
SetBlendAttachment(false, vk::BlendFactor::eOne, vk::BlendFactor::eZero, vk::BlendOp::eAdd, vk::BlendFactor::eOne,
|
||||
vk::BlendFactor::eZero, vk::BlendOp::eAdd, vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
|
||||
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA);
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetDynamicStates(const std::span<vk::DynamicState> states) {
|
||||
if (states.size() > MAX_DYNAMIC_STATES) {
|
||||
LOG_ERROR(Render_Vulkan, "Cannot include more dynamic states!");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
// Copy the state data
|
||||
std::copy(states.begin(), states.end(), dynamic_states.begin());
|
||||
dynamic_info.dynamicStateCount = states.size();
|
||||
dynamic_info.pDynamicStates = dynamic_states.data();
|
||||
pipeline_info.pDynamicState = &dynamic_info;
|
||||
return;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetRenderingFormats(vk::Format color, vk::Format depth_stencil) {
|
||||
color_format = color;
|
||||
depth_stencil_format = depth_stencil;
|
||||
|
||||
auto IsStencil = [](vk::Format format) -> bool {
|
||||
switch (format) {
|
||||
case vk::Format::eD16UnormS8Uint:
|
||||
case vk::Format::eD24UnormS8Uint:
|
||||
case vk::Format::eD32SfloatS8Uint:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
};
|
||||
};
|
||||
|
||||
const u32 color_attachment_count = color == vk::Format::eUndefined ? 0 : 1;
|
||||
rendering_info = vk::PipelineRenderingCreateInfo{0, color_attachment_count, &color_format, depth_stencil_format,
|
||||
IsStencil(depth_stencil) ? depth_stencil : vk::Format::eUndefined};
|
||||
pipeline_info.pNext = &rendering_info;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetViewport(float x, float y, float width, float height, float min_depth, float max_depth) {
|
||||
viewport = vk::Viewport{x, y, width, height, min_depth, max_depth};
|
||||
viewport_state.pViewports = &viewport;
|
||||
viewport_state.viewportCount = 1;
|
||||
pipeline_info.pViewportState = &viewport_state;
|
||||
}
|
||||
|
||||
void PipelineBuilder::SetScissorRect(s32 x, s32 y, u32 width, u32 height) {
|
||||
scissor = vk::Rect2D{{x, y}, {width, height}};
|
||||
viewport_state.scissorCount = 1u;
|
||||
viewport_state.pScissors = &scissor;
|
||||
pipeline_info.pViewportState = &viewport_state;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
@ -1,108 +0,0 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include "video_core/renderer_vulkan/vk_texture.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class PipelineLayoutBuilder {
|
||||
public:
|
||||
PipelineLayoutBuilder();
|
||||
~PipelineLayoutBuilder() = default;
|
||||
|
||||
void Clear();
|
||||
vk::PipelineLayout Build();
|
||||
|
||||
void AddDescriptorSet(vk::DescriptorSetLayout layout);
|
||||
void AddPushConstants(vk::ShaderStageFlags stages, u32 offset, u32 size);
|
||||
|
||||
private:
|
||||
static constexpr u32 MAX_SETS = 8;
|
||||
static constexpr u32 MAX_PUSH_CONSTANTS = 5;
|
||||
|
||||
vk::PipelineLayoutCreateInfo pipeline_layout_info;
|
||||
std::array<vk::DescriptorSetLayout, MAX_SETS> sets;
|
||||
std::array<vk::PushConstantRange, MAX_PUSH_CONSTANTS> push_constants;
|
||||
};
|
||||
|
||||
class PipelineBuilder {
|
||||
public:
|
||||
PipelineBuilder();
|
||||
~PipelineBuilder() = default;
|
||||
|
||||
void Clear();
|
||||
vk::Pipeline Build();
|
||||
|
||||
void SetPipelineLayout(vk::PipelineLayout layout);
|
||||
void AddVertexBuffer(u32 binding, u32 stride, vk::VertexInputRate input_rate,
|
||||
const std::span<vk::VertexInputAttributeDescription> attributes);
|
||||
void SetShaderStage(vk::ShaderStageFlagBits stage, vk::ShaderModule module);
|
||||
|
||||
void SetPrimitiveTopology(vk::PrimitiveTopology topology, bool enable_primitive_restart = false);
|
||||
void SetLineWidth(float width);
|
||||
void SetMultisamples(vk::SampleCountFlagBits samples, bool per_sample_shading);
|
||||
void SetRasterizationState(vk::PolygonMode polygon_mode, vk::CullModeFlags cull_mode,
|
||||
vk::FrontFace front_face);
|
||||
|
||||
void SetNoCullRasterizationState();
|
||||
void SetDepthState(bool depth_test, bool depth_write, vk::CompareOp compare_op);
|
||||
void SetStencilState(bool stencil_test, vk::StencilOpState front, vk::StencilOpState back);
|
||||
void SetNoDepthTestState();
|
||||
void SetNoStencilState();
|
||||
|
||||
void SetBlendConstants(float r, float g, float b, float a);
|
||||
void SetNoBlendingState();
|
||||
void SetBlendLogicOp(vk::LogicOp logic_op);
|
||||
void SetBlendAttachment(bool blend_enable, vk::BlendFactor src_factor, vk::BlendFactor dst_factor,
|
||||
vk::BlendOp op, vk::BlendFactor alpha_src_factor, vk::BlendFactor alpha_dst_factor,
|
||||
vk::BlendOp alpha_op,vk::ColorComponentFlags write_mask);
|
||||
|
||||
void SetViewport(float x, float y, float width, float height, float min_depth, float max_depth);
|
||||
void SetScissorRect(s32 x, s32 y, u32 width, u32 height);
|
||||
void SetDynamicStates(const std::span<vk::DynamicState> states);
|
||||
void SetRenderingFormats(vk::Format color, vk::Format depth_stencil = vk::Format::eUndefined);
|
||||
|
||||
private:
|
||||
static constexpr u32 MAX_DYNAMIC_STATES = 20;
|
||||
static constexpr u32 MAX_SHADER_STAGES = 3;
|
||||
static constexpr u32 MAX_VERTEX_BUFFERS = 8;
|
||||
static constexpr u32 MAX_VERTEX_ATTRIBUTES = 16;
|
||||
|
||||
vk::GraphicsPipelineCreateInfo pipeline_info;
|
||||
std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
|
||||
|
||||
vk::PipelineVertexInputStateCreateInfo vertex_input_state;
|
||||
std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BUFFERS> vertex_buffers;
|
||||
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> vertex_attributes;
|
||||
|
||||
vk::PipelineInputAssemblyStateCreateInfo input_assembly;
|
||||
vk::PipelineRasterizationStateCreateInfo rasterization_state;
|
||||
vk::PipelineDepthStencilStateCreateInfo depth_state;
|
||||
|
||||
// Blending
|
||||
vk::PipelineColorBlendStateCreateInfo blend_state;
|
||||
vk::PipelineColorBlendAttachmentState blend_attachment;
|
||||
vk::PipelineDynamicStateCreateInfo dynamic_info;
|
||||
std::array<vk::DynamicState, MAX_DYNAMIC_STATES> dynamic_states;
|
||||
|
||||
vk::PipelineViewportStateCreateInfo viewport_state;
|
||||
vk::Viewport viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
|
||||
vk::Rect2D scissor;
|
||||
|
||||
// Multisampling
|
||||
vk::PipelineMultisampleStateCreateInfo multisample_info;
|
||||
vk::PipelineRenderingCreateInfo rendering_info;
|
||||
vk::Format color_format, depth_stencil_format;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
130
src/video_core/renderer_vulkan/vk_platform.h
Normal file
130
src/video_core/renderer_vulkan/vk_platform.h
Normal file
@ -0,0 +1,130 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
// Include the vulkan platform specific header
|
||||
#if defined(ANDROID) || defined (__ANDROID__)
|
||||
#define VK_USE_PLATFORM_ANDROID_KHR 1
|
||||
#elif defined(_WIN32)
|
||||
#define VK_USE_PLATFORM_WIN32_KHR 1
|
||||
#elif defined(__APPLE__)
|
||||
#define VK_USE_PLATFORM_MACOS_MVK 1
|
||||
#define VK_USE_PLATFORM_METAL_EXT 1
|
||||
#else
|
||||
#ifdef WAYLAND_DISPLAY
|
||||
#define VK_USE_PLATFORM_WAYLAND_KHR 1
|
||||
#else // wayland
|
||||
#define VK_USE_PLATFORM_XLIB_KHR 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include <vector>
|
||||
#include "common/logging/log.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
inline vk::SurfaceKHR CreateSurface(const vk::Instance& instance, const Frontend::EmuWindow& emu_window) {
|
||||
const auto& window_info = emu_window.GetWindowInfo();
|
||||
vk::SurfaceKHR surface;
|
||||
|
||||
#if VK_USE_PLATFORM_WIN32_KHR
|
||||
if (window_info.type == Frontend::WindowSystemType::Windows) {
|
||||
const vk::Win32SurfaceCreateInfoKHR win32_ci = {
|
||||
.hinstance = nullptr,
|
||||
.hwnd = static_cast<HWND>(window_info.render_surface)
|
||||
};
|
||||
|
||||
if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to initialize Win32 surface");
|
||||
}
|
||||
}
|
||||
#elif VK_USE_PLATFORM_XLIB_KHR
|
||||
if (window_info.type == Frontend::WindowSystemType::X11) {
|
||||
const vk::XlibSurfaceCreateInfoKHR xlib_ci{{},
|
||||
static_cast<Display*>(window_info.display_connection),
|
||||
reinterpret_cast<Window>(window_info.render_surface)};
|
||||
if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
#elif VK_USE_PLATFORM_WAYLAND_KHR
|
||||
if (window_info.type == Frontend::WindowSystemType::Wayland) {
|
||||
const vk::WaylandSurfaceCreateInfoKHR wayland_ci{{},
|
||||
static_cast<wl_display*>(window_info.display_connection),
|
||||
static_cast<wl_surface*>(window_info.render_surface)};
|
||||
if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!surface) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform");
|
||||
}
|
||||
|
||||
return surface;
|
||||
}
|
||||
|
||||
inline auto GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) {
|
||||
const auto properties = vk::enumerateInstanceExtensionProperties();
|
||||
if (properties.empty()) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
|
||||
return std::vector<const char*>{};
|
||||
}
|
||||
|
||||
// Add the windowing system specific extension
|
||||
std::vector<const char*> extensions;
|
||||
extensions.reserve(6);
|
||||
|
||||
switch (window_type) {
|
||||
case Frontend::WindowSystemType::Headless:
|
||||
break;
|
||||
#if VK_USE_PLATFORM_WIN32_KHR
|
||||
case Frontend::WindowSystemType::Windows:
|
||||
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
|
||||
break;
|
||||
#elif VK_USE_PLATFORM_XLIB_KHR
|
||||
case Frontend::WindowSystemType::X11:
|
||||
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
|
||||
break;
|
||||
#elif VK_USE_PLATFORM_WAYLAND_KHR
|
||||
case Frontend::WindowSystemType::Wayland:
|
||||
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
|
||||
break;
|
||||
}
|
||||
|
||||
if (window_type != Frontend::WindowSystemType::Headless) {
|
||||
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
if (enable_debug_utils) {
|
||||
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
for (const char* extension : extensions) {
|
||||
const auto iter = std::ranges::find_if(properties, [extension](const auto& prop) {
|
||||
return std::strcmp(extension, prop.extensionName) == 0;
|
||||
});
|
||||
|
||||
if (iter == properties.end()) {
|
||||
LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
|
||||
return std::vector<const char*>{};
|
||||
}
|
||||
}
|
||||
|
||||
return extensions;
|
||||
}
|
||||
|
||||
} // namespace VideoCore::Vulkan
|
@ -5,15 +5,10 @@
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <glad/glad.h>
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/math_util.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/pica_state.h"
|
||||
@ -21,27 +16,45 @@
|
||||
#include "video_core/regs_rasterizer.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||
#include "video_core/renderer_vulkan/vk_surface_params.h"
|
||||
#include "video_core/renderer_vulkan/pica_to_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
|
||||
MICROPROFILE_DEFINE(Vulkan_GS, "Vulkan", "Geometry Shader Setup", MP_RGB(128, 192, 128));
|
||||
MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(Vulkan_Blits, "Vulkan", "Blits", MP_RGB(100, 100, 255));
|
||||
MICROPROFILE_DEFINE(Vulkan_CacheManagement, "Vulkan", "Cache Management", MP_RGB(100, 255, 100));
|
||||
|
||||
using PixelFormat = SurfaceParams::PixelFormat;
|
||||
using SurfaceType = SurfaceParams::SurfaceType;
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0));
|
||||
MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
|
||||
MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128));
|
||||
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
|
||||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
|
||||
// They shall be big enough for about one frame.
|
||||
constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024;
|
||||
constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
|
||||
constexpr u32 TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
|
||||
constexpr std::array LUT_LF_VIEWS = {
|
||||
vk::Format::eR32G32Sfloat
|
||||
};
|
||||
|
||||
constexpr std::array LUT_VIEWS = {
|
||||
vk::Format::eR32G32Sfloat,
|
||||
vk::Format::eR32G32B32A32Sfloat
|
||||
};
|
||||
|
||||
RasterizerVulkan::RasterizerVulkan(CommandScheduler& scheduler, Frontend::EmuWindow& emu_window) :
|
||||
scheduler(scheduler), vertex_buffer(scheduler, VERTEX_BUFFER_SIZE, BufferUsage::Vertex),
|
||||
index_buffer(scheduler, INDEX_BUFFER_SIZE, BufferUsage::Index),
|
||||
uniform_buffer(scheduler, UNIFORM_BUFFER_SIZE, BufferUsage::Uniform),
|
||||
texture_buffer_lut_lf(scheduler, TEXTURE_BUFFER_SIZE, BufferUsage::UniformTexel, LUT_LF_VIEWS),
|
||||
texture_buffer_lut(scheduler, TEXTURE_BUFFER_SIZE, BufferUsage::UniformTexel, LUT_VIEWS) {
|
||||
|
||||
RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) {
|
||||
// Implement shadow
|
||||
allow_shadow = false;
|
||||
|
||||
@ -65,29 +78,6 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) {
|
||||
uniform_buffer_alignment);
|
||||
uniform_size_aligned_fs = Common::AlignUp<std::size_t>(sizeof(UniformData),
|
||||
uniform_buffer_alignment);
|
||||
// Allocate texture buffer LUTs
|
||||
Buffer::Info texel_buffer_info = {
|
||||
.size = TEXTURE_BUFFER_SIZE,
|
||||
.properties = vk::MemoryPropertyFlagBits::eDeviceLocal,
|
||||
.usage = vk::BufferUsageFlagBits::eUniformTexelBuffer |
|
||||
vk::BufferUsageFlagBits::eTransferDst,
|
||||
};
|
||||
|
||||
texel_buffer_info.view_formats[0] = vk::Format::eR32G32Sfloat;
|
||||
texture_buffer_lut_lf.Create(texel_buffer_info);
|
||||
|
||||
texel_buffer_info.view_formats[1] = vk::Format::eR32G32B32A32Sfloat;
|
||||
texture_buffer_lut.Create(texel_buffer_info);
|
||||
|
||||
// Create and bind uniform buffers
|
||||
Buffer::Info uniform_info = {
|
||||
.size = UNIFORM_BUFFER_SIZE,
|
||||
.properties = vk::MemoryPropertyFlagBits::eDeviceLocal,
|
||||
.usage = vk::BufferUsageFlagBits::eUniformBuffer |
|
||||
vk::BufferUsageFlagBits::eTransferDst
|
||||
};
|
||||
|
||||
uniform_buffer.Create(uniform_info);
|
||||
auto& state = VulkanState::Get();
|
||||
state.SetUniformBuffer(0, 0, uniform_size_aligned_vs, uniform_buffer);
|
||||
state.SetUniformBuffer(1, uniform_size_aligned_vs, uniform_size_aligned_fs, uniform_buffer);
|
||||
@ -97,26 +87,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) {
|
||||
state.SetTexelBuffer(1, 0, TEXTURE_BUFFER_SIZE, texture_buffer_lut, 0);
|
||||
state.SetTexelBuffer(2, 0, TEXTURE_BUFFER_SIZE, texture_buffer_lut, 1);
|
||||
|
||||
// Create vertex and index buffers
|
||||
Buffer::Info vertex_info = {
|
||||
.size = VERTEX_BUFFER_SIZE,
|
||||
.properties = vk::MemoryPropertyFlagBits::eDeviceLocal,
|
||||
.usage = vk::BufferUsageFlagBits::eVertexBuffer |
|
||||
vk::BufferUsageFlagBits::eTransferDst
|
||||
};
|
||||
|
||||
Buffer::Info index_info = {
|
||||
.size = INDEX_BUFFER_SIZE,
|
||||
.properties = vk::MemoryPropertyFlagBits::eDeviceLocal,
|
||||
.usage = vk::BufferUsageFlagBits::eIndexBuffer |
|
||||
vk::BufferUsageFlagBits::eTransferDst
|
||||
};
|
||||
|
||||
vertex_buffer.Create(vertex_info);
|
||||
index_buffer.Create(index_info);
|
||||
|
||||
// Set clear texture color
|
||||
state.SetPlaceholderColor(255, 0, 0, 255);
|
||||
state.SetPlaceholderColor(255, 255, 255, 255);
|
||||
|
||||
SyncEntireState();
|
||||
}
|
||||
@ -238,7 +210,7 @@ void RasterizerVulkan::DrawTriangles() {
|
||||
}
|
||||
|
||||
bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||
MICROPROFILE_SCOPE(Vulkan_Drawing);
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
auto& state = VulkanState::Get();
|
||||
|
||||
@ -252,6 +224,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
|
||||
const bool using_color_fb =
|
||||
regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0;
|
||||
|
||||
const bool using_depth_fb =
|
||||
!shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
|
||||
(write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0);
|
||||
@ -304,15 +277,15 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
|
||||
// Scissor checks are window-, not viewport-relative, which means that if the cached texture
|
||||
// sub-rect changes, the scissor bounds also need to be updated.
|
||||
GLint scissor_x1 =
|
||||
static_cast<GLint>(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale);
|
||||
GLint scissor_y1 =
|
||||
static_cast<GLint>(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale);
|
||||
int scissor_x1 =
|
||||
static_cast<int>(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale);
|
||||
int scissor_y1 =
|
||||
static_cast<int>(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale);
|
||||
// x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when
|
||||
// scaling or doing multisampling.
|
||||
GLint scissor_x2 =
|
||||
static_cast<GLint>(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale);
|
||||
GLint scissor_y2 = static_cast<GLint>(surfaces_rect.bottom +
|
||||
int scissor_x2 =
|
||||
static_cast<int>(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale);
|
||||
int scissor_y2 = static_cast<int>(surfaces_rect.bottom +
|
||||
(regs.rasterizer.scissor_test.y2 + 1) * res_scale);
|
||||
|
||||
if (uniform_block_data.data.scissor_x1 != scissor_x1 ||
|
||||
@ -420,8 +393,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
depth_surface->texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal);
|
||||
}
|
||||
|
||||
g_vk_task_scheduler->Submit();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -924,22 +895,22 @@ void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) {
|
||||
}
|
||||
|
||||
void RasterizerVulkan::FlushAll() {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
|
||||
res_cache.FlushAll();
|
||||
}
|
||||
|
||||
void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
|
||||
res_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
|
||||
res_cache.InvalidateRegion(addr, size, nullptr);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
|
||||
res_cache.FlushRegion(addr, size);
|
||||
res_cache.InvalidateRegion(addr, size, nullptr);
|
||||
}
|
||||
@ -949,7 +920,7 @@ void RasterizerVulkan::ClearAll(bool flush) {
|
||||
}
|
||||
|
||||
bool RasterizerVulkan::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
MICROPROFILE_SCOPE(Vulkan_Blits);
|
||||
|
||||
SurfaceParams src_params;
|
||||
src_params.addr = config.GetPhysicalInputAddress();
|
||||
@ -1099,7 +1070,7 @@ bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
|
||||
if (framebuffer_addr == 0) {
|
||||
return false;
|
||||
}
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
|
||||
|
||||
SurfaceParams src_params;
|
||||
src_params.addr = framebuffer_addr;
|
||||
@ -1421,7 +1392,7 @@ void RasterizerVulkan::SyncLightSpotDirection(int light_index) {
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) {
|
||||
GLfloat dist_atten_bias =
|
||||
float dist_atten_bias =
|
||||
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias)
|
||||
.ToFloat32();
|
||||
|
||||
@ -1432,7 +1403,7 @@ void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) {
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) {
|
||||
GLfloat dist_atten_scale =
|
||||
float dist_atten_scale =
|
||||
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale)
|
||||
.ToFloat32();
|
||||
|
||||
@ -1444,8 +1415,8 @@ void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) {
|
||||
|
||||
void RasterizerVulkan::SyncShadowBias() {
|
||||
const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
|
||||
GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
|
||||
GLfloat linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
|
||||
float constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
|
||||
float linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
|
||||
|
||||
if (constant != uniform_block_data.data.shadow_bias_constant ||
|
||||
linear != uniform_block_data.data.shadow_bias_linear) {
|
||||
@ -1456,7 +1427,7 @@ void RasterizerVulkan::SyncShadowBias() {
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncShadowTextureBias() {
|
||||
GLint bias = Pica::g_state.regs.texturing.shadow.bias << 1;
|
||||
int bias = Pica::g_state.regs.texturing.shadow.bias << 1;
|
||||
if (bias != uniform_block_data.data.shadow_texture_bias) {
|
||||
uniform_block_data.data.shadow_texture_bias = bias;
|
||||
uniform_block_data.dirty = true;
|
||||
|
@ -5,21 +5,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <glm/glm.hpp>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/pica_state.h"
|
||||
#include "video_core/pica_types.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/regs_framebuffer.h"
|
||||
#include "video_core/regs_lighting.h"
|
||||
#include "video_core/regs_rasterizer.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
#include "video_core/renderer_vulkan/vk_state.h"
|
||||
@ -31,7 +21,11 @@ class EmuWindow;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
enum class UniformBindings : u32 { Common, VS, GS };
|
||||
enum class UniformBindings : u32 {
|
||||
Common = 0,
|
||||
VertexShader = 1,
|
||||
GeometryShader = 2
|
||||
};
|
||||
|
||||
struct LightSrc {
|
||||
alignas(16) glm::vec3 specular_0;
|
||||
@ -79,14 +73,13 @@ struct UniformData {
|
||||
alignas(16) glm::vec4 clip_coef;
|
||||
};
|
||||
|
||||
static_assert(
|
||||
sizeof(UniformData) == 0x4F0,
|
||||
"The size of the UniformData structure has changed, update the structure in the shader");
|
||||
static_assert(sizeof(UniformData) == 0x4F0,
|
||||
"The size of the UniformData structure has changed, update the structure in the shader");
|
||||
static_assert(sizeof(UniformData) < 16384,
|
||||
"UniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
||||
/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
|
||||
// NOTE: the same rule from UniformData also applies here.
|
||||
/// NOTE: the same rule from UniformData also applies here.
|
||||
struct PicaUniformsData {
|
||||
void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup);
|
||||
|
||||
@ -102,17 +95,18 @@ struct PicaUniformsData {
|
||||
struct VSUniformData {
|
||||
PicaUniformsData uniforms;
|
||||
};
|
||||
static_assert(
|
||||
sizeof(VSUniformData) == 1856,
|
||||
"The size of the VSUniformData structure has changed, update the structure in the shader");
|
||||
|
||||
static_assert(sizeof(VSUniformData) == 1856,
|
||||
"The size of the VSUniformData structure has changed, update the structure in the shader");
|
||||
static_assert(sizeof(VSUniformData) < 16384,
|
||||
"VSUniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
||||
struct ScreenInfo;
|
||||
class CommandScheduler;
|
||||
|
||||
class RasterizerVulkan : public VideoCore::RasterizerInterface {
|
||||
public:
|
||||
explicit RasterizerVulkan(Frontend::EmuWindow& emu_window);
|
||||
explicit RasterizerVulkan(CommandScheduler& scheduler, Frontend::EmuWindow& emu_window);
|
||||
~RasterizerVulkan() override;
|
||||
|
||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||
@ -252,6 +246,7 @@ private:
|
||||
};
|
||||
|
||||
private:
|
||||
CommandScheduler& scheduler;
|
||||
RasterizerCacheVulkan res_cache;
|
||||
std::vector<HardwareVertex> vertex_batch;
|
||||
bool shader_dirty = true;
|
||||
@ -269,13 +264,7 @@ private:
|
||||
bool dirty;
|
||||
} uniform_block_data = {};
|
||||
|
||||
// They shall be big enough for about one frame.
|
||||
static constexpr std::size_t VERTEX_BUFFER_SIZE = 64 * 1024 * 1024;
|
||||
static constexpr std::size_t INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
|
||||
static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
|
||||
Buffer vertex_buffer, index_buffer;
|
||||
StreamBuffer vertex_buffer, index_buffer;
|
||||
StreamBuffer uniform_buffer, texture_buffer_lut_lf, texture_buffer_lut;
|
||||
|
||||
u32 uniform_buffer_alignment;
|
||||
@ -293,4 +282,4 @@ private:
|
||||
bool allow_shadow{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
} // namespace Vulkan
|
||||
|
@ -16,19 +16,11 @@
|
||||
#include <boost/range/iterator_range.hpp>
|
||||
#include "common/alignment.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/color.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/texture.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "core/core.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
#include "core/memory.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/pica_state.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_format_reinterpreter.h"
|
||||
@ -375,7 +367,7 @@ static vk::Rect2D FromRect(Common::Rectangle<u32> rect) {
|
||||
|
||||
// Allocate an uninitialized texture of appropriate size and format for the surface
|
||||
void RasterizerCacheVulkan::AllocateTexture(Texture& target, SurfaceType type, vk::Format format,
|
||||
u32 width, u32 height) {
|
||||
u32 width, u32 height, bool framebuffer) {
|
||||
// First check if the texture can be recycled
|
||||
auto recycled_tex = host_texture_recycler.find({format, width, height});
|
||||
if (recycled_tex != host_texture_recycler.end()) {
|
||||
@ -384,30 +376,31 @@ void RasterizerCacheVulkan::AllocateTexture(Texture& target, SurfaceType type, v
|
||||
return;
|
||||
}
|
||||
|
||||
auto GetUsage = [](SurfaceType type) {
|
||||
auto GetUsage = [framebuffer](SurfaceType type) {
|
||||
auto usage = vk::ImageUsageFlagBits::eSampled |
|
||||
vk::ImageUsageFlagBits::eTransferDst |
|
||||
vk::ImageUsageFlagBits::eTransferSrc;
|
||||
|
||||
switch (type) {
|
||||
case SurfaceType::Color:
|
||||
case SurfaceType::Fill:
|
||||
case SurfaceType::Texture:
|
||||
usage |= vk::ImageUsageFlagBits::eColorAttachment;
|
||||
break;
|
||||
case SurfaceType::Depth:
|
||||
case SurfaceType::DepthStencil:
|
||||
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
if (framebuffer) {
|
||||
switch (type) {
|
||||
case SurfaceType::Color:
|
||||
case SurfaceType::Fill:
|
||||
case SurfaceType::Texture:
|
||||
usage |= vk::ImageUsageFlagBits::eColorAttachment;
|
||||
break;
|
||||
case SurfaceType::Depth:
|
||||
case SurfaceType::DepthStencil:
|
||||
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return usage;
|
||||
};
|
||||
|
||||
// Otherwise create a brand new texture
|
||||
u32 levels = std::log2(std::max(width, height)) + 1;
|
||||
u32 levels = static_cast<u32>(std::log2(std::max(width, height))) + 1;
|
||||
Texture::Info texture_info{
|
||||
.width = width,
|
||||
.height = height,
|
||||
@ -516,8 +509,9 @@ void CachedSurface::LoadGPUBuffer(PAddr load_start, PAddr load_end) {
|
||||
const bool need_swap = (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8);
|
||||
|
||||
const u8* const texture_src_data = VideoCore::g_memory->GetPhysicalPointer(addr);
|
||||
if (texture_src_data == nullptr)
|
||||
if (texture_src_data == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (vk_buffer.empty()) {
|
||||
vk_buffer.resize(width * height * GetBytesPerPixel(pixel_format));
|
||||
@ -660,9 +654,9 @@ void CachedSurface::UploadGPUTexture(Common::Rectangle<u32> rect) {
|
||||
// Load data from memory to the surface
|
||||
auto buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format);
|
||||
auto update_size = rect.GetWidth() * rect.GetHeight() * GetBytesPerPixel(pixel_format);
|
||||
std::span<u8> memory(vk_buffer.data() + buffer_offset, update_size);
|
||||
std::span<const u8> memory{vk_buffer.data() + buffer_offset, update_size};
|
||||
|
||||
texture.Upload(0, 0, stride, FromRect(rect), memory);
|
||||
texture.Upload(0, 0, stride, memory);
|
||||
|
||||
InvalidateAllWatcher();
|
||||
}
|
||||
@ -867,7 +861,8 @@ Surface RasterizerCacheVulkan::GetSurface(const SurfaceParams& params, ScaleMatc
|
||||
|
||||
SurfaceRect_Tuple RasterizerCacheVulkan::GetSurfaceSubRect(const SurfaceParams& params,
|
||||
ScaleMatch match_res_scale,
|
||||
bool load_if_create) {
|
||||
bool load_if_create,
|
||||
bool framebuffer) {
|
||||
if (params.addr == 0 || params.height * params.width == 0) {
|
||||
return std::make_tuple(nullptr, Common::Rectangle<u32>{});
|
||||
}
|
||||
@ -887,7 +882,7 @@ SurfaceRect_Tuple RasterizerCacheVulkan::GetSurfaceSubRect(const SurfaceParams&
|
||||
SurfaceParams new_params = *surface;
|
||||
new_params.res_scale = params.res_scale;
|
||||
|
||||
surface = CreateSurface(new_params);
|
||||
surface = CreateSurface(new_params, framebuffer);
|
||||
RegisterSurface(surface);
|
||||
}
|
||||
}
|
||||
@ -1077,8 +1072,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheVulkan::GetFramebufferSurfaces(
|
||||
// Make sure that framebuffers don't overlap if both color and depth are being used
|
||||
if (using_color_fb && using_depth_fb &&
|
||||
boost::icl::length(color_vp_interval & depth_vp_interval)) {
|
||||
LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
|
||||
"overlapping framebuffers not supported!");
|
||||
LOG_CRITICAL(Render_Vulkan, "Color and depth framebuffer memory regions overlap!");
|
||||
using_depth_fb = false;
|
||||
}
|
||||
|
||||
@ -1086,13 +1080,13 @@ SurfaceSurfaceRect_Tuple RasterizerCacheVulkan::GetFramebufferSurfaces(
|
||||
Surface color_surface = nullptr;
|
||||
if (using_color_fb)
|
||||
std::tie(color_surface, color_rect) =
|
||||
GetSurfaceSubRect(color_params, ScaleMatch::Exact, false);
|
||||
GetSurfaceSubRect(color_params, ScaleMatch::Exact, false, true);
|
||||
|
||||
Common::Rectangle<u32> depth_rect{};
|
||||
Surface depth_surface = nullptr;
|
||||
if (using_depth_fb)
|
||||
std::tie(depth_surface, depth_rect) =
|
||||
GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false);
|
||||
GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false, true);
|
||||
|
||||
Common::Rectangle<u32> fb_rect{};
|
||||
if (color_surface != nullptr && depth_surface != nullptr) {
|
||||
@ -1450,13 +1444,13 @@ void RasterizerCacheVulkan::InvalidateRegion(PAddr addr, u32 size, const Surface
|
||||
remove_surfaces.clear();
|
||||
}
|
||||
|
||||
Surface RasterizerCacheVulkan::CreateSurface(const SurfaceParams& params) {
|
||||
Surface RasterizerCacheVulkan::CreateSurface(const SurfaceParams& params, bool framebuffer) {
|
||||
Surface surface = std::make_shared<CachedSurface>(*this);
|
||||
static_cast<SurfaceParams&>(*surface) = params;
|
||||
|
||||
surface->invalid_regions.insert(surface->GetInterval());
|
||||
AllocateTexture(surface->texture, params.type, GetFormatTuple(surface->pixel_format),
|
||||
surface->GetScaledWidth(), surface->GetScaledHeight());
|
||||
surface->GetScaledWidth(), surface->GetScaledHeight(), framebuffer);
|
||||
return surface;
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
#include <unordered_map>
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include <robin_hood.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
@ -22,6 +23,10 @@
|
||||
#include "video_core/renderer_vulkan/vk_texture.h"
|
||||
#include "video_core/texture/texture_decode.h"
|
||||
|
||||
// Can be changed later here
|
||||
template <typename Key, typename T, typename Hash = typename Key::Hash>
|
||||
using HashMap = robin_hood::unordered_flat_map<Key, T, Hash>;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class RasterizerCacheVulkan;
|
||||
@ -31,32 +36,25 @@ class FormatReinterpreterVulkan;
|
||||
vk::Format GetFormatTuple(SurfaceParams::PixelFormat pixel_format);
|
||||
|
||||
struct HostTextureTag {
|
||||
vk::Format format;
|
||||
u32 width;
|
||||
u32 height;
|
||||
bool operator==(const HostTextureTag& rhs) const noexcept {
|
||||
return std::tie(format, width, height) == std::tie(rhs.format, rhs.width, rhs.height);
|
||||
};
|
||||
vk::Format format = vk::Format::eUndefined;
|
||||
u32 width = 0, height = 0;
|
||||
|
||||
// Enable comparisons
|
||||
auto operator<=>(const HostTextureTag& other) const = default;
|
||||
};
|
||||
|
||||
struct TextureCubeConfig {
|
||||
PAddr px;
|
||||
PAddr nx;
|
||||
PAddr py;
|
||||
PAddr ny;
|
||||
PAddr pz;
|
||||
PAddr nz;
|
||||
u32 width;
|
||||
PAddr px = 0;
|
||||
PAddr nx = 0;
|
||||
PAddr py = 0;
|
||||
PAddr ny = 0;
|
||||
PAddr pz = 0;
|
||||
PAddr nz = 0;
|
||||
u32 width = 0;
|
||||
Pica::TexturingRegs::TextureFormat format;
|
||||
|
||||
bool operator==(const TextureCubeConfig& rhs) const {
|
||||
return std::tie(px, nx, py, ny, pz, nz, width, format) ==
|
||||
std::tie(rhs.px, rhs.nx, rhs.py, rhs.ny, rhs.pz, rhs.nz, rhs.width, rhs.format);
|
||||
}
|
||||
|
||||
bool operator!=(const TextureCubeConfig& rhs) const {
|
||||
return !(*this == rhs);
|
||||
}
|
||||
// Enable comparisons
|
||||
auto operator<=>(const TextureCubeConfig& other) const = default;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
@ -98,6 +96,7 @@ using SurfaceRegions = boost::icl::interval_set<PAddr, std::less, SurfaceInterva
|
||||
using SurfaceMap =
|
||||
boost::icl::interval_map<PAddr, Surface, boost::icl::partial_absorber, std::less,
|
||||
boost::icl::inplace_plus, boost::icl::inter_section, SurfaceInterval>;
|
||||
|
||||
using SurfaceCache =
|
||||
boost::icl::interval_map<PAddr, SurfaceSet, boost::icl::partial_absorber, std::less,
|
||||
boost::icl::inplace_plus, boost::icl::inter_section, SurfaceInterval>;
|
||||
@ -109,8 +108,6 @@ static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval
|
||||
using SurfaceRect_Tuple = std::tuple<Surface, Common::Rectangle<u32>>;
|
||||
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
|
||||
|
||||
using PageMap = boost::icl::interval_map<u32, int>;
|
||||
|
||||
enum class ScaleMatch {
|
||||
Exact, // only accept same res scale
|
||||
Upscale, // only allow higher scale than params
|
||||
@ -265,7 +262,7 @@ public:
|
||||
/// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
|
||||
/// 3DS memory to OpenGL and caches it (if not already cached)
|
||||
SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
|
||||
bool load_if_create);
|
||||
bool load_if_create, bool framebuffer = false);
|
||||
|
||||
/// Get a surface based on the texture configuration
|
||||
Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config);
|
||||
@ -306,9 +303,9 @@ private:
|
||||
void ValidateSurface(const Surface& surface, PAddr addr, u32 size);
|
||||
|
||||
// Returns false if there is a surface in the cache at the interval with the same bit-width,
|
||||
bool NoUnimplementedReinterpretations(const Vulkan::Surface& surface,
|
||||
Vulkan::SurfaceParams& params,
|
||||
const Vulkan::SurfaceInterval& interval);
|
||||
bool NoUnimplementedReinterpretations(const Surface& surface,
|
||||
SurfaceParams& params,
|
||||
const SurfaceInterval& interval);
|
||||
|
||||
// Return true if a surface with an invalid pixel format exists at the interval
|
||||
bool IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval);
|
||||
@ -318,7 +315,7 @@ private:
|
||||
const SurfaceInterval& interval);
|
||||
|
||||
/// Create a new surface
|
||||
Surface CreateSurface(const SurfaceParams& params);
|
||||
Surface CreateSurface(const SurfaceParams& params, bool framebuffer = false);
|
||||
|
||||
/// Register surface into the cache
|
||||
void RegisterSurface(const Surface& surface);
|
||||
@ -330,20 +327,20 @@ private:
|
||||
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta);
|
||||
|
||||
SurfaceCache surface_cache;
|
||||
PageMap cached_pages;
|
||||
boost::icl::interval_map<u32, int> cached_pages;
|
||||
SurfaceMap dirty_regions;
|
||||
SurfaceSet remove_surfaces;
|
||||
|
||||
u16 resolution_scale_factor;
|
||||
|
||||
// Texture cube cache
|
||||
std::unordered_map<TextureCubeConfig, CachedTextureCube> texture_cube_cache;
|
||||
|
||||
std::recursive_mutex mutex;
|
||||
|
||||
public:
|
||||
void AllocateTexture(Texture& target, SurfaceParams::SurfaceType type, vk::Format format,
|
||||
u32 width, u32 height);
|
||||
std::unique_ptr<FormatReinterpreterVulkan> format_reinterpreter;
|
||||
u32 width, u32 height, bool framebuffer);
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
} // namespace Vulkan
|
||||
|
234
src/video_core/renderer_vulkan/vk_shader.cpp
Normal file
234
src/video_core/renderer_vulkan/vk_shader.cpp
Normal file
@ -0,0 +1,234 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include <glslang/Public/ShaderLang.h>
|
||||
#include <glslang/SPIRV/GlslangToSpv.h>
|
||||
#include <glslang/Include/ResourceLimits.h>
|
||||
|
||||
// Built-in resource limits handed to glslang when parsing shaders.
// glslang has no default table of its own, so every TBuiltInResource field
// must be populated; these are the commonly used "default" values shipped in
// glslang's StandAlone ResourceLimits. Only the compute/fragment limits are
// actually exercised by the generated PICA shaders, but the full table is
// required for TShader::parse.
constexpr TBuiltInResource DefaultTBuiltInResource = {
    .maxLights = 32,
    .maxClipPlanes = 6,
    .maxTextureUnits = 32,
    .maxTextureCoords = 32,
    .maxVertexAttribs = 64,
    .maxVertexUniformComponents = 4096,
    .maxVaryingFloats = 64,
    .maxVertexTextureImageUnits = 32,
    .maxCombinedTextureImageUnits = 80,
    .maxTextureImageUnits = 32,
    .maxFragmentUniformComponents = 4096,
    .maxDrawBuffers = 32,
    .maxVertexUniformVectors = 128,
    .maxVaryingVectors = 8,
    .maxFragmentUniformVectors = 16,
    .maxVertexOutputVectors = 16,
    .maxFragmentInputVectors = 15,
    .minProgramTexelOffset = -8,
    .maxProgramTexelOffset = 7,
    .maxClipDistances = 8,
    .maxComputeWorkGroupCountX = 65535,
    .maxComputeWorkGroupCountY = 65535,
    .maxComputeWorkGroupCountZ = 65535,
    .maxComputeWorkGroupSizeX = 1024,
    .maxComputeWorkGroupSizeY = 1024,
    .maxComputeWorkGroupSizeZ = 64,
    .maxComputeUniformComponents = 1024,
    .maxComputeTextureImageUnits = 16,
    .maxComputeImageUniforms = 8,
    .maxComputeAtomicCounters = 8,
    .maxComputeAtomicCounterBuffers = 1,
    .maxVaryingComponents = 60,
    .maxVertexOutputComponents = 64,
    .maxGeometryInputComponents = 64,
    .maxGeometryOutputComponents = 128,
    .maxFragmentInputComponents = 128,
    .maxImageUnits = 8,
    .maxCombinedImageUnitsAndFragmentOutputs = 8,
    .maxCombinedShaderOutputResources = 8,
    .maxImageSamples = 0,
    .maxVertexImageUniforms = 0,
    .maxTessControlImageUniforms = 0,
    .maxTessEvaluationImageUniforms = 0,
    .maxGeometryImageUniforms = 0,
    .maxFragmentImageUniforms = 8,
    .maxCombinedImageUniforms = 8,
    .maxGeometryTextureImageUnits = 16,
    .maxGeometryOutputVertices = 256,
    .maxGeometryTotalOutputComponents = 1024,
    .maxGeometryUniformComponents = 1024,
    .maxGeometryVaryingComponents = 64,
    .maxTessControlInputComponents = 128,
    .maxTessControlOutputComponents = 128,
    .maxTessControlTextureImageUnits = 16,
    .maxTessControlUniformComponents = 1024,
    .maxTessControlTotalOutputComponents = 4096,
    .maxTessEvaluationInputComponents = 128,
    .maxTessEvaluationOutputComponents = 128,
    .maxTessEvaluationTextureImageUnits = 16,
    .maxTessEvaluationUniformComponents = 1024,
    .maxTessPatchComponents = 120,
    .maxPatchVertices = 32,
    .maxTessGenLevel = 64,
    .maxViewports = 16,
    .maxVertexAtomicCounters = 0,
    .maxTessControlAtomicCounters = 0,
    .maxTessEvaluationAtomicCounters = 0,
    .maxGeometryAtomicCounters = 0,
    .maxFragmentAtomicCounters = 8,
    .maxCombinedAtomicCounters = 8,
    .maxAtomicCounterBindings = 1,
    .maxVertexAtomicCounterBuffers = 0,
    .maxTessControlAtomicCounterBuffers = 0,
    .maxTessEvaluationAtomicCounterBuffers = 0,
    .maxGeometryAtomicCounterBuffers = 0,
    .maxFragmentAtomicCounterBuffers = 1,
    .maxCombinedAtomicCounterBuffers = 1,
    .maxAtomicCounterBufferSize = 16384,
    .maxTransformFeedbackBuffers = 4,
    .maxTransformFeedbackInterleavedComponents = 64,
    .maxCullDistances = 8,
    .maxCombinedClipAndCullDistances = 8,
    .maxSamples = 4,
    .maxMeshOutputVerticesNV = 256,
    .maxMeshOutputPrimitivesNV = 512,
    .maxMeshWorkGroupSizeX_NV = 32,
    .maxMeshWorkGroupSizeY_NV = 1,
    .maxMeshWorkGroupSizeZ_NV = 1,
    .maxTaskWorkGroupSizeX_NV = 32,
    .maxTaskWorkGroupSizeY_NV = 1,
    .maxTaskWorkGroupSizeZ_NV = 1,
    .maxMeshViewCountNV = 4,
    .maxDualSourceDrawBuffersEXT = 1,
    // Feature toggles: allow the loop/indexing constructs the generated
    // PICA fragment shaders rely on.
    .limits = TLimits{
        .nonInductiveForLoops = 1,
        .whileLoops = 1,
        .doWhileLoops = 1,
        .generalUniformIndexing = 1,
        .generalAttributeMatrixVectorIndexing = 1,
        .generalVaryingIndexing = 1,
        .generalSamplerIndexing = 1,
        .generalVariableIndexing = 1,
        .generalConstantMatrixVectorIndexing = 1,
    }};
|
||||
|
||||
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
/// Translates a backend-agnostic shader stage into the matching glslang stage enum.
/// @param stage The VideoCore shader stage to translate.
/// @return The equivalent EShLanguage value understood by glslang.
EShLanguage ToEshShaderStage(ShaderStage stage) {
    switch (stage) {
    case ShaderStage::Vertex:
        return EShLanguage::EShLangVertex;
    case ShaderStage::Geometry:
        return EShLanguage::EShLangGeometry;
    case ShaderStage::Fragment:
        return EShLanguage::EShLangFragment;
    case ShaderStage::Compute:
        return EShLanguage::EShLangCompute;
    default:
        LOG_CRITICAL(Render_Vulkan, "Unknown shader stage");
        UNREACHABLE();
        // If UNREACHABLE() compiles to a no-op in release builds, falling off
        // the end of a non-void function is UB; return a defined fallback.
        return EShLanguage::EShLangVertex;
    }
}
|
||||
|
||||
bool InitializeCompiler() {
|
||||
static bool glslang_initialized = false;
|
||||
|
||||
if (glslang_initialized) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!glslang::InitializeProcess()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::atexit([]() { glslang::FinalizeProcess(); });
|
||||
|
||||
glslang_initialized = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Constructs a shader wrapper holding the moved-in GLSL source.
/// Compilation is deferred until Compile() is called; the Instance
/// reference must outlive this object.
Shader::Shader(Instance& instance, ShaderStage stage, std::string_view name,
               std::string&& source)
    : ShaderBase(stage, name, std::move(source)), instance(instance) {}

/// Destroys the owned Vulkan shader module (a null handle is a no-op).
Shader::~Shader() {
    instance.GetDevice().destroyShaderModule(module);
}
|
||||
|
||||
/// Compiles the stored GLSL source to SPIR-V via glslang and wraps the result
/// in a vk::ShaderModule.
/// @param level Debug disables the SPIR-V optimizer and embeds source/debug
///              info (for easier inspection in RenderDoc); Release strips it.
/// @return True on success; false if the compiler could not be initialized or
///         the shader failed to parse/link (details go to the critical log).
bool Shader::Compile(ShaderOptimization level) {
    if (!InitializeCompiler()) {
        return false;
    }

    const EProfile profile = ECoreProfile;
    const EShMessages messages =
        static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
    const EShLanguage lang = ToEshShaderStage(stage);

    const int default_version = 450;
    const char* pass_source_code = source.c_str();
    // glslang takes the length as int; cast explicitly to avoid an implicit
    // size_t -> int narrowing conversion (and the associated warning).
    const int pass_source_code_length = static_cast<int>(source.size());

    auto shader = std::make_unique<glslang::TShader>(lang);
    shader->setEnvTarget(glslang::EShTargetSpv,
                         glslang::EShTargetLanguageVersion::EShTargetSpv_1_3);
    shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);

    glslang::TShader::ForbidIncluder includer;
    if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages,
                       includer)) {
        LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(),
                     shader->getInfoDebugLog());
        return false;
    }

    // Even though there's only a single shader, we still need to link it to generate SPV
    auto program = std::make_unique<glslang::TProgram>();
    program->addShader(shader.get());
    if (!program->link(messages)) {
        LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(),
                     program->getInfoDebugLog());
        return false;
    }

    glslang::TIntermediate* intermediate = program->getIntermediate(lang);
    std::vector<u32> out_code;
    spv::SpvBuildLogger logger;
    glslang::SpvOptions options;

    // Compile the SPIR-V module without optimizations for easier debugging in RenderDoc.
    if (level == ShaderOptimization::Debug) {
        intermediate->addSourceText(pass_source_code, pass_source_code_length);
        options.generateDebugInfo = true;
        options.disableOptimizer = true;
        options.optimizeSize = false;
        options.disassemble = false;
        options.validate = true;
    } else {
        options.disableOptimizer = false;
        options.stripDebugInfo = true;
    }

    glslang::GlslangToSpv(*intermediate, out_code, &logger, &options);

    // Surface any non-fatal conversion diagnostics.
    const std::string spv_messages = logger.getAllMessages();
    if (!spv_messages.empty()) {
        LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
    }

    // codeSize is in bytes, not words, per the Vulkan spec.
    const vk::ShaderModuleCreateInfo shader_info = {
        .codeSize = out_code.size() * sizeof(u32),
        .pCode = out_code.data()
    };

    vk::Device device = instance.GetDevice();
    module = device.createShaderModule(shader_info);

    return true;
}
|
||||
|
||||
} // namespace VideoCore::Vulkan
|
32
src/video_core/renderer_vulkan/vk_shader.h
Normal file
32
src/video_core/renderer_vulkan/vk_shader.h
Normal file
@ -0,0 +1,32 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/common/shader.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
class Instance;
|
||||
|
||||
/// Vulkan implementation of the backend-agnostic shader interface.
/// Owns a vk::ShaderModule that is created by Compile() and destroyed in the
/// destructor.
class Shader : public VideoCore::ShaderBase {
public:
    // The GLSL source is moved in; compilation is deferred until Compile().
    // The Instance reference must outlive this shader object.
    Shader(Instance& instance, ShaderStage stage, std::string_view name,
           std::string&& source);
    ~Shader() override;

    // Translates the stored GLSL to SPIR-V (via glslang) and creates the
    // Vulkan shader module; returns false on parse/link failure.
    bool Compile(ShaderOptimization level) override;

    /// Returns the underlying vulkan shader module handle
    vk::ShaderModule GetHandle() const {
        return module;
    }

private:
    Instance& instance;      // Borrowed; supplies the vk::Device for module lifetime
    vk::ShaderModule module; // Null until Compile() succeeds
};
|
||||
|
||||
} // namespace VideoCore::Vulkan
|
@ -2,28 +2,12 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <string_view>
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/bit_set.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/regs_framebuffer.h"
|
||||
#include "video_core/regs_lighting.h"
|
||||
#include "video_core/regs_rasterizer.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
#include <glslang/Public/ShaderLang.h>
|
||||
#include <glslang/SPIRV/GlslangToSpv.h>
|
||||
#include <glslang/Include/ResourceLimits.h>
|
||||
|
||||
using Pica::FramebufferRegs;
|
||||
using Pica::LightingRegs;
|
||||
@ -32,56 +16,7 @@ using Pica::TexturingRegs;
|
||||
using TevStageConfig = TexturingRegs::TevStageConfig;
|
||||
using VSOutputAttributes = RasterizerRegs::VSOutputAttributes;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
static const char present_vertex_shader_source[] = R"(
|
||||
#version 450 core
|
||||
#extension GL_ARB_separate_shader_objects : enable
|
||||
layout (location = 0) in vec2 vert_position;
|
||||
layout (location = 1) in vec3 vert_tex_coord;
|
||||
layout (location = 0) out vec3 frag_tex_coord;
|
||||
|
||||
layout (push_constant) uniform DrawInfo {
|
||||
mat4 modelview_matrix;
|
||||
vec4 i_resolution;
|
||||
vec4 o_resolution;
|
||||
int layer;
|
||||
};
|
||||
|
||||
void main() {
|
||||
vec4 position = vec4(vert_position, 0.0, 1.0) * modelview_matrix;
|
||||
gl_Position = vec4(position.x, -position.y, 0.0, 1.0);
|
||||
frag_tex_coord = vert_tex_coord;
|
||||
}
|
||||
)";
|
||||
|
||||
static const char present_fragment_shader_source[] = R"(
|
||||
#version 450 core
|
||||
#extension GL_ARB_separate_shader_objects : enable
|
||||
layout (location = 0) in vec3 frag_tex_coord;
|
||||
layout (location = 0) out vec4 color;
|
||||
|
||||
layout (push_constant) uniform DrawInfo {
|
||||
mat3x2 modelview_matrix;
|
||||
vec4 i_resolution;
|
||||
vec4 o_resolution;
|
||||
int layer;
|
||||
};
|
||||
|
||||
layout (set = 0, binding = 0) uniform sampler2D screen_textures[3];
|
||||
|
||||
void main() {
|
||||
color = texture(screen_textures[int(frag_tex_coord.z)], frag_tex_coord.xy);
|
||||
}
|
||||
)";
|
||||
|
||||
std::string GetPresentVertexShader() {
|
||||
return present_vertex_shader_source;
|
||||
}
|
||||
|
||||
std::string GetPresentFragmentShader() {
|
||||
return present_fragment_shader_source;
|
||||
}
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
constexpr std::string_view UniformBlockDef = R"(
|
||||
#define NUM_TEV_STAGES 6
|
||||
@ -162,184 +97,6 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_
|
||||
return out;
|
||||
}
|
||||
|
||||
PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) {
|
||||
PicaFSConfig res{};
|
||||
|
||||
auto& state = res.state;
|
||||
|
||||
state.scissor_test_mode = regs.rasterizer.scissor_test.mode;
|
||||
|
||||
state.depthmap_enable = regs.rasterizer.depthmap_enable;
|
||||
|
||||
state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable
|
||||
? regs.framebuffer.output_merger.alpha_test.func.Value()
|
||||
: FramebufferRegs::CompareFunc::Always;
|
||||
|
||||
state.texture0_type = regs.texturing.texture0.type;
|
||||
|
||||
state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;
|
||||
|
||||
// We don't need these otherwise, reset them to avoid unnecessary shader generation
|
||||
state.alphablend_enable = {};
|
||||
state.logic_op = {};
|
||||
|
||||
// Copy relevant tev stages fields.
|
||||
// We don't sync const_color here because of the high variance, it is a
|
||||
// shader uniform instead.
|
||||
const auto& tev_stages = regs.texturing.GetTevStages();
|
||||
DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size());
|
||||
for (std::size_t i = 0; i < tev_stages.size(); i++) {
|
||||
const auto& tev_stage = tev_stages[i];
|
||||
state.tev_stages[i].sources_raw = tev_stage.sources_raw;
|
||||
state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
|
||||
state.tev_stages[i].ops_raw = tev_stage.ops_raw;
|
||||
state.tev_stages[i].scales_raw = tev_stage.scales_raw;
|
||||
}
|
||||
|
||||
state.fog_mode = regs.texturing.fog_mode;
|
||||
state.fog_flip = regs.texturing.fog_flip != 0;
|
||||
|
||||
state.combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
|
||||
regs.texturing.tev_combiner_buffer_input.update_mask_a.Value()
|
||||
<< 4;
|
||||
|
||||
// Fragment lighting
|
||||
|
||||
state.lighting.enable = !regs.lighting.disable;
|
||||
state.lighting.src_num = regs.lighting.max_light_index + 1;
|
||||
|
||||
for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
|
||||
unsigned num = regs.lighting.light_enable.GetNum(light_index);
|
||||
const auto& light = regs.lighting.light[num];
|
||||
state.lighting.light[light_index].num = num;
|
||||
state.lighting.light[light_index].directional = light.config.directional != 0;
|
||||
state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0;
|
||||
state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0;
|
||||
state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0;
|
||||
state.lighting.light[light_index].dist_atten_enable =
|
||||
!regs.lighting.IsDistAttenDisabled(num);
|
||||
state.lighting.light[light_index].spot_atten_enable =
|
||||
!regs.lighting.IsSpotAttenDisabled(num);
|
||||
state.lighting.light[light_index].shadow_enable = !regs.lighting.IsShadowDisabled(num);
|
||||
}
|
||||
|
||||
state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
|
||||
state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
|
||||
state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
|
||||
state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
|
||||
|
||||
state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0;
|
||||
state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
|
||||
state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
|
||||
state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
|
||||
|
||||
// this is a dummy field due to lack of the corresponding register
|
||||
state.lighting.lut_sp.enable = true;
|
||||
state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0;
|
||||
state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value();
|
||||
state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp);
|
||||
|
||||
state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0;
|
||||
state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
|
||||
state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
|
||||
state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
|
||||
|
||||
state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0;
|
||||
state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
|
||||
state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
|
||||
state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
|
||||
|
||||
state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0;
|
||||
state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
|
||||
state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
|
||||
state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
|
||||
|
||||
state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0;
|
||||
state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
|
||||
state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
|
||||
state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
|
||||
|
||||
state.lighting.config = regs.lighting.config0.config;
|
||||
state.lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha;
|
||||
state.lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha;
|
||||
state.lighting.bump_mode = regs.lighting.config0.bump_mode;
|
||||
state.lighting.bump_selector = regs.lighting.config0.bump_selector;
|
||||
state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0;
|
||||
state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0;
|
||||
|
||||
state.lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0;
|
||||
state.lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0;
|
||||
state.lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0;
|
||||
state.lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0;
|
||||
state.lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0;
|
||||
state.lighting.shadow_selector = regs.lighting.config0.shadow_selector;
|
||||
|
||||
state.proctex.enable = regs.texturing.main_config.texture3_enable;
|
||||
if (state.proctex.enable) {
|
||||
state.proctex.coord = regs.texturing.main_config.texture3_coordinates;
|
||||
state.proctex.u_clamp = regs.texturing.proctex.u_clamp;
|
||||
state.proctex.v_clamp = regs.texturing.proctex.v_clamp;
|
||||
state.proctex.color_combiner = regs.texturing.proctex.color_combiner;
|
||||
state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner;
|
||||
state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha;
|
||||
state.proctex.noise_enable = regs.texturing.proctex.noise_enable;
|
||||
state.proctex.u_shift = regs.texturing.proctex.u_shift;
|
||||
state.proctex.v_shift = regs.texturing.proctex.v_shift;
|
||||
state.proctex.lut_width = regs.texturing.proctex_lut.width;
|
||||
state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0;
|
||||
state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1;
|
||||
state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2;
|
||||
state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3;
|
||||
state.proctex.lod_min = regs.texturing.proctex_lut.lod_min;
|
||||
state.proctex.lod_max = regs.texturing.proctex_lut.lod_max;
|
||||
state.proctex.lut_filter = regs.texturing.proctex_lut.filter;
|
||||
}
|
||||
|
||||
state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
|
||||
FramebufferRegs::FragmentOperationMode::Shadow;
|
||||
|
||||
state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
|
||||
program_hash = setup.GetProgramCodeHash();
|
||||
swizzle_hash = setup.GetSwizzleDataHash();
|
||||
main_offset = regs.main_offset;
|
||||
sanitize_mul = VideoCore::g_hw_shader_accurate_mul;
|
||||
|
||||
num_outputs = 0;
|
||||
output_map.fill(16);
|
||||
|
||||
for (int reg : Common::BitSet<u32>(regs.output_mask)) {
|
||||
output_map[reg] = num_outputs++;
|
||||
}
|
||||
}
|
||||
|
||||
void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) {
|
||||
vs_output_attributes = Common::BitSet<u32>(regs.vs.output_mask).Count();
|
||||
gs_output_attributes = vs_output_attributes;
|
||||
|
||||
semantic_maps.fill({16, 0});
|
||||
for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) {
|
||||
const std::array semantics{
|
||||
regs.rasterizer.vs_output_attributes[attrib].map_x.Value(),
|
||||
regs.rasterizer.vs_output_attributes[attrib].map_y.Value(),
|
||||
regs.rasterizer.vs_output_attributes[attrib].map_z.Value(),
|
||||
regs.rasterizer.vs_output_attributes[attrib].map_w.Value(),
|
||||
};
|
||||
for (u32 comp = 0; comp < 4; ++comp) {
|
||||
const auto semantic = semantics[comp];
|
||||
if (static_cast<std::size_t>(semantic) < 24) {
|
||||
semantic_maps[static_cast<std::size_t>(semantic)] = {attrib, comp};
|
||||
} else if (semantic != VSOutputAttributes::INVALID) {
|
||||
LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code)
|
||||
static bool IsPassThroughTevStage(const TevStageConfig& stage) {
|
||||
return (stage.color_op == TevStageConfig::Operation::Replace &&
|
||||
@ -352,7 +109,7 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) {
|
||||
}
|
||||
|
||||
static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) {
|
||||
const auto& state = config.state;
|
||||
const auto& state = config;
|
||||
switch (texture_unit) {
|
||||
case 0:
|
||||
// Only unit 0 respects the texturing type
|
||||
@ -628,23 +385,22 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper
|
||||
}
|
||||
|
||||
/// Writes the if-statement condition used to evaluate alpha testing
|
||||
static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareFunc func) {
|
||||
using CompareFunc = FramebufferRegs::CompareFunc;
|
||||
static void AppendAlphaTestCondition(std::string& out, Pica::CompareFunc func) {
|
||||
switch (func) {
|
||||
case CompareFunc::Never:
|
||||
case Pica::CompareFunc::Never:
|
||||
out += "true";
|
||||
break;
|
||||
case CompareFunc::Always:
|
||||
case Pica::CompareFunc::Always:
|
||||
out += "false";
|
||||
break;
|
||||
case CompareFunc::Equal:
|
||||
case CompareFunc::NotEqual:
|
||||
case CompareFunc::LessThan:
|
||||
case CompareFunc::LessThanOrEqual:
|
||||
case CompareFunc::GreaterThan:
|
||||
case CompareFunc::GreaterThanOrEqual: {
|
||||
case Pica::CompareFunc::Equal:
|
||||
case Pica::CompareFunc::NotEqual:
|
||||
case Pica::CompareFunc::LessThan:
|
||||
case Pica::CompareFunc::LessThanOrEqual:
|
||||
case Pica::CompareFunc::GreaterThan:
|
||||
case Pica::CompareFunc::GreaterThanOrEqual: {
|
||||
static constexpr std::array op{"!=", "==", ">=", ">", "<=", "<"};
|
||||
const auto index = static_cast<u32>(func) - static_cast<u32>(CompareFunc::Equal);
|
||||
const auto index = static_cast<u32>(func) - static_cast<u32>(Pica::CompareFunc::Equal);
|
||||
out += fmt::format("int(last_tex_env_out.a * 255.0) {} alphatest_ref", op[index]);
|
||||
break;
|
||||
}
|
||||
@ -659,7 +415,7 @@ static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareF
|
||||
/// Writes the code to emulate the specified TEV stage
|
||||
static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned index) {
|
||||
const auto stage =
|
||||
static_cast<const TexturingRegs::TevStageConfig>(config.state.tev_stages[index]);
|
||||
static_cast<const TexturingRegs::TevStageConfig>(config.tev_stages[index]);
|
||||
if (!IsPassThroughTevStage(stage)) {
|
||||
const std::string index_name = std::to_string(index);
|
||||
|
||||
@ -716,7 +472,7 @@ static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned
|
||||
|
||||
/// Writes the code to emulate fragment lighting
|
||||
static void WriteLighting(std::string& out, const PicaFSConfig& config) {
|
||||
const auto& lighting = config.state.lighting;
|
||||
const auto& lighting = config.lighting;
|
||||
|
||||
// Define lighting globals
|
||||
out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
|
||||
@ -1119,7 +875,7 @@ float ProcTexLookupLUT(int offset, float coord) {
|
||||
)";
|
||||
|
||||
// Noise utility
|
||||
if (config.state.proctex.noise_enable) {
|
||||
if (config.proctex.noise_enable) {
|
||||
// See swrasterizer/proctex.cpp for more information about these functions
|
||||
out += R"(
|
||||
int ProcTexNoiseRand1D(int v) {
|
||||
@ -1159,16 +915,16 @@ float ProcTexNoiseCoef(vec2 x) {
|
||||
}
|
||||
|
||||
out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n";
|
||||
out += fmt::format("int lut_width = {} >> level;\n", config.state.proctex.lut_width);
|
||||
out += fmt::format("int lut_width = {} >> level;\n", config.proctex.lut_width);
|
||||
// Offsets for level 4-7 seem to be hardcoded
|
||||
out += fmt::format("int lut_offsets[8] = int[]({}, {}, {}, {}, 0xF0, 0xF8, 0xFC, 0xFE);\n",
|
||||
config.state.proctex.lut_offset0, config.state.proctex.lut_offset1,
|
||||
config.state.proctex.lut_offset2, config.state.proctex.lut_offset3);
|
||||
config.proctex.lut_offset0, config.proctex.lut_offset1,
|
||||
config.proctex.lut_offset2, config.proctex.lut_offset3);
|
||||
out += "int lut_offset = lut_offsets[level];\n";
|
||||
// For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
|
||||
out += "lut_coord *= float(lut_width - 1);\n";
|
||||
|
||||
switch (config.state.proctex.lut_filter) {
|
||||
switch (config.proctex.lut_filter) {
|
||||
case ProcTexFilter::Linear:
|
||||
case ProcTexFilter::LinearMipmapLinear:
|
||||
case ProcTexFilter::LinearMipmapNearest:
|
||||
@ -1191,8 +947,8 @@ float ProcTexNoiseCoef(vec2 x) {
|
||||
out += "}\n";
|
||||
|
||||
out += "vec4 ProcTex() {\n";
|
||||
if (config.state.proctex.coord < 3) {
|
||||
out += fmt::format("vec2 uv = abs(texcoord{});\n", config.state.proctex.coord);
|
||||
if (config.proctex.coord < 3) {
|
||||
out += fmt::format("vec2 uv = abs(texcoord{});\n", config.proctex.coord);
|
||||
} else {
|
||||
LOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3");
|
||||
out += "vec2 uv = abs(texcoord0);\n";
|
||||
@ -1205,23 +961,23 @@ float ProcTexNoiseCoef(vec2 x) {
|
||||
out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n";
|
||||
// unlike normal texture, the bias is inside the log2
|
||||
out += fmt::format("float lod = log2(abs(float({}) * proctex_bias) * (duv.x + duv.y));\n",
|
||||
config.state.proctex.lut_width);
|
||||
config.proctex.lut_width);
|
||||
out += "if (proctex_bias == 0.0) lod = 0.0;\n";
|
||||
out += fmt::format("lod = clamp(lod, {:#}, {:#});\n",
|
||||
std::max(0.0f, static_cast<float>(config.state.proctex.lod_min)),
|
||||
std::min(7.0f, static_cast<float>(config.state.proctex.lod_max)));
|
||||
std::max(0.0f, static_cast<float>(config.proctex.lod_min)),
|
||||
std::min(7.0f, static_cast<float>(config.proctex.lod_max)));
|
||||
// Get shift offset before noise generation
|
||||
out += "float u_shift = ";
|
||||
AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift,
|
||||
config.state.proctex.u_clamp);
|
||||
AppendProcTexShiftOffset(out, "uv.y", config.proctex.u_shift,
|
||||
config.proctex.u_clamp);
|
||||
out += ";\n";
|
||||
out += "float v_shift = ";
|
||||
AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift,
|
||||
config.state.proctex.v_clamp);
|
||||
AppendProcTexShiftOffset(out, "uv.x", config.proctex.v_shift,
|
||||
config.proctex.v_clamp);
|
||||
out += ";\n";
|
||||
|
||||
// Generate noise
|
||||
if (config.state.proctex.noise_enable) {
|
||||
if (config.proctex.noise_enable) {
|
||||
out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n"
|
||||
"uv = abs(uv);\n";
|
||||
}
|
||||
@ -1231,16 +987,16 @@ float ProcTexNoiseCoef(vec2 x) {
|
||||
"float v = uv.y + v_shift;\n";
|
||||
|
||||
// Clamp
|
||||
AppendProcTexClamp(out, "u", config.state.proctex.u_clamp);
|
||||
AppendProcTexClamp(out, "v", config.state.proctex.v_clamp);
|
||||
AppendProcTexClamp(out, "u", config.proctex.u_clamp);
|
||||
AppendProcTexClamp(out, "v", config.proctex.v_clamp);
|
||||
|
||||
// Combine and map
|
||||
out += "float lut_coord = ";
|
||||
AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner,
|
||||
AppendProcTexCombineAndMap(out, config.proctex.color_combiner,
|
||||
"proctex_color_map_offset");
|
||||
out += ";\n";
|
||||
|
||||
switch (config.state.proctex.lut_filter) {
|
||||
switch (config.proctex.lut_filter) {
|
||||
case ProcTexFilter::Linear:
|
||||
case ProcTexFilter::Nearest:
|
||||
out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n";
|
||||
@ -1258,11 +1014,11 @@ float ProcTexNoiseCoef(vec2 x) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (config.state.proctex.separate_alpha) {
|
||||
if (config.proctex.separate_alpha) {
|
||||
// Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It
|
||||
// uses the output of CombineAndMap directly instead.
|
||||
out += "float final_alpha = ";
|
||||
AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner,
|
||||
AppendProcTexCombineAndMap(out, config.proctex.alpha_combiner,
|
||||
"proctex_alpha_map_offset");
|
||||
out += ";\n";
|
||||
out += "return vec4(final_color.xyz, final_alpha);\n}\n";
|
||||
@ -1271,8 +1027,8 @@ float ProcTexNoiseCoef(vec2 x) {
|
||||
}
|
||||
}
|
||||
|
||||
std::string GenerateFragmentShader(const PicaFSConfig& config) {
|
||||
const auto& state = config.state;
|
||||
std::string ShaderGenerator::GenerateFragmentShader(const PicaFSConfig& config, bool seperable_shader) {
|
||||
const auto& state = config;
|
||||
std::string out;
|
||||
|
||||
out += R"(
|
||||
@ -1387,7 +1143,7 @@ std::string GenerateFragmentShader(const PicaFSConfig& config) {
|
||||
vec4 shadowTexture(vec2 uv, float w) {
|
||||
)";
|
||||
|
||||
if (!config.state.shadow_texture_orthographic) {
|
||||
if (!config.shadow_texture_orthographic) {
|
||||
out += "uv /= w;";
|
||||
}
|
||||
|
||||
@ -1501,7 +1257,7 @@ vec4 shadowTextureCube(vec2 uv, float w) {
|
||||
#endif
|
||||
)";
|
||||
|
||||
if (config.state.proctex.enable)
|
||||
if (config.proctex.enable)
|
||||
AppendProcTexSampler(out, config);
|
||||
|
||||
// We round the interpolated primary color to the nearest 1/255th
|
||||
@ -1514,7 +1270,7 @@ vec4 secondary_fragment_color = vec4(0.0);
|
||||
)";
|
||||
|
||||
// Do not do any sort of processing if it's obvious we're not going to pass the alpha test
|
||||
if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) {
|
||||
if (state.alpha_test_func == Pica::CompareFunc::Never) {
|
||||
out += "discard; }";
|
||||
return out;
|
||||
}
|
||||
@ -1552,7 +1308,7 @@ vec4 secondary_fragment_color = vec4(0.0);
|
||||
WriteTevStage(out, config, static_cast<u32>(index));
|
||||
}
|
||||
|
||||
if (state.alpha_test_func != FramebufferRegs::CompareFunc::Always) {
|
||||
if (state.alpha_test_func != Pica::CompareFunc::Always) {
|
||||
out += "if (";
|
||||
AppendAlphaTestCondition(out, state.alpha_test_func);
|
||||
out += ") discard;\n";
|
||||
@ -1623,21 +1379,20 @@ do {
|
||||
return out;
|
||||
}
|
||||
|
||||
std::string GenerateTrivialVertexShader(bool separable_shader) {
|
||||
std::string ShaderGenerator::GenerateTrivialVertexShader(bool separable_shader) {
|
||||
std::string out;
|
||||
out += "#version 450\n";
|
||||
out += "#extension GL_ARB_separate_shader_objects : enable\n";
|
||||
out +=
|
||||
fmt::format("layout(location = {}) in vec4 vert_position;\n"
|
||||
"layout(location = {}) in vec4 vert_color;\n"
|
||||
"layout(location = {}) in vec2 vert_texcoord0;\n"
|
||||
"layout(location = {}) in vec2 vert_texcoord1;\n"
|
||||
"layout(location = {}) in vec2 vert_texcoord2;\n"
|
||||
"layout(location = {}) in float vert_texcoord0_w;\n"
|
||||
"layout(location = {}) in vec4 vert_normquat;\n"
|
||||
"layout(location = {}) in vec3 vert_view;\n",
|
||||
ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1,
|
||||
ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW);
|
||||
out += fmt::format("layout(location = {}) in vec4 vert_position;\n"
|
||||
"layout(location = {}) in vec4 vert_color;\n"
|
||||
"layout(location = {}) in vec2 vert_texcoord0;\n"
|
||||
"layout(location = {}) in vec2 vert_texcoord1;\n"
|
||||
"layout(location = {}) in vec2 vert_texcoord2;\n"
|
||||
"layout(location = {}) in float vert_texcoord0_w;\n"
|
||||
"layout(location = {}) in vec4 vert_normquat;\n"
|
||||
"layout(location = {}) in vec3 vert_view;\n",
|
||||
ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1,
|
||||
ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW);
|
||||
|
||||
out += GetVertexInterfaceDeclaration(true, separable_shader);
|
||||
|
||||
@ -1656,7 +1411,6 @@ void main() {
|
||||
|
||||
gl_Position = vert_position;
|
||||
gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;
|
||||
//gl_Position.y = -gl_Position.y;
|
||||
//gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
|
||||
//gl_ClipDistance[1] = dot(clip_coef, vert_position);
|
||||
}
|
||||
@ -1665,205 +1419,15 @@ void main() {
|
||||
return out;
|
||||
}
|
||||
|
||||
bool InitializeCompiler() {
|
||||
static bool glslang_initialized = false;
|
||||
|
||||
if (glslang_initialized) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!glslang::InitializeProcess()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::atexit([]() { glslang::FinalizeProcess(); });
|
||||
|
||||
glslang_initialized = true;
|
||||
return true;
|
||||
std::string ShaderGenerator::GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config,
|
||||
bool separable_shader) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unimplemented!");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
const TBuiltInResource DefaultTBuiltInResource = {
|
||||
.maxLights = 32,
|
||||
.maxClipPlanes = 6,
|
||||
.maxTextureUnits = 32,
|
||||
.maxTextureCoords = 32,
|
||||
.maxVertexAttribs = 64,
|
||||
.maxVertexUniformComponents = 4096,
|
||||
.maxVaryingFloats = 64,
|
||||
.maxVertexTextureImageUnits = 32,
|
||||
.maxCombinedTextureImageUnits = 80,
|
||||
.maxTextureImageUnits = 32,
|
||||
.maxFragmentUniformComponents = 4096,
|
||||
.maxDrawBuffers = 32,
|
||||
.maxVertexUniformVectors = 128,
|
||||
.maxVaryingVectors = 8,
|
||||
.maxFragmentUniformVectors = 16,
|
||||
.maxVertexOutputVectors = 16,
|
||||
.maxFragmentInputVectors = 15,
|
||||
.minProgramTexelOffset = -8,
|
||||
.maxProgramTexelOffset = 7,
|
||||
.maxClipDistances = 8,
|
||||
.maxComputeWorkGroupCountX = 65535,
|
||||
.maxComputeWorkGroupCountY = 65535,
|
||||
.maxComputeWorkGroupCountZ = 65535,
|
||||
.maxComputeWorkGroupSizeX = 1024,
|
||||
.maxComputeWorkGroupSizeY = 1024,
|
||||
.maxComputeWorkGroupSizeZ = 64,
|
||||
.maxComputeUniformComponents = 1024,
|
||||
.maxComputeTextureImageUnits = 16,
|
||||
.maxComputeImageUniforms = 8,
|
||||
.maxComputeAtomicCounters = 8,
|
||||
.maxComputeAtomicCounterBuffers = 1,
|
||||
.maxVaryingComponents = 60,
|
||||
.maxVertexOutputComponents = 64,
|
||||
.maxGeometryInputComponents = 64,
|
||||
.maxGeometryOutputComponents = 128,
|
||||
.maxFragmentInputComponents = 128,
|
||||
.maxImageUnits = 8,
|
||||
.maxCombinedImageUnitsAndFragmentOutputs = 8,
|
||||
.maxCombinedShaderOutputResources = 8,
|
||||
.maxImageSamples = 0,
|
||||
.maxVertexImageUniforms = 0,
|
||||
.maxTessControlImageUniforms = 0,
|
||||
.maxTessEvaluationImageUniforms = 0,
|
||||
.maxGeometryImageUniforms = 0,
|
||||
.maxFragmentImageUniforms = 8,
|
||||
.maxCombinedImageUniforms = 8,
|
||||
.maxGeometryTextureImageUnits = 16,
|
||||
.maxGeometryOutputVertices = 256,
|
||||
.maxGeometryTotalOutputComponents = 1024,
|
||||
.maxGeometryUniformComponents = 1024,
|
||||
.maxGeometryVaryingComponents = 64,
|
||||
.maxTessControlInputComponents = 128,
|
||||
.maxTessControlOutputComponents = 128,
|
||||
.maxTessControlTextureImageUnits = 16,
|
||||
.maxTessControlUniformComponents = 1024,
|
||||
.maxTessControlTotalOutputComponents = 4096,
|
||||
.maxTessEvaluationInputComponents = 128,
|
||||
.maxTessEvaluationOutputComponents = 128,
|
||||
.maxTessEvaluationTextureImageUnits = 16,
|
||||
.maxTessEvaluationUniformComponents = 1024,
|
||||
.maxTessPatchComponents = 120,
|
||||
.maxPatchVertices = 32,
|
||||
.maxTessGenLevel = 64,
|
||||
.maxViewports = 16,
|
||||
.maxVertexAtomicCounters = 0,
|
||||
.maxTessControlAtomicCounters = 0,
|
||||
.maxTessEvaluationAtomicCounters = 0,
|
||||
.maxGeometryAtomicCounters = 0,
|
||||
.maxFragmentAtomicCounters = 8,
|
||||
.maxCombinedAtomicCounters = 8,
|
||||
.maxAtomicCounterBindings = 1,
|
||||
.maxVertexAtomicCounterBuffers = 0,
|
||||
.maxTessControlAtomicCounterBuffers = 0,
|
||||
.maxTessEvaluationAtomicCounterBuffers = 0,
|
||||
.maxGeometryAtomicCounterBuffers = 0,
|
||||
.maxFragmentAtomicCounterBuffers = 1,
|
||||
.maxCombinedAtomicCounterBuffers = 1,
|
||||
.maxAtomicCounterBufferSize = 16384,
|
||||
.maxTransformFeedbackBuffers = 4,
|
||||
.maxTransformFeedbackInterleavedComponents = 64,
|
||||
.maxCullDistances = 8,
|
||||
.maxCombinedClipAndCullDistances = 8,
|
||||
.maxSamples = 4,
|
||||
.maxMeshOutputVerticesNV = 256,
|
||||
.maxMeshOutputPrimitivesNV = 512,
|
||||
.maxMeshWorkGroupSizeX_NV = 32,
|
||||
.maxMeshWorkGroupSizeY_NV = 1,
|
||||
.maxMeshWorkGroupSizeZ_NV = 1,
|
||||
.maxTaskWorkGroupSizeX_NV = 32,
|
||||
.maxTaskWorkGroupSizeY_NV = 1,
|
||||
.maxTaskWorkGroupSizeZ_NV = 1,
|
||||
.maxMeshViewCountNV = 4,
|
||||
.maxDualSourceDrawBuffersEXT = 1,
|
||||
.limits = TLimits{
|
||||
.nonInductiveForLoops = 1,
|
||||
.whileLoops = 1,
|
||||
.doWhileLoops = 1,
|
||||
.generalUniformIndexing = 1,
|
||||
.generalAttributeMatrixVectorIndexing = 1,
|
||||
.generalVaryingIndexing = 1,
|
||||
.generalSamplerIndexing = 1,
|
||||
.generalVariableIndexing = 1,
|
||||
.generalConstantMatrixVectorIndexing = 1,
|
||||
}};
|
||||
|
||||
vk::ShaderModule CompileShader(const std::string& source, vk::ShaderStageFlagBits vk_stage) {
|
||||
if (!InitializeCompiler()) {
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
EShLanguage stage;
|
||||
switch (vk_stage) {
|
||||
case vk::ShaderStageFlagBits::eVertex:
|
||||
stage = EShLangVertex;
|
||||
break;
|
||||
case vk::ShaderStageFlagBits::eFragment:
|
||||
stage = EShLangFragment;
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown shader stage");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
std::unique_ptr<glslang::TShader> shader = std::make_unique<glslang::TShader>(stage);
|
||||
std::unique_ptr<glslang::TProgram> program;
|
||||
glslang::TShader::ForbidIncluder includer;
|
||||
EProfile profile = ECoreProfile;
|
||||
EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
|
||||
|
||||
int default_version = 450;
|
||||
const char* pass_source_code = source.data();
|
||||
int pass_source_code_length = static_cast<int>(source.size());
|
||||
|
||||
shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3);
|
||||
shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);
|
||||
|
||||
if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog());
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
// Even though there's only a single shader, we still need to link it to generate SPV
|
||||
program = std::make_unique<glslang::TProgram>();
|
||||
program->addShader(shader.get());
|
||||
if (!program->link(messages)) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog());
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
glslang::TIntermediate* intermediate = program->getIntermediate(stage);
|
||||
std::vector<u32> out_code;
|
||||
spv::SpvBuildLogger logger;
|
||||
glslang::SpvOptions options;
|
||||
|
||||
// Compile the SPIR-V module without optimizations for easier debugging in RenderDoc.
|
||||
if (true) {
|
||||
intermediate->addSourceText(pass_source_code, pass_source_code_length);
|
||||
options.generateDebugInfo = true;
|
||||
options.disableOptimizer = true;
|
||||
options.optimizeSize = false;
|
||||
options.disassemble = false;
|
||||
options.validate = true;
|
||||
}
|
||||
else {
|
||||
options.disableOptimizer = false;
|
||||
options.stripDebugInfo = true;
|
||||
}
|
||||
|
||||
glslang::GlslangToSpv(*intermediate, out_code, &logger, &options);
|
||||
|
||||
const std::string spv_messages = logger.getAllMessages();
|
||||
if (!spv_messages.empty()) {
|
||||
LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
|
||||
}
|
||||
|
||||
vk::ShaderModuleCreateInfo shader_info{{}, out_code.size() * sizeof(u32), out_code.data()};
|
||||
const vk::Device device = g_vk_instace->GetDevice();
|
||||
vk::ShaderModule shader_module = device.createShaderModule(shader_info);
|
||||
return shader_module;
|
||||
|
||||
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unimplemented!");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -4,46 +4,23 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include "common/hash.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_state.h"
|
||||
#include "video_core/common/shader_gen.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
/**
|
||||
* Returns the vertex and fragment shader sources used for presentation
|
||||
* @returns String of shader source code
|
||||
*/
|
||||
std::string GetPresentVertexShader();
|
||||
std::string GetPresentFragmentShader();
|
||||
class ShaderGenerator : public VideoCore::ShaderGeneratorBase {
|
||||
public:
|
||||
ShaderGenerator() = default;
|
||||
~ShaderGenerator() override = default;
|
||||
|
||||
/**
|
||||
* Generates the GLSL vertex shader program source code that accepts vertices from software shader
|
||||
* and directly passes them to the fragment shader.
|
||||
* @param separable_shader generates shader that can be used for separate shader object
|
||||
* @returns String of the shader source code
|
||||
*/
|
||||
std::string GenerateTrivialVertexShader(bool separable_shader);
|
||||
std::string GenerateTrivialVertexShader(bool separable_shader) override;
|
||||
|
||||
/**
|
||||
* Generates the GLSL fragment shader program source code for the current Pica state
|
||||
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
|
||||
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
|
||||
* @param separable_shader generates shader that can be used for separate shader object
|
||||
* @returns String of the shader source code
|
||||
*/
|
||||
std::string GenerateFragmentShader(const PicaFSConfig& config);
|
||||
std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config,
|
||||
bool separable_shader) override;
|
||||
|
||||
/**
|
||||
* Generates a SPRI-V shader module from the provided GLSL source code
|
||||
*/
|
||||
vk::ShaderModule CompileShader(const std::string& source, vk::ShaderStageFlagBits stage);
|
||||
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) override;
|
||||
|
||||
} // namespace Vulkan
|
||||
std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) override;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -14,10 +14,10 @@
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/* Render vertex attributes */
|
||||
struct VertexBase {
|
||||
VertexBase() = default;
|
||||
VertexBase(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
|
||||
/// Structure that the hardware rendered vertices are composed of
|
||||
struct HardwareVertex {
|
||||
HardwareVertex() = default;
|
||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
|
||||
position[0] = v.pos.x.ToFloat32();
|
||||
position[1] = v.pos.y.ToFloat32();
|
||||
position[2] = v.pos.z.ToFloat32();
|
||||
@ -56,31 +56,12 @@ struct VertexBase {
|
||||
glm::vec3 view;
|
||||
};
|
||||
|
||||
/// Structure that the hardware rendered vertices are composed of
|
||||
struct HardwareVertex : public VertexBase {
|
||||
HardwareVertex() = default;
|
||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) : VertexBase(v, flip_quaternion) {};
|
||||
static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexBase));
|
||||
static constexpr std::array<vk::VertexInputAttributeDescription, 8> attribute_desc =
|
||||
{
|
||||
vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, position)),
|
||||
vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, color)),
|
||||
vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord0)),
|
||||
vk::VertexInputAttributeDescription(3, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord1)),
|
||||
vk::VertexInputAttributeDescription(4, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord2)),
|
||||
vk::VertexInputAttributeDescription(5, 0, vk::Format::eR32Sfloat, offsetof(VertexBase, tex_coord0_w)),
|
||||
vk::VertexInputAttributeDescription(6, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, normquat)),
|
||||
vk::VertexInputAttributeDescription(7, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexBase, view)),
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Vertex structure that the drawn screen rectangles are composed of.
|
||||
*/
|
||||
|
||||
struct ScreenRectVertexBase {
|
||||
ScreenRectVertexBase() = default;
|
||||
ScreenRectVertexBase(float x, float y, float u, float v, float s) {
|
||||
struct ScreenRectVertex {
|
||||
ScreenRectVertex() = default;
|
||||
ScreenRectVertex(float x, float y, float u, float v, float s) {
|
||||
position.x = x;
|
||||
position.y = y;
|
||||
tex_coord.x = u;
|
||||
@ -92,241 +73,4 @@ struct ScreenRectVertexBase {
|
||||
glm::vec3 tex_coord;
|
||||
};
|
||||
|
||||
struct ScreenRectVertex : public ScreenRectVertexBase {
|
||||
ScreenRectVertex() = default;
|
||||
ScreenRectVertex(float x, float y, float u, float v, float s) : ScreenRectVertexBase(x, y, u, v, s) {};
|
||||
static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(ScreenRectVertexBase));
|
||||
static constexpr std::array<vk::VertexInputAttributeDescription, 2> attribute_desc =
|
||||
{
|
||||
vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32Sfloat, offsetof(ScreenRectVertexBase, position)),
|
||||
vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32Sfloat, offsetof(ScreenRectVertexBase, tex_coord)),
|
||||
};
|
||||
};
|
||||
|
||||
enum class ProgramType : u32 { VS, GS, FS };
|
||||
|
||||
enum Attributes {
|
||||
ATTRIBUTE_POSITION,
|
||||
ATTRIBUTE_COLOR,
|
||||
ATTRIBUTE_TEXCOORD0,
|
||||
ATTRIBUTE_TEXCOORD1,
|
||||
ATTRIBUTE_TEXCOORD2,
|
||||
ATTRIBUTE_TEXCOORD0_W,
|
||||
ATTRIBUTE_NORMQUAT,
|
||||
ATTRIBUTE_VIEW,
|
||||
};
|
||||
|
||||
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
|
||||
struct TevStageConfigRaw {
|
||||
u32 sources_raw;
|
||||
u32 modifiers_raw;
|
||||
u32 ops_raw;
|
||||
u32 scales_raw;
|
||||
explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
|
||||
Pica::TexturingRegs::TevStageConfig stage;
|
||||
stage.sources_raw = sources_raw;
|
||||
stage.modifiers_raw = modifiers_raw;
|
||||
stage.ops_raw = ops_raw;
|
||||
stage.const_color = 0;
|
||||
stage.scales_raw = scales_raw;
|
||||
return stage;
|
||||
}
|
||||
};
|
||||
|
||||
struct PicaFSConfigState {
|
||||
Pica::FramebufferRegs::CompareFunc alpha_test_func;
|
||||
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
|
||||
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
|
||||
bool texture2_use_coord1;
|
||||
std::array<TevStageConfigRaw, 6> tev_stages;
|
||||
u8 combiner_buffer_input;
|
||||
|
||||
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
|
||||
Pica::TexturingRegs::FogMode fog_mode;
|
||||
bool fog_flip;
|
||||
bool alphablend_enable;
|
||||
Pica::FramebufferRegs::LogicOp logic_op;
|
||||
|
||||
struct {
|
||||
struct {
|
||||
unsigned num;
|
||||
bool directional;
|
||||
bool two_sided_diffuse;
|
||||
bool dist_atten_enable;
|
||||
bool spot_atten_enable;
|
||||
bool geometric_factor_0;
|
||||
bool geometric_factor_1;
|
||||
bool shadow_enable;
|
||||
} light[8];
|
||||
|
||||
bool enable;
|
||||
unsigned src_num;
|
||||
Pica::LightingRegs::LightingBumpMode bump_mode;
|
||||
unsigned bump_selector;
|
||||
bool bump_renorm;
|
||||
bool clamp_highlights;
|
||||
|
||||
Pica::LightingRegs::LightingConfig config;
|
||||
bool enable_primary_alpha;
|
||||
bool enable_secondary_alpha;
|
||||
|
||||
bool enable_shadow;
|
||||
bool shadow_primary;
|
||||
bool shadow_secondary;
|
||||
bool shadow_invert;
|
||||
bool shadow_alpha;
|
||||
unsigned shadow_selector;
|
||||
|
||||
struct {
|
||||
bool enable;
|
||||
bool abs_input;
|
||||
Pica::LightingRegs::LightingLutInput type;
|
||||
float scale;
|
||||
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
|
||||
} lighting;
|
||||
|
||||
struct {
|
||||
bool enable;
|
||||
u32 coord;
|
||||
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
|
||||
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
|
||||
bool separate_alpha;
|
||||
bool noise_enable;
|
||||
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
|
||||
u32 lut_width;
|
||||
u32 lut_offset0;
|
||||
u32 lut_offset1;
|
||||
u32 lut_offset2;
|
||||
u32 lut_offset3;
|
||||
u32 lod_min;
|
||||
u32 lod_max;
|
||||
Pica::TexturingRegs::ProcTexFilter lut_filter;
|
||||
} proctex;
|
||||
|
||||
bool shadow_rendering;
|
||||
bool shadow_texture_orthographic;
|
||||
};
|
||||
|
||||
/**
|
||||
* This struct contains all state used to generate the GLSL fragment shader that emulates the
|
||||
* current Pica register configuration. This struct is used as a cache key for generated GLSL shader
|
||||
* programs. The functions in gl_shader_gen.cpp should retrieve state from this struct only, not by
|
||||
* directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
|
||||
* Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
|
||||
* two separate shaders sharing the same key.
|
||||
*/
|
||||
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
|
||||
|
||||
/// Construct a PicaFSConfig with the given Pica register configuration.
|
||||
static PicaFSConfig BuildFromRegs(const Pica::Regs& regs);
|
||||
|
||||
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
|
||||
return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
|
||||
}
|
||||
|
||||
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
|
||||
return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* This struct contains common information to identify a GL vertex/geometry shader generated from
|
||||
* PICA vertex/geometry shader.
|
||||
*/
|
||||
struct PicaShaderConfigCommon {
|
||||
void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
|
||||
|
||||
u64 program_hash;
|
||||
u64 swizzle_hash;
|
||||
u32 main_offset;
|
||||
bool sanitize_mul;
|
||||
|
||||
u32 num_outputs;
|
||||
|
||||
// output_map[output register index] -> output attribute index
|
||||
std::array<u32, 16> output_map;
|
||||
};
|
||||
|
||||
/**
|
||||
* This struct contains information to identify a GL vertex shader generated from PICA vertex
|
||||
* shader.
|
||||
*/
|
||||
struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
|
||||
explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
|
||||
state.Init(regs, setup);
|
||||
}
|
||||
explicit PicaVSConfig(const PicaShaderConfigCommon& conf) {
|
||||
state = conf;
|
||||
}
|
||||
};
|
||||
|
||||
struct PicaGSConfigCommonRaw {
|
||||
void Init(const Pica::Regs& regs);
|
||||
|
||||
u32 vs_output_attributes;
|
||||
u32 gs_output_attributes;
|
||||
|
||||
struct SemanticMap {
|
||||
u32 attribute_index;
|
||||
u32 component_index;
|
||||
};
|
||||
|
||||
// semantic_maps[semantic name] -> GS output attribute index + component index
|
||||
std::array<SemanticMap, 24> semantic_maps;
|
||||
};
|
||||
|
||||
/**
|
||||
* This struct contains information to identify a GL geometry shader generated from PICA no-geometry
|
||||
* shader pipeline
|
||||
*/
|
||||
struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigCommonRaw> {
|
||||
explicit PicaFixedGSConfig(const Pica::Regs& regs) {
|
||||
state.Init(regs);
|
||||
}
|
||||
};
|
||||
|
||||
struct PipelineCacheKey {
|
||||
vk::Format color, depth_stencil;
|
||||
vk::PipelineColorBlendAttachmentState blend_config;
|
||||
vk::LogicOp blend_logic_op;
|
||||
PicaFSConfig fragment_config;
|
||||
|
||||
auto operator <=>(const PipelineCacheKey& other) const = default;
|
||||
|
||||
u64 Hash() const {
|
||||
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(PipelineCacheKey));
|
||||
return static_cast<size_t>(hash);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<Vulkan::PicaFSConfig> {
|
||||
std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<Vulkan::PicaVSConfig> {
|
||||
std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<Vulkan::PicaFixedGSConfig> {
|
||||
std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<Vulkan::PipelineCacheKey> {
|
||||
size_t operator()(const Vulkan::PipelineCacheKey& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
@ -7,9 +7,9 @@
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_builder.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
@ -69,7 +69,7 @@ public:
|
||||
bool StencilTestEnabled() const { return stencil_enabled && stencil_writes; }
|
||||
|
||||
/// Configure drawing state
|
||||
void SetVertexBuffer(const Buffer& buffer, vk::DeviceSize offset);
|
||||
void SetVertexBuffer(const StreamBuffer& buffer, vk::DeviceSize offset);
|
||||
void SetViewport(vk::Viewport viewport);
|
||||
void SetScissor(vk::Rect2D scissor);
|
||||
void SetCullMode(vk::CullModeFlags flags);
|
||||
@ -100,9 +100,9 @@ public:
|
||||
void EndRendering();
|
||||
|
||||
/// Configure shader resources
|
||||
void SetUniformBuffer(u32 binding, u32 offset, u32 size, const Buffer& buffer);
|
||||
void SetUniformBuffer(u32 binding, u32 offset, u32 size, const StreamBuffer& buffer);
|
||||
void SetTexture(u32 binding, const Texture& texture);
|
||||
void SetTexelBuffer(u32 binding, u32 offset, u32 size, const Buffer& buffer, u32 view_index);
|
||||
void SetTexelBuffer(u32 binding, u32 offset, u32 size, const StreamBuffer& buffer, u32 view_index);
|
||||
void SetPresentTextures(vk::ImageView view0, vk::ImageView view1, vk::ImageView view2);
|
||||
void SetPresentData(DrawInfo data);
|
||||
void SetPlaceholderColor(u8 red, u8 green, u8 blue, u8 alpha);
|
||||
|
@ -2,60 +2,69 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include <array>
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
Swapchain::Swapchain(vk::SurfaceKHR surface_) : surface(surface_) {
|
||||
Swapchain::Swapchain(Instance& instance, vk::SurfaceKHR surface) :
|
||||
instance(instance), surface(surface) {
|
||||
|
||||
}
|
||||
|
||||
Swapchain::~Swapchain() {
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
auto instance = g_vk_instace->GetInstance();
|
||||
device.waitIdle();
|
||||
|
||||
// Destroy swapchain resources
|
||||
vk::Device device = instance.GetDevice();
|
||||
device.destroySemaphore(render_finished);
|
||||
device.destroySemaphore(image_available);
|
||||
device.destroySwapchainKHR(swapchain);
|
||||
instance.destroySurfaceKHR(surface);
|
||||
}
|
||||
|
||||
bool Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
|
||||
void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
|
||||
is_outdated = false;
|
||||
is_suboptimal = false;
|
||||
|
||||
// Fetch information about the provided surface
|
||||
PopulateSwapchainDetails(surface, width, height);
|
||||
Configure(width, height);
|
||||
|
||||
const std::array indices {
|
||||
g_vk_instace->GetGraphicsQueueFamilyIndex(),
|
||||
g_vk_instace->GetPresentQueueFamilyIndex(),
|
||||
const std::array queue_family_indices = {
|
||||
instance.GetGraphicsQueueFamilyIndex(),
|
||||
instance.GetPresentQueueFamilyIndex(),
|
||||
};
|
||||
|
||||
const bool exclusive = queue_family_indices[0] == queue_family_indices[1];
|
||||
const u32 queue_family_indices_count = exclusive ? 2u : 1u;
|
||||
const vk::SharingMode sharing_mode = exclusive ? vk::SharingMode::eExclusive :
|
||||
vk::SharingMode::eConcurrent;
|
||||
|
||||
// Now we can actually create the swapchain
|
||||
vk::SwapchainCreateInfoKHR swapchain_info{{}, surface, details.image_count, details.format.format,
|
||||
details.format.colorSpace, details.extent, 1, vk::ImageUsageFlagBits::eColorAttachment,
|
||||
vk::SharingMode::eExclusive, 1, indices.data(), details.transform,
|
||||
vk::CompositeAlphaFlagBitsKHR::eOpaque, details.present_mode, true, swapchain};
|
||||
const vk::SwapchainCreateInfoKHR swapchain_info = {
|
||||
.surface = surface,
|
||||
.minImageCount = image_count,
|
||||
.imageFormat = surface_format.format,
|
||||
.imageColorSpace = surface_format.colorSpace,
|
||||
.imageExtent = extent,
|
||||
.imageArrayLayers = 1,
|
||||
.imageUsage = vk::ImageUsageFlagBits::eColorAttachment,
|
||||
.imageSharingMode = sharing_mode,
|
||||
.queueFamilyIndexCount = queue_family_indices_count,
|
||||
.pQueueFamilyIndices = queue_family_indices.data(),
|
||||
.preTransform = transform,
|
||||
.presentMode = present_mode,
|
||||
.clipped = true,
|
||||
.oldSwapchain = swapchain
|
||||
};
|
||||
|
||||
// For dedicated present queues, select concurrent sharing mode
|
||||
if (indices[0] != indices[1]) {
|
||||
swapchain_info.imageSharingMode = vk::SharingMode::eConcurrent;
|
||||
swapchain_info.queueFamilyIndexCount = 2;
|
||||
}
|
||||
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
auto new_swapchain = device.createSwapchainKHR(swapchain_info);
|
||||
vk::Device device = instance.GetDevice();
|
||||
vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info);
|
||||
|
||||
// If an old swapchain exists, destroy it and move the new one to its place.
|
||||
if (swapchain) {
|
||||
device.destroy(swapchain);
|
||||
if (vk::SwapchainKHR old_swapchain = std::exchange(swapchain, new_swapchain); old_swapchain) {
|
||||
device.destroySwapchainKHR(old_swapchain);
|
||||
}
|
||||
swapchain = new_swapchain;
|
||||
|
||||
// Create sync objects if not already created
|
||||
if (!image_available) {
|
||||
@ -67,19 +76,17 @@ bool Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
|
||||
}
|
||||
|
||||
// Create framebuffer and image views
|
||||
swapchain_images.clear();
|
||||
SetupImages();
|
||||
|
||||
return true;
|
||||
images = device.getSwapchainImagesKHR(swapchain);
|
||||
}
|
||||
|
||||
// Wait for maximum of 1 second
|
||||
constexpr u64 ACQUIRE_TIMEOUT = 1000000000;
|
||||
|
||||
void Swapchain::AcquireNextImage() {
|
||||
auto result = g_vk_instace->GetDevice().acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT,
|
||||
image_available, VK_NULL_HANDLE,
|
||||
&image_index);
|
||||
vk::Device device = instance.GetDevice();
|
||||
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT,
|
||||
image_available, VK_NULL_HANDLE,
|
||||
¤t_image);
|
||||
switch (result) {
|
||||
case vk::Result::eSuccess:
|
||||
break;
|
||||
@ -90,15 +97,21 @@ void Swapchain::AcquireNextImage() {
|
||||
is_outdated = true;
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR(Render_Vulkan, "acquireNextImageKHR returned unknown result");
|
||||
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Swapchain::Present() {
|
||||
const auto present_queue = g_vk_instace->GetPresentQueue();
|
||||
const vk::PresentInfoKHR present_info = {
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = &render_finished,
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = &swapchain,
|
||||
.pImageIndices = ¤t_image
|
||||
};
|
||||
|
||||
vk::PresentInfoKHR present_info(render_finished, swapchain, image_index);
|
||||
vk::Queue present_queue = instance.GetPresentQueue();
|
||||
vk::Result result = present_queue.presentKHR(present_info);
|
||||
|
||||
switch (result) {
|
||||
@ -115,91 +128,68 @@ void Swapchain::Present() {
|
||||
break;
|
||||
}
|
||||
|
||||
frame_index = (frame_index + 1) % swapchain_images.size();
|
||||
current_frame = (current_frame + 1) % images.size();
|
||||
}
|
||||
|
||||
void Swapchain::PopulateSwapchainDetails(vk::SurfaceKHR surface, u32 width, u32 height) {
|
||||
auto gpu = g_vk_instace->GetPhysicalDevice();
|
||||
void Swapchain::Configure(u32 width, u32 height) {
|
||||
vk::PhysicalDevice physical = instance.GetPhysicalDevice();
|
||||
|
||||
// Choose surface format
|
||||
auto formats = gpu.getSurfaceFormatsKHR(surface);
|
||||
details.format = formats[0];
|
||||
auto formats = physical.getSurfaceFormatsKHR(surface);
|
||||
surface_format = formats[0];
|
||||
|
||||
if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
|
||||
details.format = { vk::Format::eB8G8R8A8Unorm };
|
||||
}
|
||||
else {
|
||||
for (const auto& format : formats) {
|
||||
if (format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear &&
|
||||
format.format == vk::Format::eB8G8R8A8Unorm) {
|
||||
details.format = format;
|
||||
break;
|
||||
}
|
||||
surface_format = vk::SurfaceFormatKHR{
|
||||
.format = vk::Format::eB8G8R8A8Unorm
|
||||
};
|
||||
} else {
|
||||
auto iter = std::find_if(formats.begin(), formats.end(), [](vk::SurfaceFormatKHR format) -> bool {
|
||||
return format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear &&
|
||||
format.format == vk::Format::eB8G8R8A8Unorm;
|
||||
});
|
||||
|
||||
if (iter == formats.end()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!");
|
||||
}
|
||||
}
|
||||
|
||||
// Checks if a particular mode is supported, if it is, returns that mode.
|
||||
auto modes = gpu.getSurfacePresentModesKHR(surface);
|
||||
auto ModePresent = [&modes](vk::PresentModeKHR check_mode) {
|
||||
auto it = std::find_if(modes.begin(), modes.end(), [check_mode](const auto& mode) {
|
||||
return check_mode == mode;
|
||||
});
|
||||
|
||||
return it != modes.end();
|
||||
};
|
||||
auto modes = physical.getSurfacePresentModesKHR(surface);
|
||||
|
||||
// FIFO is guaranteed by the Vulkan standard to be available
|
||||
details.present_mode = vk::PresentModeKHR::eFifo;
|
||||
present_mode = vk::PresentModeKHR::eFifo;
|
||||
|
||||
auto iter = std::find_if(modes.begin(), modes.end(), [](vk::PresentModeKHR mode) {
|
||||
return vk::PresentModeKHR::eMailbox == mode;
|
||||
});
|
||||
|
||||
// Prefer Mailbox if present for lowest latency
|
||||
if (ModePresent(vk::PresentModeKHR::eMailbox)) {
|
||||
details.present_mode = vk::PresentModeKHR::eMailbox;
|
||||
if (iter != modes.end()) {
|
||||
present_mode = vk::PresentModeKHR::eMailbox;
|
||||
}
|
||||
|
||||
// Query surface extent
|
||||
auto capabilities = gpu.getSurfaceCapabilitiesKHR(surface);
|
||||
details.extent = capabilities.currentExtent;
|
||||
auto capabilities = physical.getSurfaceCapabilitiesKHR(surface);
|
||||
extent = capabilities.currentExtent;
|
||||
|
||||
if (capabilities.currentExtent.width == std::numeric_limits<u32>::max()) {
|
||||
details.extent.width = std::clamp(width, capabilities.minImageExtent.width,
|
||||
extent.width = std::clamp(width, capabilities.minImageExtent.width,
|
||||
capabilities.maxImageExtent.width);
|
||||
details.extent.height = std::clamp(height, capabilities.minImageExtent.height,
|
||||
extent.height = std::clamp(height, capabilities.minImageExtent.height,
|
||||
capabilities.maxImageExtent.height);
|
||||
}
|
||||
|
||||
// Select number of images in swap chain, we prefer one buffer in the background to work on
|
||||
details.image_count = capabilities.minImageCount + 1;
|
||||
image_count = capabilities.minImageCount + 1;
|
||||
if (capabilities.maxImageCount > 0) {
|
||||
details.image_count = std::min(details.image_count, capabilities.maxImageCount);
|
||||
image_count = std::min(image_count, capabilities.maxImageCount);
|
||||
}
|
||||
|
||||
// Prefer identity transform if possible
|
||||
details.transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
|
||||
if (!(capabilities.supportedTransforms & details.transform)) {
|
||||
details.transform = capabilities.currentTransform;
|
||||
transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
|
||||
if (!(capabilities.supportedTransforms & transform)) {
|
||||
transform = capabilities.currentTransform;
|
||||
}
|
||||
}
|
||||
|
||||
void Swapchain::SetupImages() {
|
||||
// Get the swap chain images
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
auto images = device.getSwapchainImagesKHR(swapchain);
|
||||
|
||||
Texture::Info image_info{
|
||||
.width = details.extent.width,
|
||||
.height = details.extent.height,
|
||||
.format = details.format.format,
|
||||
.type = vk::ImageType::e2D,
|
||||
.view_type = vk::ImageViewType::e2D,
|
||||
.usage = vk::ImageUsageFlagBits::eColorAttachment
|
||||
};
|
||||
|
||||
// Create the swapchain buffers containing the image and imageview
|
||||
swapchain_images.resize(images.size());
|
||||
for (int i = 0; i < swapchain_images.size(); i++) {
|
||||
// Wrap swapchain images with Texture
|
||||
swapchain_images[i].Adopt(image_info, images[i]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
} // namespace VideoCore::Vulkan
|
||||
|
@ -4,62 +4,90 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
struct SwapChainDetails {
|
||||
vk::SurfaceFormatKHR format;
|
||||
class Instance;
|
||||
|
||||
class Swapchain {
|
||||
public:
|
||||
Swapchain(Instance& instance, vk::SurfaceKHR surface);
|
||||
~Swapchain();
|
||||
|
||||
/// Creates (or recreates) the swapchain with a given size.
|
||||
void Create(u32 width, u32 height, bool vsync_enabled);
|
||||
|
||||
/// Acquire the next image in the swapchain.
|
||||
void AcquireNextImage();
|
||||
|
||||
/// Present the current image and move to the next one
|
||||
void Present();
|
||||
|
||||
/// Return current swapchain state
|
||||
inline vk::Extent2D GetExtent() const {
|
||||
return extent;
|
||||
}
|
||||
|
||||
/// Return the swapchain surface
|
||||
inline vk::SurfaceKHR GetSurface() const {
|
||||
return surface;
|
||||
}
|
||||
|
||||
/// Return the swapchain format
|
||||
inline vk::SurfaceFormatKHR GetSurfaceFormat() const {
|
||||
return surface_format;
|
||||
}
|
||||
|
||||
/// Return the Vulkan swapchain handle
|
||||
inline vk::SwapchainKHR GetHandle() const {
|
||||
return swapchain;
|
||||
}
|
||||
|
||||
/// Return the semaphore that will be signaled when vkAcquireNextImageKHR completes
|
||||
inline vk::Semaphore GetAvailableSemaphore() const {
|
||||
return image_available;
|
||||
}
|
||||
|
||||
/// Return the semaphore that will signal when the current image will be presented
|
||||
inline vk::Semaphore GetPresentSemaphore() const {
|
||||
return render_finished;
|
||||
}
|
||||
|
||||
/// Return the current swapchain image
|
||||
inline vk::Image GetCurrentImage() {
|
||||
return images[current_image];
|
||||
}
|
||||
|
||||
/// Returns true when the swapchain should be recreated
|
||||
inline bool NeedsRecreation() const {
|
||||
return is_suboptimal || is_outdated;
|
||||
}
|
||||
|
||||
private:
|
||||
void Configure(u32 width, u32 height);
|
||||
|
||||
private:
|
||||
Instance& instance;
|
||||
vk::SwapchainKHR swapchain = VK_NULL_HANDLE;
|
||||
vk::SurfaceKHR surface = VK_NULL_HANDLE;
|
||||
|
||||
// Swapchain properties
|
||||
vk::SurfaceFormatKHR surface_format;
|
||||
vk::PresentModeKHR present_mode;
|
||||
vk::Extent2D extent;
|
||||
vk::SurfaceTransformFlagBitsKHR transform;
|
||||
u32 image_count;
|
||||
};
|
||||
|
||||
class Swapchain {
|
||||
public:
|
||||
Swapchain(vk::SurfaceKHR surface);
|
||||
~Swapchain();
|
||||
|
||||
/// Creates (or recreates) the swapchain with a given size.
|
||||
bool Create(u32 width, u32 height, bool vsync_enabled);
|
||||
|
||||
/// Acquire the next image in the swapchain.
|
||||
void AcquireNextImage();
|
||||
void Present();
|
||||
|
||||
/// Returns true when the swapchain needs to be recreated.
|
||||
bool NeedsRecreation() const { return IsSubOptimal() || IsOutDated(); }
|
||||
bool IsOutDated() const { return is_outdated; }
|
||||
bool IsSubOptimal() const { return is_suboptimal; }
|
||||
bool IsVSyncEnabled() const { return vsync_enabled; }
|
||||
u32 GetCurrentImageIndex() const { return image_index; }
|
||||
|
||||
/// Get current swapchain state
|
||||
vk::Extent2D GetSize() const { return details.extent; }
|
||||
vk::SurfaceKHR GetSurface() const { return surface; }
|
||||
vk::SurfaceFormatKHR GetSurfaceFormat() const { return details.format; }
|
||||
vk::SwapchainKHR GetSwapChain() const { return swapchain; }
|
||||
const vk::Semaphore& GetAvailableSemaphore() const { return image_available; }
|
||||
const vk::Semaphore& GetRenderSemaphore() const { return render_finished; }
|
||||
Texture& GetCurrentImage() { return swapchain_images[image_index]; }
|
||||
|
||||
private:
|
||||
void PopulateSwapchainDetails(vk::SurfaceKHR surface, u32 width, u32 height);
|
||||
void SetupImages();
|
||||
|
||||
private:
|
||||
SwapChainDetails details{};
|
||||
vk::SurfaceKHR surface;
|
||||
// Swapchain state
|
||||
std::vector<vk::Image> images;
|
||||
vk::Semaphore image_available, render_finished;
|
||||
bool vsync_enabled{false}, is_outdated{true}, is_suboptimal{true};
|
||||
|
||||
vk::SwapchainKHR swapchain{VK_NULL_HANDLE};
|
||||
std::vector<Texture> swapchain_images;
|
||||
u32 image_index{0}, frame_index{0};
|
||||
u32 current_image = 0, current_frame = 0;
|
||||
bool vsync_enabled = false;
|
||||
bool is_outdated = true;
|
||||
bool is_suboptimal = true;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -2,232 +2,185 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/thread.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
TaskScheduler::~TaskScheduler() {
|
||||
// 16MB should be enough for a single frame
|
||||
constexpr BufferInfo STAGING_INFO = {
|
||||
.capacity = 16 * 1024 * 1024,
|
||||
.usage = BufferUsage::Staging
|
||||
};
|
||||
|
||||
CommandScheduler::CommandScheduler(Instance& instance) : instance(instance) {
|
||||
|
||||
}
|
||||
|
||||
CommandScheduler::~CommandScheduler() {
|
||||
// Destroy Vulkan resources
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
device.waitIdle();
|
||||
vk::Device device = instance.GetDevice();
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
|
||||
for (auto& task : tasks) {
|
||||
task.staging.Destroy();
|
||||
device.destroyDescriptorPool(task.pool);
|
||||
for (auto& command : commands) {
|
||||
device.destroyFence(command.fence);
|
||||
|
||||
// Clean up any scheduled resources
|
||||
for (auto& func : command.cleanups) {
|
||||
func(device, allocator);
|
||||
}
|
||||
}
|
||||
|
||||
SyncToGPU();
|
||||
device.destroyCommandPool(command_pool);
|
||||
device.destroySemaphore(timeline);
|
||||
}
|
||||
|
||||
std::tuple<u8*, u32> TaskScheduler::RequestStaging(u32 size) {
|
||||
auto& task = tasks[current_task];
|
||||
if (size > STAGING_BUFFER_SIZE - task.current_offset) {
|
||||
// If we run out of space, allocate a new buffer.
|
||||
// The old one will be safely destroyed when the task finishes
|
||||
task.staging.Recreate();
|
||||
task.current_offset = 0;
|
||||
|
||||
return std::make_tuple(task.staging.GetHostPointer(), 0);
|
||||
}
|
||||
|
||||
u8* ptr = task.staging.GetHostPointer() + task.current_offset;
|
||||
std::memset(ptr, 0, size);
|
||||
|
||||
task.current_offset += size;
|
||||
return std::make_tuple(ptr, task.current_offset - size);
|
||||
}
|
||||
|
||||
Buffer& TaskScheduler::GetStaging() {
|
||||
return tasks[current_task].staging;
|
||||
}
|
||||
|
||||
bool TaskScheduler::Create() {
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
|
||||
// Create command pool
|
||||
vk::CommandPoolCreateInfo pool_info(vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
|
||||
g_vk_instace->GetGraphicsQueueFamilyIndex());
|
||||
command_pool = device.createCommandPool(pool_info);
|
||||
|
||||
// Create timeline semaphore for syncronization
|
||||
vk::SemaphoreTypeCreateInfo timeline_info{vk::SemaphoreType::eTimeline, 0};
|
||||
vk::SemaphoreCreateInfo semaphore_info{{}, &timeline_info};
|
||||
|
||||
timeline = device.createSemaphore(semaphore_info);
|
||||
|
||||
Buffer::Info staging_info{
|
||||
.size = STAGING_BUFFER_SIZE,
|
||||
.properties = vk::MemoryPropertyFlagBits::eHostVisible |
|
||||
vk::MemoryPropertyFlagBits::eHostCoherent,
|
||||
.usage = vk::BufferUsageFlagBits::eTransferSrc |
|
||||
vk::BufferUsageFlagBits::eTransferDst
|
||||
bool CommandScheduler::Create() {
|
||||
vk::Device device = instance.GetDevice();
|
||||
const vk::CommandPoolCreateInfo pool_info = {
|
||||
.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
|
||||
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex()
|
||||
};
|
||||
|
||||
// Should be enough for a single frame
|
||||
const vk::DescriptorPoolSize pool_size{vk::DescriptorType::eCombinedImageSampler, 64};
|
||||
vk::DescriptorPoolCreateInfo pool_create_info{{}, 1024, pool_size};
|
||||
// Create command pool
|
||||
command_pool = device.createCommandPool(pool_info);
|
||||
|
||||
for (auto& task : tasks) {
|
||||
// Create command buffers
|
||||
vk::CommandBufferAllocateInfo buffer_info{command_pool, vk::CommandBufferLevel::ePrimary, 2};
|
||||
auto buffers = device.allocateCommandBuffers(buffer_info);
|
||||
std::ranges::copy_n(buffers.begin(), 2, task.command_buffers.begin());
|
||||
vk::CommandBufferAllocateInfo buffer_info = {
|
||||
.commandPool = command_pool,
|
||||
.level = vk::CommandBufferLevel::ePrimary,
|
||||
.commandBufferCount = 2 * SCHEDULER_COMMAND_COUNT
|
||||
};
|
||||
|
||||
// Create staging buffer
|
||||
task.staging.Create(staging_info);
|
||||
// Allocate all command buffers
|
||||
const auto command_buffers = device.allocateCommandBuffers(buffer_info);
|
||||
|
||||
// Create descriptor pool
|
||||
task.pool = device.createDescriptorPool(pool_create_info);
|
||||
// Initialize command slots
|
||||
for (std::size_t i = 0; i < commands.size(); i++) {
|
||||
commands[i] = CommandSlot{
|
||||
.render_command_buffer = command_buffers[2 * i],
|
||||
.upload_command_buffer = command_buffers[2 * i + 1],
|
||||
.fence = device.createFence({}),
|
||||
.upload_buffer = std::make_unique<Buffer>(instance, *this, STAGING_INFO)
|
||||
};
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
vk::CommandBuffer TaskScheduler::GetRenderCommandBuffer() const {
|
||||
const auto& task = tasks[current_task];
|
||||
return task.command_buffers[1];
|
||||
}
|
||||
|
||||
vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() {
|
||||
auto& task = tasks[current_task];
|
||||
if (!task.use_upload_buffer) {
|
||||
auto& cmdbuffer = task.command_buffers[0];
|
||||
cmdbuffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
|
||||
task.use_upload_buffer = true;
|
||||
}
|
||||
|
||||
return task.command_buffers[0];
|
||||
}
|
||||
|
||||
vk::DescriptorPool TaskScheduler::GetDescriptorPool() const {
|
||||
const auto& task = tasks[current_task];
|
||||
return task.pool;
|
||||
}
|
||||
|
||||
void TaskScheduler::SyncToGPU(u64 task_index) {
|
||||
// No need to sync if the GPU already has finished the task
|
||||
auto tick = GetGPUTick();
|
||||
if (tasks[task_index].task_id <= tick) {
|
||||
void CommandScheduler::Synchronize() {
|
||||
// Don't synchronize the same command twicec
|
||||
CommandSlot& command = commands[current_command];
|
||||
if (command.fence_counter <= completed_fence_counter) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Wait for the task to complete
|
||||
vk::SemaphoreWaitInfo wait_info{{}, timeline, tasks[task_index].task_id};
|
||||
auto result = g_vk_instace->GetDevice().waitSemaphores(wait_info, UINT64_MAX);
|
||||
|
||||
if (result != vk::Result::eSuccess) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed waiting for timeline semaphore!");
|
||||
// Wait for this command buffer to be completed.
|
||||
vk::Device device = instance.GetDevice();
|
||||
if (device.waitForFences(command.fence, true, UINT64_MAX) != vk::Result::eSuccess) {
|
||||
LOG_ERROR(Render_Vulkan, "Waiting for fences failed!");
|
||||
}
|
||||
|
||||
// Cleanup resources for command buffers that have completed along with the current one
|
||||
const u64 now_fence_counter = command.fence_counter;
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
for (CommandSlot& command : commands) {
|
||||
if (command.fence_counter < now_fence_counter &&
|
||||
command.fence_counter > completed_fence_counter) {
|
||||
for (auto& func: command.cleanups) {
|
||||
func(device, allocator);
|
||||
}
|
||||
|
||||
command.cleanups.clear();
|
||||
}
|
||||
}
|
||||
|
||||
completed_fence_counter = now_fence_counter;
|
||||
}
|
||||
|
||||
void TaskScheduler::SyncToGPU() {
|
||||
SyncToGPU(current_task);
|
||||
}
|
||||
|
||||
u64 TaskScheduler::GetCPUTick() const {
|
||||
return current_task_id;
|
||||
}
|
||||
|
||||
u64 TaskScheduler::GetGPUTick() const {
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
return device.getSemaphoreCounterValue(timeline);
|
||||
}
|
||||
|
||||
void TaskScheduler::Submit(bool wait_completion, bool present, Swapchain* swapchain) {
|
||||
// End the current task recording.
|
||||
auto& task = tasks[current_task];
|
||||
void CommandScheduler::Submit(bool wait_completion,
|
||||
vk::Semaphore wait_semaphore,
|
||||
vk::Semaphore signal_semaphore) {
|
||||
const CommandSlot& command = commands[current_command];
|
||||
|
||||
// End command buffers
|
||||
task.command_buffers[1].end();
|
||||
if (task.use_upload_buffer) {
|
||||
task.command_buffers[0].end();
|
||||
command.render_command_buffer.end();
|
||||
if (command.use_upload_buffer) {
|
||||
command.upload_command_buffer.end();
|
||||
}
|
||||
|
||||
const u32 num_signal_semaphores = present ? 2U : 1U;
|
||||
const std::array signal_values{task.task_id, u64(0)};
|
||||
std::array signal_semaphores{timeline, vk::Semaphore{}};
|
||||
|
||||
const u32 num_wait_semaphores = present ? 2U : 1U;
|
||||
const std::array wait_values{task.task_id - 1, u64(1)};
|
||||
std::array wait_semaphores{timeline, vk::Semaphore{}};
|
||||
|
||||
// When the task completes the timeline will increment to the task id
|
||||
const vk::TimelineSemaphoreSubmitInfoKHR timeline_si{num_wait_semaphores, wait_values.data(),
|
||||
num_signal_semaphores, signal_values.data()};
|
||||
|
||||
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks{
|
||||
constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks{
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::PipelineStageFlagBits::eColorAttachmentOutput,
|
||||
};
|
||||
|
||||
const u32 cmdbuffer_count = task.use_upload_buffer ? 2u : 1u;
|
||||
const vk::SubmitInfo submit_info{num_wait_semaphores, wait_semaphores.data(), wait_stage_masks.data(), cmdbuffer_count,
|
||||
&task.command_buffers[2 - cmdbuffer_count], num_signal_semaphores, signal_semaphores.data(),
|
||||
&timeline_si};
|
||||
const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u;
|
||||
const u32 wait_semaphore_count = wait_semaphore ? 1u : 0u;
|
||||
const u32 command_buffer_count = command.use_upload_buffer ? 2u : 1u;
|
||||
const std::array command_buffers = { command.render_command_buffer,
|
||||
command.upload_command_buffer };
|
||||
|
||||
// Wait for new swapchain image
|
||||
if (present) {
|
||||
signal_semaphores[1] = swapchain->GetRenderSemaphore();
|
||||
wait_semaphores[1] = swapchain->GetAvailableSemaphore();
|
||||
}
|
||||
// Prepeare submit info
|
||||
const vk::SubmitInfo submit_info = {
|
||||
.waitSemaphoreCount = wait_semaphore_count,
|
||||
.pWaitSemaphores = &wait_semaphore,
|
||||
.pWaitDstStageMask = wait_stage_masks.data(),
|
||||
.commandBufferCount = command_buffer_count,
|
||||
.pCommandBuffers = command_buffers.data(),
|
||||
.signalSemaphoreCount = signal_semaphore_count,
|
||||
.pSignalSemaphores = &signal_semaphore,
|
||||
};
|
||||
|
||||
// Submit the command buffer
|
||||
auto queue = g_vk_instace->GetGraphicsQueue();
|
||||
queue.submit(submit_info);
|
||||
|
||||
// Present the image when rendering has finished
|
||||
if (present) {
|
||||
swapchain->Present();
|
||||
}
|
||||
vk::Queue queue = instance.GetGraphicsQueue();
|
||||
queue.submit(submit_info, command.fence);
|
||||
|
||||
// Block host until the GPU catches up
|
||||
if (wait_completion) {
|
||||
SyncToGPU();
|
||||
Synchronize();
|
||||
}
|
||||
|
||||
// Switch to next cmdbuffer.
|
||||
BeginTask();
|
||||
SwitchSlot();
|
||||
}
|
||||
|
||||
void TaskScheduler::Schedule(std::function<void()> func) {
|
||||
auto& task = tasks[current_task];
|
||||
task.cleanups.push_back(func);
|
||||
void CommandScheduler::Schedule(Deleter&& func) {
|
||||
auto& command = commands[current_command];
|
||||
command.cleanups.push_back(func);
|
||||
}
|
||||
|
||||
void TaskScheduler::BeginTask() {
|
||||
u32 next_task_index = (current_task + 1) % TASK_COUNT;
|
||||
auto& task = tasks[next_task_index];
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
vk::CommandBuffer CommandScheduler::GetUploadCommandBuffer() {
|
||||
CommandSlot& command = commands[current_command];
|
||||
if (!command.use_upload_buffer) {
|
||||
const vk::CommandBufferBeginInfo begin_info = {
|
||||
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
|
||||
};
|
||||
|
||||
// Wait for the GPU to finish with all resources for this task.
|
||||
SyncToGPU(next_task_index);
|
||||
|
||||
// Delete all resources that can be freed now
|
||||
for (auto& func : task.cleanups) {
|
||||
func();
|
||||
command.upload_command_buffer.begin(begin_info);
|
||||
command.use_upload_buffer = true;
|
||||
}
|
||||
|
||||
device.resetDescriptorPool(task.pool);
|
||||
task.command_buffers[1].begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
|
||||
|
||||
// Move to the next command buffer.
|
||||
current_task = next_task_index;
|
||||
task.task_id = ++current_task_id;
|
||||
task.current_offset = 0;
|
||||
task.use_upload_buffer = false;
|
||||
task.cleanups.clear();
|
||||
|
||||
auto& state = VulkanState::Get();
|
||||
state.InitDescriptorSets();
|
||||
return command.upload_command_buffer;
|
||||
}
|
||||
|
||||
std::unique_ptr<TaskScheduler> g_vk_task_scheduler;
|
||||
void CommandScheduler::SwitchSlot() {
|
||||
current_command = (current_command + 1) % SCHEDULER_COMMAND_COUNT;
|
||||
CommandSlot& command = commands[current_command];
|
||||
|
||||
// Wait for the GPU to finish with all resources for this command.
|
||||
Synchronize();
|
||||
|
||||
const vk::CommandBufferBeginInfo begin_info = {
|
||||
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
|
||||
};
|
||||
|
||||
// Move to the next command buffer.
|
||||
vk::Device device = instance.GetDevice();
|
||||
device.resetFences(command.fence);
|
||||
command.render_command_buffer.begin(begin_info);
|
||||
command.fence_counter = next_fence_counter++;
|
||||
command.use_upload_buffer = false;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -4,68 +4,81 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <array>
|
||||
#include "video_core/renderer_vulkan/vk_buffer.h"
|
||||
#include <functional>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
constexpr u32 TASK_COUNT = 5;
|
||||
constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
|
||||
|
||||
class Swapchain;
|
||||
using Deleter = std::function<void(vk::Device, VmaAllocator)>;
|
||||
|
||||
/// Wrapper class around command buffer execution. Handles an arbitrary
|
||||
/// number of tasks that can be submitted concurrently. This allows the host
|
||||
/// to start recording the next frame while the GPU is working on the
|
||||
/// current one. Larger values can be used with caution, as they can cause
|
||||
/// frame latency if the CPU is too far ahead of the GPU
|
||||
class TaskScheduler {
|
||||
class Buffer;
|
||||
class Instance;
|
||||
|
||||
class CommandScheduler {
|
||||
public:
|
||||
TaskScheduler() = default;
|
||||
~TaskScheduler();
|
||||
CommandScheduler(Instance& instance);
|
||||
~CommandScheduler();
|
||||
|
||||
/// Create and initialize the work scheduler
|
||||
bool Create();
|
||||
|
||||
/// Retrieve either of the current frame's command buffers
|
||||
vk::CommandBuffer GetRenderCommandBuffer() const;
|
||||
/// Block host until the current command completes execution
|
||||
void Synchronize();
|
||||
|
||||
/// Defer operation until the current command completes execution
|
||||
void Schedule(Deleter&& func);
|
||||
|
||||
/// Submits the current command to the graphics queue
|
||||
void Submit(bool wait_completion = false, vk::Semaphore wait = VK_NULL_HANDLE,
|
||||
vk::Semaphore signal = VK_NULL_HANDLE);
|
||||
|
||||
/// Returns the command buffer used for early upload operations.
|
||||
/// This is useful for vertex/uniform buffer uploads that happen once per frame
|
||||
vk::CommandBuffer GetUploadCommandBuffer();
|
||||
vk::DescriptorPool GetDescriptorPool() const;
|
||||
|
||||
/// Access the staging buffer of the current task
|
||||
std::tuple<u8*, u32> RequestStaging(u32 size);
|
||||
Buffer& GetStaging();
|
||||
/// Returns the command buffer used for rendering
|
||||
inline vk::CommandBuffer GetRenderCommandBuffer() const {
|
||||
const CommandSlot& command = commands[current_command];
|
||||
return command.render_command_buffer;
|
||||
}
|
||||
|
||||
/// Query and/or synchronization CPU and GPU
|
||||
u64 GetCPUTick() const;
|
||||
u64 GetGPUTick() const;
|
||||
void SyncToGPU();
|
||||
void SyncToGPU(u64 task_index);
|
||||
/// Returns the upload buffer of the active command slot
|
||||
inline Buffer& GetCommandUploadBuffer() {
|
||||
CommandSlot& command = commands[current_command];
|
||||
return *command.upload_buffer;
|
||||
}
|
||||
|
||||
void Schedule(std::function<void()> func);
|
||||
void Submit(bool wait_completion = false, bool present = false, Swapchain* swapchain = nullptr);
|
||||
|
||||
void BeginTask();
|
||||
/// Returns the index of the current command slot
|
||||
inline u32 GetCurrentSlotIndex() const {
|
||||
return current_command;
|
||||
}
|
||||
|
||||
private:
|
||||
struct Task {
|
||||
/// Activates the next command slot and optionally waits for its completion
|
||||
void SwitchSlot();
|
||||
|
||||
private:
|
||||
Instance& instance;
|
||||
u64 next_fence_counter = 1;
|
||||
u64 completed_fence_counter = 0;
|
||||
|
||||
struct CommandSlot {
|
||||
bool use_upload_buffer = false;
|
||||
u64 current_offset = 0, task_id = 0;
|
||||
std::array<vk::CommandBuffer, 2> command_buffers;
|
||||
std::vector<std::function<void()>> cleanups;
|
||||
vk::DescriptorPool pool;
|
||||
Buffer staging;
|
||||
u64 fence_counter = 0;
|
||||
vk::CommandBuffer render_command_buffer, upload_command_buffer;
|
||||
vk::Fence fence = VK_NULL_HANDLE;
|
||||
std::unique_ptr<Buffer> upload_buffer;
|
||||
std::vector<Deleter> cleanups;
|
||||
};
|
||||
|
||||
vk::Semaphore timeline;
|
||||
vk::CommandPool command_pool;
|
||||
u64 current_task_id = 0;
|
||||
|
||||
// Each task contains unique resources
|
||||
std::array<Task, TASK_COUNT> tasks;
|
||||
u64 current_task = -1;
|
||||
vk::CommandPool command_pool = VK_NULL_HANDLE;
|
||||
std::array<CommandSlot, SCHEDULER_COMMAND_COUNT> commands;
|
||||
u32 current_command = 0;
|
||||
};
|
||||
|
||||
extern std::unique_ptr<TaskScheduler> g_vk_task_scheduler;
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -2,288 +2,229 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/pica_to_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
static int BytesPerPixel(vk::Format format) {
|
||||
inline vk::Format ToVkFormat(TextureFormat format) {
|
||||
switch (format) {
|
||||
case vk::Format::eD32SfloatS8Uint:
|
||||
return 5;
|
||||
case vk::Format::eD32Sfloat:
|
||||
case vk::Format::eB8G8R8A8Unorm:
|
||||
case vk::Format::eR8G8B8A8Uint:
|
||||
case vk::Format::eR8G8B8A8Unorm:
|
||||
case vk::Format::eD24UnormS8Uint:
|
||||
return 4;
|
||||
case vk::Format::eR8G8B8Unorm:
|
||||
case vk::Format::eR8G8B8Srgb:
|
||||
return 3;
|
||||
case vk::Format::eR5G6B5UnormPack16:
|
||||
case vk::Format::eR5G5B5A1UnormPack16:
|
||||
case vk::Format::eR4G4B4A4UnormPack16:
|
||||
case vk::Format::eD16Unorm:
|
||||
return 2;
|
||||
case TextureFormat::RGBA8:
|
||||
return vk::Format::eR8G8B8A8Unorm;
|
||||
case TextureFormat::RGB8:
|
||||
return vk::Format::eR8G8B8Unorm;
|
||||
case TextureFormat::RGB5A1:
|
||||
return vk::Format::eR5G5B5A1UnormPack16;
|
||||
case TextureFormat::RGB565:
|
||||
return vk::Format::eR5G6B5UnormPack16;
|
||||
case TextureFormat::RGBA4:
|
||||
return vk::Format::eR4G4B4A4UnormPack16;
|
||||
case TextureFormat::D16:
|
||||
return vk::Format::eD16Unorm;
|
||||
case TextureFormat::D24:
|
||||
return vk::Format::eX8D24UnormPack32;
|
||||
case TextureFormat::D24S8:
|
||||
return vk::Format::eD24UnormS8Uint;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format);
|
||||
return vk::Format::eUndefined;
|
||||
}
|
||||
}
|
||||
|
||||
vk::ImageAspectFlags GetImageAspect(vk::Format format) {
|
||||
vk::ImageAspectFlags flags;
|
||||
switch (format) {
|
||||
case vk::Format::eD16UnormS8Uint:
|
||||
case vk::Format::eD24UnormS8Uint:
|
||||
case vk::Format::eD32SfloatS8Uint:
|
||||
flags = vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth;
|
||||
break;
|
||||
case vk::Format::eD16Unorm:
|
||||
case vk::Format::eD32Sfloat:
|
||||
flags = vk::ImageAspectFlagBits::eDepth;
|
||||
break;
|
||||
inline vk::ImageType ToVkImageType(TextureType type) {
|
||||
switch (type) {
|
||||
case TextureType::Texture1D:
|
||||
return vk::ImageType::e1D;
|
||||
case TextureType::Texture2D:
|
||||
return vk::ImageType::e2D;
|
||||
case TextureType::Texture3D:
|
||||
return vk::ImageType::e3D;
|
||||
default:
|
||||
flags = vk::ImageAspectFlagBits::eColor;
|
||||
LOG_ERROR(Render_Vulkan, "Unknown texture type {}!", type);
|
||||
return vk::ImageType::e2D;
|
||||
}
|
||||
}
|
||||
|
||||
return flags;
|
||||
inline vk::ImageViewType ToVkImageViewType(TextureViewType view_type) {
|
||||
switch (view_type) {
|
||||
case TextureViewType::View1D:
|
||||
return vk::ImageViewType::e1D;
|
||||
case TextureViewType::View2D:
|
||||
return vk::ImageViewType::e2D;
|
||||
case TextureViewType::View3D:
|
||||
return vk::ImageViewType::e3D;
|
||||
case TextureViewType::ViewCube:
|
||||
return vk::ImageViewType::eCube;
|
||||
case TextureViewType::View1DArray:
|
||||
return vk::ImageViewType::e1DArray;
|
||||
case TextureViewType::View2DArray:
|
||||
return vk::ImageViewType::e2DArray;
|
||||
case TextureViewType::ViewCubeArray:
|
||||
return vk::ImageViewType::eCubeArray;
|
||||
default:
|
||||
LOG_ERROR(Render_Vulkan, "Unknown texture view type {}!", view_type);
|
||||
return vk::ImageViewType::e2D;
|
||||
}
|
||||
}
|
||||
|
||||
Texture::Texture(Instance& instance, CommandScheduler& scheduler) :
|
||||
instance(instance), scheduler(scheduler) {}
|
||||
|
||||
Texture::Texture(Instance& instance, CommandScheduler& scheduler,
|
||||
const TextureInfo& info) : TextureBase(info),
|
||||
instance(instance), scheduler(scheduler) {
|
||||
|
||||
// Convert the input format to another that supports attachments
|
||||
advertised_format = ToVkFormat(info.format);
|
||||
internal_format = instance.GetFormatAlternative(advertised_format);
|
||||
aspect = GetImageAspect(advertised_format);
|
||||
|
||||
vk::Device device = instance.GetDevice();
|
||||
const vk::ImageCreateInfo image_info = {
|
||||
.flags = info.view_type == TextureViewType::ViewCube ?
|
||||
vk::ImageCreateFlagBits::eCubeCompatible :
|
||||
vk::ImageCreateFlags{},
|
||||
.imageType = ToVkImageType(info.type),
|
||||
.format = internal_format,
|
||||
.extent = {info.width, info.height, 1},
|
||||
.mipLevels = info.levels,
|
||||
.arrayLayers = info.view_type == TextureViewType::ViewCube ? 6u : 1u,
|
||||
.samples = vk::SampleCountFlagBits::e1,
|
||||
.usage = GetImageUsage(aspect),
|
||||
};
|
||||
|
||||
const VmaAllocationCreateInfo alloc_info = {
|
||||
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE
|
||||
};
|
||||
|
||||
VkImage unsafe_image = VK_NULL_HANDLE;
|
||||
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
|
||||
// Allocate texture memory
|
||||
vmaCreateImage(allocator, &unsafe_image_info, &alloc_info, &unsafe_image, &allocation, nullptr);
|
||||
image = vk::Image{unsafe_image};
|
||||
|
||||
const vk::ImageViewCreateInfo view_info = {
|
||||
.image = image,
|
||||
.viewType = ToVkImageViewType(info.view_type),
|
||||
.format = internal_format,
|
||||
.subresourceRange = {aspect, 0, info.levels, 0, 1}
|
||||
};
|
||||
|
||||
// Create image view
|
||||
image_view = device.createImageView(view_info);
|
||||
}
|
||||
|
||||
Texture::Texture(Instance& instance, CommandScheduler& scheduler,
|
||||
vk::Image image, const TextureInfo& info) : TextureBase(info),
|
||||
instance(instance), scheduler(scheduler), image(image),
|
||||
is_texture_owned(false) {
|
||||
|
||||
const vk::ImageViewCreateInfo view_info = {
|
||||
.image = image,
|
||||
.viewType = ToVkImageViewType(info.view_type),
|
||||
.format = internal_format,
|
||||
.subresourceRange = {aspect, 0, info.levels, 0, 1}
|
||||
};
|
||||
|
||||
// Create image view
|
||||
vk::Device device = instance.GetDevice();
|
||||
image_view = device.createImageView(view_info);
|
||||
}
|
||||
|
||||
Texture::~Texture() {
|
||||
Destroy();
|
||||
}
|
||||
|
||||
Texture::Texture(Texture&& other) noexcept {
|
||||
info = std::exchange(other.info, Info{});
|
||||
texture = std::exchange(other.texture, VK_NULL_HANDLE);
|
||||
aspect = std::exchange(other.aspect, vk::ImageAspectFlagBits::eNone);
|
||||
view = std::exchange(other.view, VK_NULL_HANDLE);
|
||||
memory = std::exchange(other.memory, VK_NULL_HANDLE);
|
||||
image_size = std::exchange(other.image_size, 0);
|
||||
adopted = std::exchange(other.adopted, false);
|
||||
is_rgb = std::exchange(other.is_rgb, false);
|
||||
is_d24s8 = std::exchange(other.is_d24s8, false);
|
||||
}
|
||||
|
||||
Texture& Texture::operator=(Texture&& other) noexcept {
|
||||
Destroy();
|
||||
info = std::exchange(other.info, Info{});
|
||||
texture = std::exchange(other.texture, VK_NULL_HANDLE);
|
||||
aspect = std::exchange(other.aspect, vk::ImageAspectFlagBits::eNone);
|
||||
view = std::exchange(other.view, VK_NULL_HANDLE);
|
||||
memory = std::exchange(other.memory, VK_NULL_HANDLE);
|
||||
image_size = std::exchange(other.image_size, 0);
|
||||
adopted = std::exchange(other.adopted, false);
|
||||
is_rgb = std::exchange(other.is_rgb, false);
|
||||
is_d24s8 = std::exchange(other.is_d24s8, false);
|
||||
return *this;
|
||||
}
|
||||
|
||||
void Texture::Create(const Info& create_info) {
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
info = create_info;
|
||||
|
||||
// Emulate RGB8 format with RGBA8
|
||||
is_rgb = false;
|
||||
if (info.format == vk::Format::eR8G8B8Unorm) {
|
||||
is_rgb = true;
|
||||
info.format = vk::Format::eR8G8B8A8Unorm;
|
||||
}
|
||||
|
||||
is_d24s8 = false;
|
||||
if (info.format == vk::Format::eD24UnormS8Uint) {
|
||||
is_d24s8 = true;
|
||||
info.format = vk::Format::eD32SfloatS8Uint;
|
||||
}
|
||||
|
||||
// Create the texture
|
||||
image_size = info.width * info.height * BytesPerPixel(info.format);
|
||||
aspect = GetImageAspect(info.format);
|
||||
|
||||
vk::ImageCreateFlags flags{};
|
||||
if (info.view_type == vk::ImageViewType::eCube) {
|
||||
flags = vk::ImageCreateFlagBits::eCubeCompatible;
|
||||
}
|
||||
|
||||
vk::ImageCreateInfo image_info {
|
||||
flags, info.type, info.format,
|
||||
{ info.width, info.height, 1 }, info.levels, info.layers,
|
||||
static_cast<vk::SampleCountFlagBits>(info.multisamples),
|
||||
vk::ImageTiling::eOptimal, info.usage
|
||||
};
|
||||
|
||||
texture = device.createImage(image_info);
|
||||
|
||||
// Create texture memory
|
||||
auto requirements = device.getImageMemoryRequirements(texture);
|
||||
auto memory_index = Buffer::FindMemoryType(requirements.memoryTypeBits,
|
||||
vk::MemoryPropertyFlagBits::eDeviceLocal);
|
||||
vk::MemoryAllocateInfo alloc_info(requirements.size, memory_index);
|
||||
|
||||
memory = device.allocateMemory(alloc_info);
|
||||
device.bindImageMemory(texture, memory, 0);
|
||||
|
||||
// Create texture view
|
||||
vk::ImageViewCreateInfo view_info {
|
||||
{}, texture, info.view_type, info.format, {},
|
||||
{aspect, 0, info.levels, 0, info.layers}
|
||||
};
|
||||
|
||||
view = device.createImageView(view_info);
|
||||
}
|
||||
|
||||
void Texture::Create(Texture& other) {
|
||||
auto info = other.info;
|
||||
Create(info);
|
||||
|
||||
// Copy the buffer contents
|
||||
auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer();
|
||||
Transition(cmdbuffer, vk::ImageLayout::eTransferDstOptimal);
|
||||
|
||||
auto old_layout = other.GetLayout();
|
||||
other.Transition(cmdbuffer, vk::ImageLayout::eTransferSrcOptimal);
|
||||
|
||||
u32 copy_count = 0;
|
||||
std::array<vk::ImageCopy, 16> copy_regions;
|
||||
|
||||
for (u32 i = 0; i < info.levels; i++) {
|
||||
copy_regions[copy_count++] = vk::ImageCopy{
|
||||
vk::ImageSubresourceLayers{aspect, i, 0, 1}, {0},
|
||||
vk::ImageSubresourceLayers{aspect, i, 0, 1}, {0},
|
||||
{info.width, info.height, 0}
|
||||
};
|
||||
}
|
||||
|
||||
cmdbuffer.copyImage(other.GetHandle(), vk::ImageLayout::eTransferSrcOptimal,
|
||||
texture, vk::ImageLayout::eTransferDstOptimal, copy_count,
|
||||
copy_regions.data());
|
||||
|
||||
Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal);
|
||||
other.Transition(cmdbuffer, old_layout);
|
||||
}
|
||||
|
||||
void Texture::Adopt(const Info& create_info, vk::Image image) {
|
||||
info = create_info;
|
||||
image_size = info.width * info.height * BytesPerPixel(info.format);
|
||||
aspect = GetImageAspect(info.format);
|
||||
texture = image;
|
||||
|
||||
// Create texture view
|
||||
vk::ImageViewCreateInfo view_info {
|
||||
{}, texture, info.view_type, info.format, {},
|
||||
{aspect, 0, info.levels, 0, info.layers}
|
||||
};
|
||||
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
view = device.createImageView(view_info);
|
||||
adopted = true;
|
||||
}
|
||||
|
||||
void Texture::Destroy() {
|
||||
if (texture && !adopted) {
|
||||
// Make sure to unbind the texture before destroying it
|
||||
auto& state = VulkanState::Get();
|
||||
state.UnbindTexture(*this);
|
||||
|
||||
auto deleter = [texture = texture,
|
||||
view = view,
|
||||
memory = memory]() {
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
if (texture) {
|
||||
device.destroyImage(texture);
|
||||
device.destroyImageView(view);
|
||||
device.freeMemory(memory);
|
||||
}
|
||||
};
|
||||
|
||||
// Schedule deletion of the texture after it's no longer used
|
||||
// by the GPU
|
||||
g_vk_task_scheduler->Schedule(deleter);
|
||||
}
|
||||
|
||||
// If the image was adopted (probably from the swapchain) then only
|
||||
// destroy the view
|
||||
if (adopted) {
|
||||
g_vk_task_scheduler->Schedule([view = view](){
|
||||
auto device = g_vk_instace->GetDevice();
|
||||
if (image && is_texture_owned) {
|
||||
auto deleter = [image = image, allocation = allocation,
|
||||
view = image_view](vk::Device device, VmaAllocator allocator) {
|
||||
device.destroyImageView(view);
|
||||
});
|
||||
vmaDestroyImage(allocator, static_cast<VkImage>(image), allocation);
|
||||
};
|
||||
|
||||
// Schedule deletion of the texture after it's no longer used by the GPU
|
||||
scheduler.Schedule(deleter);
|
||||
} else if (!is_texture_owned) {
|
||||
// If the texture is not owning, destroy the view immediately as
|
||||
// synchronization is the caller's responsibility
|
||||
vk::Device device = instance.GetDevice();
|
||||
device.destroyImageView(image_view);
|
||||
}
|
||||
}
|
||||
|
||||
void Texture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout) {
|
||||
Transition(cmdbuffer, new_layout, 0, info.levels, 0, info.layers);
|
||||
}
|
||||
void Texture::Transition(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout,
|
||||
u32 level, u32 level_count) {
|
||||
ASSERT(level + level_count < TEXTURE_MAX_LEVELS);
|
||||
|
||||
void Texture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout,
|
||||
u32 start_level, u32 level_count, u32 start_layer, u32 layer_count) {
|
||||
if (new_layout == layout) {
|
||||
// Ensure all miplevels in the range have the same layout
|
||||
vk::ImageLayout old_layout = layouts[level];
|
||||
if (old_layout != vk::ImageLayout::eUndefined) {
|
||||
for (u32 i = 0; i < level_count; i++) {
|
||||
ASSERT(layouts[level + i] == old_layout);
|
||||
}
|
||||
}
|
||||
|
||||
// Don't do anything if the image is already in the wanted layout
|
||||
if (new_layout == old_layout) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct LayoutInfo {
|
||||
vk::ImageLayout layout;
|
||||
vk::AccessFlags access;
|
||||
vk::PipelineStageFlags stage;
|
||||
};
|
||||
|
||||
// Get optimal transition settings for every image layout. Settings taken from Dolphin
|
||||
auto layout_info = [](vk::ImageLayout layout) -> LayoutInfo {
|
||||
LayoutInfo info{ .layout = layout };
|
||||
auto GetLayoutInfo = [](vk::ImageLayout layout) -> LayoutInfo {
|
||||
LayoutInfo info;
|
||||
switch (layout) {
|
||||
case vk::ImageLayout::eUndefined:
|
||||
// Layout undefined therefore contents undefined, and we don't care what happens to it.
|
||||
info.access = vk::AccessFlagBits::eNone;
|
||||
info.stage = vk::PipelineStageFlagBits::eTopOfPipe;
|
||||
break;
|
||||
|
||||
case vk::ImageLayout::ePreinitialized:
|
||||
// Image has been pre-initialized by the host, so ensure all writes have completed.
|
||||
info.access = vk::AccessFlagBits::eHostWrite;
|
||||
info.stage = vk::PipelineStageFlagBits::eHost;
|
||||
break;
|
||||
|
||||
case vk::ImageLayout::eColorAttachmentOptimal:
|
||||
// Image was being used as a color attachment, so ensure all writes have completed.
|
||||
info.access = vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite;
|
||||
info.access = vk::AccessFlagBits::eColorAttachmentRead |
|
||||
vk::AccessFlagBits::eColorAttachmentWrite;
|
||||
info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput;
|
||||
break;
|
||||
|
||||
case vk::ImageLayout::eDepthStencilAttachmentOptimal:
|
||||
// Image was being used as a depthstencil attachment, so ensure all writes have completed.
|
||||
info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead | vk::AccessFlagBits::eDepthStencilAttachmentWrite;
|
||||
info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests | vk::PipelineStageFlagBits::eLateFragmentTests;
|
||||
info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead |
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentWrite;
|
||||
info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests |
|
||||
vk::PipelineStageFlagBits::eLateFragmentTests;
|
||||
break;
|
||||
|
||||
case vk::ImageLayout::ePresentSrcKHR:
|
||||
info.access = vk::AccessFlagBits::eNone;
|
||||
info.stage = vk::PipelineStageFlagBits::eBottomOfPipe;
|
||||
break;
|
||||
|
||||
case vk::ImageLayout::eShaderReadOnlyOptimal:
|
||||
// Image was being used as a shader resource, make sure all reads have finished.
|
||||
info.access = vk::AccessFlagBits::eShaderRead;
|
||||
info.stage = vk::PipelineStageFlagBits::eFragmentShader;
|
||||
break;
|
||||
|
||||
case vk::ImageLayout::eTransferSrcOptimal:
|
||||
// Image was being used as a copy source, ensure all reads have finished.
|
||||
info.access = vk::AccessFlagBits::eTransferRead;
|
||||
info.stage = vk::PipelineStageFlagBits::eTransfer;
|
||||
break;
|
||||
|
||||
case vk::ImageLayout::eTransferDstOptimal:
|
||||
// Image was being used as a copy destination, ensure all writes have finished.
|
||||
info.access = vk::AccessFlagBits::eTransferWrite;
|
||||
info.stage = vk::PipelineStageFlagBits::eTransfer;
|
||||
break;
|
||||
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout);
|
||||
UNREACHABLE();
|
||||
@ -292,220 +233,286 @@ void Texture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout
|
||||
return info;
|
||||
};
|
||||
|
||||
LayoutInfo source = GetLayoutInfo(old_layout);
|
||||
LayoutInfo dest = GetLayoutInfo(new_layout);
|
||||
|
||||
const vk::ImageMemoryBarrier barrier = {
|
||||
.srcAccessMask = source.access,
|
||||
.dstAccessMask = dest.access,
|
||||
.oldLayout = old_layout,
|
||||
.newLayout = new_layout,
|
||||
.image = image,
|
||||
.subresourceRange = {aspect, level, level_count, 0, 1}
|
||||
};
|
||||
|
||||
// Submit pipeline barrier
|
||||
LayoutInfo source = layout_info(layout), dst = layout_info(new_layout);
|
||||
vk::ImageMemoryBarrier barrier {
|
||||
source.access, dst.access,
|
||||
source.layout, dst.layout,
|
||||
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
|
||||
texture,
|
||||
vk::ImageSubresourceRange{aspect, start_level, level_count, start_layer, layer_count}
|
||||
command_buffer.pipelineBarrier(source.stage, dest.stage,
|
||||
vk::DependencyFlagBits::eByRegion,
|
||||
{}, {}, barrier);
|
||||
|
||||
// Update layouts
|
||||
SetLayout(new_layout, level, level_count);
|
||||
}
|
||||
|
||||
void Texture::SetLayout(vk::ImageLayout new_layout, u32 level, u32 level_count) {
|
||||
std::fill_n(layouts.begin() + level, level_count, new_layout);
|
||||
}
|
||||
|
||||
void Texture::Upload(Rect2D rectangle, u32 stride, std::span<const u8> data, u32 level) {
    const u64 byte_count = data.size();
    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();

    // If the advertised format supports blitting then use GPU accelerated
    // format conversion.
    if (internal_format != advertised_format &&
        instance.IsFormatSupported(advertised_format,
                                   vk::FormatFeatureFlagBits::eBlitSrc)) {
        // Creating a new staging texture for each upload/download is expensive
        // but this path is not common. TODO: Profile this
        StagingTexture staging{instance, scheduler, info};

        // Source and destination cover the same rectangle; the blit only
        // performs the format conversion, not any scaling.
        const std::array offsets = {
            vk::Offset3D{rectangle.x, rectangle.y, 0},
            vk::Offset3D{static_cast<s32>(rectangle.x + rectangle.width),
                         static_cast<s32>(rectangle.y + rectangle.height), 0}
        };

        const vk::ImageBlit image_blit = {
            .srcSubresource = {aspect, level, 0, 1},
            .srcOffsets = offsets,
            .dstSubresource = {aspect, level, 0, 1},
            .dstOffsets = offsets
        };

        // Copy pixel data to the host-visible staging texture and flush it
        // so the writes are visible to the device.
        std::memcpy(staging.GetMappedPtr(), data.data(), byte_count);
        staging.Commit(byte_count);

        Transition(command_buffer, vk::ImageLayout::eTransferDstOptimal, level);

        // Blit; the staging texture lives permanently in eGeneral layout.
        command_buffer.blitImage(staging.GetHandle(), vk::ImageLayout::eGeneral,
                                 image, vk::ImageLayout::eTransferDstOptimal,
                                 image_blit, vk::Filter::eNearest);

    // Otherwise use normal staging buffer path with possible CPU conversion
    } else {
        Buffer& staging = scheduler.GetCommandUploadBuffer();
        const u64 staging_offset = staging.GetCurrentOffset();

        // Copy pixels to the staging buffer
        auto slice = staging.Map(byte_count);
        std::memcpy(slice.data(), data.data(), byte_count);
        staging.Commit(byte_count);

        // TODO: Handle depth and stencil uploads
        ASSERT(aspect == vk::ImageAspectFlagBits::eColor &&
               advertised_format == internal_format);

        const vk::BufferImageCopy copy_region = {
            .bufferOffset = staging_offset,
            .bufferRowLength = stride,
            .bufferImageHeight = rectangle.height,
            .imageSubresource = {
                .aspectMask = aspect,
                .mipLevel = level,
                .baseArrayLayer = 0,
                .layerCount = 1
            },
            .imageOffset = {rectangle.x, rectangle.y, 0},
            .imageExtent = {rectangle.width, rectangle.height, 1}
        };

        // Reuse the command buffer acquired above instead of shadowing it
        // with a second local of the same name.
        Transition(command_buffer, vk::ImageLayout::eTransferDstOptimal, level);

        // Copy staging buffer to the texture
        command_buffer.copyBufferToImage(staging.GetHandle(), image,
                                         vk::ImageLayout::eTransferDstOptimal,
                                         copy_region);
    }

    // Prepare the uploaded mip level for shader reads. Previously this
    // defaulted to level 0, leaving any other uploaded level stuck in
    // eTransferDstOptimal.
    Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal, level);
}
|
||||
|
||||
void Texture::Download(Rect2D rectangle, u32 stride, std::span<u8> data, u32 level) {
    const u64 byte_count = data.size();
    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();

    // If the advertised format supports blitting then use GPU accelerated
    // format conversion.
    if (internal_format != advertised_format &&
        instance.IsFormatSupported(advertised_format,
                                   vk::FormatFeatureFlagBits::eBlitDst)) {
        // Creating a new staging texture for each upload/download is expensive
        // but this path is not common. TODO: Profile this
        StagingTexture staging{instance, scheduler, info};

        // Source and destination cover the same rectangle; the blit performs
        // only the format conversion, not any scaling.
        const std::array offsets = {
            vk::Offset3D{rectangle.x, rectangle.y, 0},
            vk::Offset3D{static_cast<s32>(rectangle.x + rectangle.width),
                         static_cast<s32>(rectangle.y + rectangle.height), 0}
        };

        const vk::ImageBlit image_blit = {
            .srcSubresource = {aspect, level, 0, 1},
            .srcOffsets = offsets,
            .dstSubresource = {aspect, level, 0, 1},
            .dstOffsets = offsets
        };

        Transition(command_buffer, vk::ImageLayout::eTransferSrcOptimal, level);

        // Blit into the host-visible staging texture (kept in eGeneral layout).
        // NOTE(review): unlike the else branch, this path never restores the
        // image to eShaderReadOnlyOptimal afterwards — confirm this is intended.
        command_buffer.blitImage(image, vk::ImageLayout::eTransferSrcOptimal,
                                 staging.GetHandle(), vk::ImageLayout::eGeneral,
                                 image_blit, vk::Filter::eNearest);

        // TODO: Async downloads
        scheduler.Submit(true);

        // Copy data to the destination.
        // NOTE(review): Commit flushes host writes; reading back data the GPU
        // wrote would normally require an invalidate instead — verify against
        // StagingTexture::Commit's implementation.
        staging.Commit(byte_count);
        std::memcpy(data.data(), staging.GetMappedPtr(), byte_count);

    // Otherwise use normal staging buffer path with possible CPU conversion
    } else {
        Buffer& staging = scheduler.GetCommandUploadBuffer();
        const u64 staging_offset = staging.GetCurrentOffset();

        const vk::BufferImageCopy copy_region = {
            .bufferOffset = staging_offset,
            .bufferRowLength = stride,
            .bufferImageHeight = rectangle.height,
            .imageSubresource = {
                .aspectMask = aspect,
                .mipLevel = level,
                .baseArrayLayer = 0,
                .layerCount = 1
            },
            .imageOffset = {rectangle.x, rectangle.y, 0},
            .imageExtent = {rectangle.width, rectangle.height, 1}
        };

        Transition(command_buffer, vk::ImageLayout::eTransferSrcOptimal, level);

        // Copy pixel data to the staging buffer
        command_buffer.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal,
                                         staging.GetHandle(), copy_region);

        // Restore the image for shader reads before submitting.
        Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal);

        // TODO: Async downloads
        scheduler.Submit(true);

        // Copy data to the destination.
        // NOTE(review): Map is called *after* recording the copy at
        // staging_offset — assumes Map(byte_count) returns the same region the
        // GPU just wrote; confirm the Buffer cursor semantics.
        auto memory = staging.Map(byte_count);
        std::memcpy(data.data(), memory.data(), byte_count);
    }
}
|
||||
|
||||
StagingTexture::StagingTexture(Instance& instance, CommandScheduler& scheduler,
|
||||
const TextureInfo& info) :
|
||||
TextureBase(info), instance(instance), scheduler(scheduler) {
|
||||
|
||||
format = ToVkFormat(info.format);
|
||||
const vk::ImageCreateInfo image_info = {
|
||||
.flags = info.view_type == TextureViewType::ViewCube ?
|
||||
vk::ImageCreateFlagBits::eCubeCompatible :
|
||||
vk::ImageCreateFlags{},
|
||||
.imageType = ToVkImageType(info.type),
|
||||
.format = format,
|
||||
.extent = {info.width, info.height, 1},
|
||||
.mipLevels = info.levels,
|
||||
.arrayLayers = info.view_type == TextureViewType::ViewCube ? 6u : 1u,
|
||||
.samples = vk::SampleCountFlagBits::e1,
|
||||
.usage = vk::ImageUsageFlagBits::eTransferSrc |
|
||||
vk::ImageUsageFlagBits::eTransferDst,
|
||||
};
|
||||
|
||||
cmdbuffer.pipelineBarrier(source.stage, dst.stage, vk::DependencyFlagBits::eByRegion, {}, {}, barrier);
|
||||
layout = new_layout;
|
||||
}
|
||||
|
||||
// Overwrites the CPU-side layout tracking without recording any barrier.
// Intended for cases where something external (e.g. a render pass or
// presentation engine) already changed the layout on the GPU.
// NOTE(review): this writes a single `layout` member while other methods
// track a per-mip `layouts` array — confirm which tracking scheme is current.
void Texture::OverrideImageLayout(vk::ImageLayout new_layout) {
    layout = new_layout;
}
|
||||
|
||||
void Texture::Upload(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span<u8> pixels) {
|
||||
u32 request_size = is_rgb ? (pixels.size() / 3) * 4 :
|
||||
(is_d24s8 ? (pixels.size() / 4) * 5 : pixels.size());
|
||||
auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(request_size);
|
||||
if (!buffer) {
|
||||
LOG_ERROR(Render_Vulkan, "Cannot upload pixels without staging buffer!");
|
||||
}
|
||||
|
||||
// Copy pixels to staging buffer
|
||||
auto& state = VulkanState::Get();
|
||||
state.EndRendering();
|
||||
|
||||
auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer();
|
||||
|
||||
// Automatically convert RGB to RGBA
|
||||
if (is_rgb) {
|
||||
auto data = RGBToRGBA(pixels);
|
||||
std::memcpy(buffer, data.data(), data.size());
|
||||
}
|
||||
else if (is_d24s8) {
|
||||
auto data = D24S8ToD32S8(pixels);
|
||||
std::memcpy(buffer, data.data(), data.size() * sizeof(data[0]));
|
||||
}
|
||||
else {
|
||||
std::memcpy(buffer, pixels.data(), pixels.size());
|
||||
}
|
||||
|
||||
std::array<vk::BufferImageCopy, 2> copy_regions;
|
||||
u32 region_count = 1;
|
||||
|
||||
copy_regions[0] = vk::BufferImageCopy{
|
||||
offset, row_length, region.extent.height,
|
||||
{aspect, level, layer, 1},
|
||||
{region.offset.x, region.offset.y, 0},
|
||||
{region.extent.width, region.extent.height, 1}
|
||||
const VmaAllocationCreateInfo alloc_create_info = {
|
||||
.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
|
||||
VMA_ALLOCATION_CREATE_MAPPED_BIT,
|
||||
.usage = VMA_MEMORY_USAGE_AUTO
|
||||
};
|
||||
|
||||
if (aspect & vk::ImageAspectFlagBits::eDepth &&
|
||||
aspect & vk::ImageAspectFlagBits::eStencil) {
|
||||
// Copying both depth and stencil requires two seperate regions
|
||||
copy_regions[1] = copy_regions[0];
|
||||
copy_regions[0].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
|
||||
copy_regions[1].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
|
||||
VkImage unsafe_image = VK_NULL_HANDLE;
|
||||
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
|
||||
VmaAllocationInfo alloc_info;
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
|
||||
region_count++;
|
||||
}
|
||||
// Allocate texture memory
|
||||
vmaCreateImage(allocator, &unsafe_image_info, &alloc_create_info,
|
||||
&unsafe_image, &allocation, &alloc_info);
|
||||
image = vk::Image{unsafe_image};
|
||||
|
||||
// Transition image to transfer format
|
||||
Transition(cmdbuffer, vk::ImageLayout::eTransferDstOptimal);
|
||||
// Map memory
|
||||
mapped_ptr = alloc_info.pMappedData;
|
||||
|
||||
cmdbuffer.copyBufferToImage(g_vk_task_scheduler->GetStaging().GetBuffer(),
|
||||
texture, vk::ImageLayout::eTransferDstOptimal, region_count,
|
||||
copy_regions.data());
|
||||
|
||||
// Prepare image for shader reads
|
||||
Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal);
|
||||
}
|
||||
|
||||
void Texture::Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span<u8> memory) {
|
||||
u32 request_size = is_rgb ? (memory.size() / 3) * 4 :
|
||||
(is_d24s8 ? (memory.size() / 4) * 8 : memory.size());
|
||||
auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(request_size);
|
||||
if (!buffer) {
|
||||
LOG_ERROR(Render_Vulkan, "Cannot download texture without staging buffer!");
|
||||
}
|
||||
|
||||
auto& state = VulkanState::Get();
|
||||
state.EndRendering();
|
||||
|
||||
auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer();
|
||||
|
||||
std::array<vk::BufferImageCopy, 2> copy_regions;
|
||||
u32 region_count = 1;
|
||||
|
||||
copy_regions[0] = vk::BufferImageCopy{
|
||||
offset, row_length, region.extent.height,
|
||||
{aspect, level, layer, 1},
|
||||
{region.offset.x, region.offset.y, 0},
|
||||
{region.extent.width, region.extent.height, 1}
|
||||
// Transition image to VK_IMAGE_LAYOUT_GENERAL. This layout is convenient
|
||||
// for staging textures since it allows for well defined host access and
|
||||
// works with vkCmdBlitImage, thus eliminating the need for layout transitions
|
||||
const vk::ImageMemoryBarrier barrier = {
|
||||
.srcAccessMask = vk::AccessFlagBits::eNone,
|
||||
.dstAccessMask = vk::AccessFlagBits::eNone,
|
||||
.oldLayout = vk::ImageLayout::eUndefined,
|
||||
.newLayout = vk::ImageLayout::eGeneral,
|
||||
.image = image,
|
||||
.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, info.levels, 0, 1}
|
||||
};
|
||||
|
||||
if (aspect & vk::ImageAspectFlagBits::eDepth &&
|
||||
aspect & vk::ImageAspectFlagBits::eStencil) {
|
||||
// Copying both depth and stencil requires two seperate regions
|
||||
copy_regions[1] = copy_regions[0];
|
||||
copy_regions[0].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
|
||||
copy_regions[1].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
|
||||
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
|
||||
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eBottomOfPipe,
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::DependencyFlagBits::eByRegion,
|
||||
{}, {}, barrier);
|
||||
}
|
||||
|
||||
region_count++;
|
||||
}
|
||||
StagingTexture::~StagingTexture() {
|
||||
if (image) {
|
||||
auto deleter = [allocation = allocation,
|
||||
image = image](vk::Device device, VmaAllocator allocator) {
|
||||
vmaDestroyImage(allocator, static_cast<VkImage>(image), allocation);
|
||||
};
|
||||
|
||||
// Transition image to transfer format
|
||||
auto old_layout = GetLayout();
|
||||
Transition(cmdbuffer, vk::ImageLayout::eTransferSrcOptimal);
|
||||
|
||||
cmdbuffer.copyImageToBuffer(texture, vk::ImageLayout::eTransferSrcOptimal,
|
||||
g_vk_task_scheduler->GetStaging().GetBuffer(),
|
||||
region_count, copy_regions.data());
|
||||
|
||||
// Restore layout
|
||||
Transition(cmdbuffer, old_layout);
|
||||
|
||||
// Wait for the data to be available
|
||||
// NOTE: This is really slow and should be reworked
|
||||
g_vk_task_scheduler->Submit(true);
|
||||
|
||||
// Automatically convert RGB to RGBA
|
||||
if (is_rgb) {
|
||||
auto data = RGBAToRGB(std::span(buffer, request_size));
|
||||
std::memcpy(memory.data(), data.data(), memory.size());
|
||||
}
|
||||
else if (is_d24s8) {
|
||||
auto data = D32S8ToD24S8(std::span(buffer, request_size));
|
||||
std::memcpy(memory.data(), data.data(), memory.size());
|
||||
}
|
||||
else {
|
||||
std::memcpy(memory.data(), buffer, memory.size());
|
||||
// Schedule deletion of the texture after it's no longer used by the GPU
|
||||
scheduler.Schedule(deleter);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Out, typename In>
|
||||
std::span<Out> SpanCast(std::span<In> span) {
|
||||
return std::span(reinterpret_cast<Out*>(span.data()), span.size_bytes() / sizeof(Out));
|
||||
// Flushes the first `size` bytes of the mapped allocation so host writes
// become visible to the device (a no-op on HOST_COHERENT memory).
// NOTE(review): reading back GPU writes would require vmaInvalidateAllocation
// instead — confirm callers that use Commit for downloads are correct.
void StagingTexture::Commit(u32 size) {
    vmaFlushAllocation(instance.GetAllocator(), allocation, 0, size);
}
|
||||
|
||||
std::vector<u8> Texture::RGBToRGBA(std::span<u8> data) {
|
||||
ASSERT(data.size() % 3 == 0);
|
||||
Sampler::Sampler(Instance& instance, SamplerInfo info) :
|
||||
SamplerBase(info), instance(instance) {
|
||||
|
||||
u32 new_size = (data.size() / 3) * 4;
|
||||
std::vector<u8> rgba(new_size);
|
||||
auto properties = instance.GetPhysicalDevice().getProperties();
|
||||
const auto filtering = PicaToVK::TextureFilterMode(info.mag_filter,
|
||||
info.min_filter,
|
||||
info.mip_filter);
|
||||
const vk::SamplerCreateInfo sampler_info = {
|
||||
.magFilter = filtering.mag_filter,
|
||||
.minFilter = filtering.min_filter,
|
||||
.mipmapMode = filtering.mip_mode,
|
||||
.addressModeU = PicaToVK::WrapMode(info.wrap_s),
|
||||
.addressModeV = PicaToVK::WrapMode(info.wrap_t),
|
||||
.anisotropyEnable = true,
|
||||
.maxAnisotropy = properties.limits.maxSamplerAnisotropy,
|
||||
.compareEnable = false,
|
||||
.compareOp = vk::CompareOp::eAlways,
|
||||
.borderColor = vk::BorderColor::eIntOpaqueBlack,
|
||||
.unnormalizedCoordinates = false
|
||||
};
|
||||
|
||||
u32 dst_pos = 0;
|
||||
for (u32 i = 0; i < data.size(); i += 3) {
|
||||
std::memcpy(rgba.data() + dst_pos, data.data() + i, 3);
|
||||
rgba[dst_pos + 3] = 255u;
|
||||
dst_pos += 4;
|
||||
}
|
||||
|
||||
return rgba;
|
||||
vk::Device device = instance.GetDevice();
|
||||
sampler = device.createSampler(sampler_info);
|
||||
}
|
||||
|
||||
// Expands packed D24S8 texels (24-bit normalized depth in the low bits,
// 8-bit stencil in the high byte) to D32S8: a 32-bit float depth value in
// the low dword and the stencil byte directly above it.
// Assumes a little-endian host (the memcpy overwrites the low 4 bytes).
std::vector<u64> Texture::D24S8ToD32S8(std::span<u8> data) {
    ASSERT(data.size() % 4 == 0);

    std::vector<u64> d32s8;
    std::span<u32> d24s8 = SpanCast<u32>(data);

    // One output element per input texel. reserve() takes an element count,
    // not a byte count; the previous data.size() * 2 over-reserved by 8x.
    d32s8.reserve(d24s8.size());
    std::ranges::transform(d24s8, std::back_inserter(d32s8), [](u32 comp) -> u64 {
        // Convert normalized 24bit depth component to floating point
        float fdepth = static_cast<float>(comp & 0xFFFFFF) / 0xFFFFFF;
        // Shift so the stencil byte lands in bits [32, 40)
        u64 result = static_cast<u64>(comp) << 8;

        // Use std::memcpy to avoid the unsafe casting required to preserve the floating
        // point bits; this replaces the low 32 bits with the float depth.
        std::memcpy(&result, &fdepth, 4);
        return result;
    });

    return d32s8;
}
|
||||
|
||||
// Strips the alpha channel from RGBA pixel data, producing a tightly
// packed RGB buffer. The input size must be a multiple of 4 bytes.
std::vector<u8> Texture::RGBAToRGB(std::span<u8> data) {
    ASSERT(data.size() % 4 == 0);

    const u32 pixel_count = static_cast<u32>(data.size() / 4);
    std::vector<u8> rgb(pixel_count * 3);

    // Copy the three color bytes of each texel, skipping the alpha byte.
    u8* out = rgb.data();
    for (u32 pixel = 0; pixel < pixel_count; pixel++) {
        std::memcpy(out, data.data() + pixel * 4, 3);
        out += 3;
    }

    return rgb;
}
|
||||
|
||||
std::vector<u32> Texture::D32S8ToD24S8(std::span<u8> data) {
|
||||
ASSERT(data.size() % 8 == 0);
|
||||
|
||||
std::vector<u32> d24s8;
|
||||
std::span<u64> d32s8 = SpanCast<u64>(data);
|
||||
|
||||
d24s8.reserve(data.size() / 2);
|
||||
std::ranges::transform(d32s8, std::back_inserter(d24s8), [](u64 comp) -> u32 {
|
||||
// Convert floating point to 24bit normalized depth
|
||||
float fdepth = 0.f;
|
||||
u32 depth = comp & 0xFFFFFFFF;
|
||||
std::memcpy(&fdepth, &depth, 4);
|
||||
|
||||
u32 stencil = (comp >> 32) & 0xFF;
|
||||
u64 result = static_cast<u32>(fdepth * 0xFFFFFF) | (stencil << 24);
|
||||
return result;
|
||||
});
|
||||
|
||||
return d24s8;
|
||||
// Destroys the owned Vulkan sampler handle. The device is owned by the
// Instance reference, which outlives this object.
Sampler::~Sampler() {
    instance.GetDevice().destroySampler(sampler);
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -4,80 +4,147 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <functional>
|
||||
#include <glm/glm.hpp>
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_surface_params.h"
|
||||
#include "video_core/common/texture.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace VideoCore::Vulkan {
|
||||
|
||||
/// Vulkan texture object
|
||||
class Texture final : public NonCopyable {
|
||||
// PICA texture have at most 8 mipmap levels
|
||||
constexpr u32 TEXTURE_MAX_LEVELS = 8;
|
||||
|
||||
class Instance;
|
||||
class CommandScheduler;
|
||||
|
||||
/**
|
||||
* A texture located in GPU memory
|
||||
*/
|
||||
class Texture : public VideoCore::TextureBase {
|
||||
public:
|
||||
/// Information for the creation of the target texture
|
||||
struct Info {
|
||||
u32 width, height;
|
||||
vk::Format format;
|
||||
vk::ImageType type;
|
||||
vk::ImageViewType view_type;
|
||||
vk::ImageUsageFlags usage;
|
||||
u32 multisamples = 1;
|
||||
u32 levels = 1, layers = 1;
|
||||
};
|
||||
// Default constructor
|
||||
Texture(Instance& instance, CommandScheduler& scheduler);
|
||||
|
||||
// Constructor for texture creation
|
||||
Texture(Instance& instance, CommandScheduler& scheduler,
|
||||
const TextureInfo& info);
|
||||
|
||||
// Constructor for not owning textures (swapchain)
|
||||
Texture(Instance& instance, CommandScheduler& scheduler,
|
||||
vk::Image image, const TextureInfo& info);
|
||||
|
||||
Texture() = default;
|
||||
~Texture();
|
||||
|
||||
/// Enable move operations
|
||||
Texture(Texture&& other) noexcept;
|
||||
Texture& operator=(Texture&& other) noexcept;
|
||||
/// Uploads pixel data to the GPU memory
|
||||
void Upload(Rect2D rectangle, u32 stride, std::span<const u8> data,
|
||||
u32 level = 0) override;
|
||||
|
||||
/// Create a new Vulkan texture object
|
||||
void Create(const Info& info);
|
||||
void Create(Texture& texture);
|
||||
void Adopt(const Info& info, vk::Image image);
|
||||
void Destroy();
|
||||
/// Downloads pixel data from GPU memory
|
||||
void Download(Rect2D rectangle, u32 stride, std::span<u8> data,
|
||||
u32 level = 0) override;
|
||||
|
||||
/// Query objects
|
||||
bool IsValid() const { return texture; }
|
||||
vk::Image GetHandle() const { return texture; }
|
||||
vk::ImageView GetView() const { return view; }
|
||||
vk::Format GetFormat() const { return info.format; }
|
||||
vk::ImageLayout GetLayout() const { return layout; }
|
||||
u32 GetSamples() const { return info.multisamples; }
|
||||
u32 GetSize() const { return image_size; }
|
||||
vk::Rect2D GetArea() const { return {{0, 0},{info.width, info.height}}; }
|
||||
/// Copies the rectangle area specified to the destionation texture
|
||||
void BlitTo(TextureHandle dest, Rect2D src_rectangle, Rect2D dest_rect,
|
||||
u32 src_level = 0, u32 dest_level = 0) override;
|
||||
|
||||
/// Copies CPU side pixel data to the GPU texture buffer
|
||||
void Upload(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span<u8> pixels);
|
||||
void Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span<u8> dst);
|
||||
/// Overrides the layout of provided image subresource
|
||||
void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1);
|
||||
|
||||
/// Used to transition the image to an optimal layout during transfers
|
||||
void OverrideImageLayout(vk::ImageLayout new_layout);
|
||||
void Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout);
|
||||
void Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout, u32 start_level, u32 level_count,
|
||||
u32 start_layer, u32 layer_count);
|
||||
/// Transitions part of the image to the provided layout
|
||||
void Transition(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout,
|
||||
u32 level = 0, u32 level_count = 1);
|
||||
|
||||
/// Returns the underlying vulkan image handle
|
||||
vk::Image GetHandle() const {
|
||||
return image;
|
||||
}
|
||||
|
||||
/// Returns the Vulka image view
|
||||
vk::ImageView GetView() const {
|
||||
return image_view;
|
||||
}
|
||||
|
||||
/// Returns the internal format backing the texture.
|
||||
/// It may not match the input pixel format.
|
||||
vk::Format GetInternalFormat() const {
|
||||
return internal_format;
|
||||
}
|
||||
|
||||
/// Returns the current image layout
|
||||
vk::ImageLayout GetLayout(u32 level = 0) const {
|
||||
return layouts.at(level);
|
||||
}
|
||||
|
||||
/// Returns a rectangle that represents the complete area of the texture
|
||||
vk::Rect2D GetArea() const {
|
||||
return {{0, 0},{info.width, info.height}};
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<u8> RGBToRGBA(std::span<u8> data);
|
||||
std::vector<u64> D24S8ToD32S8(std::span<u8> data);
|
||||
Instance& instance;
|
||||
CommandScheduler& scheduler;
|
||||
|
||||
std::vector<u8> RGBAToRGB(std::span<u8> data);
|
||||
std::vector<u32> D32S8ToD24S8(std::span<u8> data);
|
||||
// Vulkan texture handle
|
||||
vk::Image image = VK_NULL_HANDLE;
|
||||
vk::ImageView image_view = VK_NULL_HANDLE;
|
||||
VmaAllocation allocation = nullptr;
|
||||
bool is_texture_owned = true;
|
||||
|
||||
// Texture properties
|
||||
vk::Format advertised_format = vk::Format::eUndefined;
|
||||
vk::Format internal_format = vk::Format::eUndefined;
|
||||
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone;
|
||||
std::array<vk::ImageLayout, TEXTURE_MAX_LEVELS> layouts;
|
||||
};
|
||||
|
||||
/**
|
||||
* Staging texture located in CPU memory. Used for intermediate format
|
||||
* conversions
|
||||
*/
|
||||
class StagingTexture : public VideoCore::TextureBase {
|
||||
public:
|
||||
StagingTexture(Instance& instance, CommandScheduler& scheduler,
|
||||
const TextureInfo& info);
|
||||
~StagingTexture();
|
||||
|
||||
/// Flushes any writes made to texture memory
|
||||
void Commit(u32 size);
|
||||
|
||||
/// Returns a span of the mapped texture memory
|
||||
void* GetMappedPtr() {
|
||||
return mapped_ptr;
|
||||
}
|
||||
|
||||
/// Returns the staging image handle
|
||||
vk::Image GetHandle() const {
|
||||
return image;
|
||||
}
|
||||
|
||||
private:
|
||||
Texture::Info info{};
|
||||
vk::ImageLayout layout{};
|
||||
vk::ImageAspectFlags aspect{};
|
||||
vk::Image texture;
|
||||
vk::ImageView view;
|
||||
vk::DeviceMemory memory;
|
||||
u32 image_size{};
|
||||
bool adopted{false};
|
||||
bool is_rgb{false}, is_d24s8{false};
|
||||
Instance& instance;
|
||||
CommandScheduler& scheduler;
|
||||
|
||||
vk::Image image = VK_NULL_HANDLE;
|
||||
VmaAllocation allocation = VK_NULL_HANDLE;
|
||||
vk::Format format = vk::Format::eUndefined;
|
||||
u32 capacity = 0;
|
||||
void* mapped_ptr = nullptr;
|
||||
};
|
||||
|
||||
/**
 * Vulkan sampler object. Thin RAII wrapper that creates a vk::Sampler from
 * the backend-agnostic SamplerInfo on construction and destroys it in the
 * destructor.
 */
class Sampler : public VideoCore::SamplerBase {
public:
    // Creates the Vulkan sampler; `instance` must outlive this object since
    // the destructor uses it to retrieve the device.
    Sampler(Instance& instance, SamplerInfo info);
    ~Sampler() override;

    /// Returns the underlying vulkan sampler handle
    vk::Sampler GetHandle() const {
        return sampler;
    }

private:
    Instance& instance;  // Non-owning; provides the vk::Device
    vk::Sampler sampler; // Owned handle, destroyed in ~Sampler
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -56,9 +56,9 @@ struct OutputVertex {
|
||||
Common::Vec2<float24> tc0;
|
||||
Common::Vec2<float24> tc1;
|
||||
float24 tc0_w;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
INSERT_PADDING_WORDS_NOINIT(1);
|
||||
Common::Vec3<float24> view;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
INSERT_PADDING_WORDS_NOINIT(1);
|
||||
Common::Vec2<float24> tc2;
|
||||
|
||||
static void ValidateSemantics(const RasterizerRegs& regs);
|
||||
|
@ -164,8 +164,10 @@ static void LogCritical(const char* msg) {
|
||||
|
||||
void JitShader::Compile_Assert(bool condition, const char* msg) {
|
||||
if (!condition) {
|
||||
ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
|
||||
mov(ABI_PARAM1, reinterpret_cast<std::size_t>(msg));
|
||||
CallFarFunction(*this, LogCritical);
|
||||
ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -595,11 +597,11 @@ void JitShader::Compile_END(Instruction instr) {
|
||||
}
|
||||
|
||||
void JitShader::Compile_BREAKC(Instruction instr) {
|
||||
Compile_Assert(looping, "BREAKC must be inside a LOOP");
|
||||
if (looping) {
|
||||
Compile_Assert(loop_depth, "BREAKC must be inside a LOOP");
|
||||
if (loop_depth) {
|
||||
Compile_EvaluateCondition(instr);
|
||||
ASSERT(loop_break_label);
|
||||
jnz(*loop_break_label);
|
||||
ASSERT(!loop_break_labels.empty());
|
||||
jnz(loop_break_labels.back(), T_NEAR);
|
||||
}
|
||||
}
|
||||
|
||||
@ -725,9 +727,11 @@ void JitShader::Compile_IF(Instruction instr) {
|
||||
void JitShader::Compile_LOOP(Instruction instr) {
|
||||
Compile_Assert(instr.flow_control.dest_offset >= program_counter,
|
||||
"Backwards loops not supported");
|
||||
Compile_Assert(!looping, "Nested loops not supported");
|
||||
|
||||
looping = true;
|
||||
Compile_Assert(loop_depth < 1, "Nested loops may not be supported");
|
||||
if (loop_depth++) {
|
||||
const auto loop_save_regs = BuildRegSet({LOOPCOUNT_REG, LOOPINC, LOOPCOUNT});
|
||||
ABI_PushRegistersAndAdjustStack(*this, loop_save_regs, 0);
|
||||
}
|
||||
|
||||
// This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
|
||||
// The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
|
||||
@ -746,16 +750,20 @@ void JitShader::Compile_LOOP(Instruction instr) {
|
||||
Label l_loop_start;
|
||||
L(l_loop_start);
|
||||
|
||||
loop_break_label = Xbyak::Label();
|
||||
loop_break_labels.emplace_back(Xbyak::Label());
|
||||
Compile_Block(instr.flow_control.dest_offset + 1);
|
||||
|
||||
add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
|
||||
sub(LOOPCOUNT, 1); // Increment loop count by 1
|
||||
jnz(l_loop_start); // Loop if not equal
|
||||
L(*loop_break_label);
|
||||
loop_break_label.reset();
|
||||
|
||||
looping = false;
|
||||
L(loop_break_labels.back());
|
||||
loop_break_labels.pop_back();
|
||||
|
||||
if (--loop_depth) {
|
||||
const auto loop_save_regs = BuildRegSet({LOOPCOUNT_REG, LOOPINC, LOOPCOUNT});
|
||||
ABI_PopRegistersAndAdjustStack(*this, loop_save_regs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void JitShader::Compile_JMP(Instruction instr) {
|
||||
@ -892,7 +900,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
||||
// Reset flow control state
|
||||
program = (CompiledShader*)getCurr();
|
||||
program_counter = 0;
|
||||
looping = false;
|
||||
loop_depth = 0;
|
||||
instruction_labels.fill(Xbyak::Label());
|
||||
|
||||
// Find all `CALL` instructions and identify return locations
|
||||
|
@ -120,15 +120,15 @@ private:
|
||||
/// Mapping of Pica VS instructions to pointers in the emitted code
|
||||
std::array<Xbyak::Label, MAX_PROGRAM_CODE_LENGTH> instruction_labels;
|
||||
|
||||
/// Label pointing to the end of the current LOOP block. Used by the BREAKC instruction to break
|
||||
/// out of the loop.
|
||||
std::optional<Xbyak::Label> loop_break_label;
|
||||
/// Labels pointing to the end of each nested LOOP block. Used by the BREAKC instruction to
|
||||
/// break out of a loop.
|
||||
std::vector<Xbyak::Label> loop_break_labels;
|
||||
|
||||
/// Offsets in code where a return needs to be inserted
|
||||
std::vector<unsigned> return_offsets;
|
||||
|
||||
unsigned program_counter = 0; ///< Offset of the next instruction to decode
|
||||
bool looping = false; ///< True if compiling a loop, used to check for nested loops
|
||||
u8 loop_depth = 0; ///< Depth of the (nested) loops currently compiled
|
||||
|
||||
using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
|
||||
CompiledShader* program = nullptr;
|
||||
|
@ -13,7 +13,7 @@ namespace Clipper {
|
||||
|
||||
using Shader::OutputVertex;
|
||||
|
||||
void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2);
|
||||
void ProcessTriangle(const & v0, const OutputVertex& v1, const OutputVertex& v2);
|
||||
|
||||
} // namespace Clipper
|
||||
} // namespace Pica
|
||||
|
@ -46,7 +46,6 @@ ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory)
|
||||
|
||||
OpenGL::GLES = Settings::values.use_gles;
|
||||
|
||||
//g_renderer = std::make_unique<OpenGL::RendererOpenGL>(emu_window);
|
||||
g_renderer = std::make_unique<Vulkan::RendererVulkan>(emu_window);
|
||||
ResultStatus result = g_renderer->Init();
|
||||
|
||||
|
@ -6,8 +6,8 @@
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
#include "core/frontend/emu_window.h"
|
||||
|
||||
namespace Frontend {
|
||||
|
Reference in New Issue
Block a user