diff --git a/CMakeLists.txt b/CMakeLists.txt index 800f7747a..95e0d6aea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -157,6 +157,7 @@ set(REQUIRED_LIBRARIES inih lodepng glslang + robin-hood-hashing zstd ) @@ -297,6 +298,7 @@ set(REQUIRED_PACKAGES zstd unofficial-enet lodepng + robin_hood ) foreach(PACKAGE ${REQUIRED_PACKAGES}) diff --git a/CMakeModules/VcpkgCmakeUtils.cmake b/CMakeModules/VcpkgCmakeUtils.cmake index 13333092a..9c34d40ba 100644 --- a/CMakeModules/VcpkgCmakeUtils.cmake +++ b/CMakeModules/VcpkgCmakeUtils.cmake @@ -9,7 +9,7 @@ endif() # Configure vcpkg set(VCPKG_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/externals/vcpkg") if (WIN32) - execute_process(COMMAND cmd /C "${VCPKG_DIRECTORY}/bootstrap-vcpkg.bat") + #execute_process(COMMAND cmd /C "${VCPKG_DIRECTORY}/bootstrap-vcpkg.bat") set(VCPKG_EXECUTABLE "${VCPKG_DIRECTORY}/vcpkg.exe") else() execute_process(COMMAND bash "${VCPKG_DIRECTORY}/bootstrap-vcpkg.sh") diff --git a/src/audio_core/lle/lle.cpp b/src/audio_core/lle/lle.cpp index 5786e156c..833b3873b 100644 --- a/src/audio_core/lle/lle.cpp +++ b/src/audio_core/lle/lle.cpp @@ -450,7 +450,7 @@ void DspLle::SetServiceToInterrupt(std::weak_ptr dsp) { return; if (pipe == 0) { // pipe 0 is for debug. 
3DS automatically drains this pipe and discards the data - impl->ReadPipe(pipe, impl->GetPipeReadableSize(pipe)); + impl->ReadPipe(static_cast(pipe), impl->GetPipeReadableSize(pipe)); } else { std::lock_guard lock(HLE::g_hle_lock); if (auto locked = dsp.lock()) { diff --git a/src/citra_qt/configuration/config.cpp b/src/citra_qt/configuration/config.cpp index 36f0502b1..ecf5090e6 100644 --- a/src/citra_qt/configuration/config.cpp +++ b/src/citra_qt/configuration/config.cpp @@ -944,16 +944,14 @@ void Config::SaveMultiplayerValues() { // Write ban list qt_config->beginWriteArray(QStringLiteral("username_ban_list")); for (std::size_t i = 0; i < UISettings::values.ban_list.first.size(); ++i) { - int index = static_cast(i); - qt_config->setArrayIndex(index); + qt_config->setArrayIndex(static_cast(i)); WriteSetting(QStringLiteral("username"), QString::fromStdString(UISettings::values.ban_list.first[i])); } qt_config->endArray(); qt_config->beginWriteArray(QStringLiteral("ip_ban_list")); for (std::size_t i = 0; i < UISettings::values.ban_list.second.size(); ++i) { - int index = static_cast(i); - qt_config->setArrayIndex(index); + qt_config->setArrayIndex(static_cast(i)); WriteSetting(QStringLiteral("ip"), QString::fromStdString(UISettings::values.ban_list.second[i])); } diff --git a/src/citra_qt/configuration/configure_camera.cpp b/src/citra_qt/configuration/configure_camera.cpp index ff6f50ed2..b58ea95d3 100644 --- a/src/citra_qt/configuration/configure_camera.cpp +++ b/src/citra_qt/configuration/configure_camera.cpp @@ -256,8 +256,7 @@ void ConfigureCamera::SetConfiguration() { int index = GetSelectedCameraIndex(); for (std::size_t i = 0; i < Implementations.size(); i++) { if (Implementations[i] == camera_name[index]) { - int current_index = static_cast(i); - ui->image_source->setCurrentIndex(current_index); + ui->image_source->setCurrentIndex(static_cast(i)); } } if (camera_name[index] == "image") { diff --git a/src/citra_qt/debugger/ipc/recorder.cpp 
b/src/citra_qt/debugger/ipc/recorder.cpp index b4ed84ad0..4acc693d5 100644 --- a/src/citra_qt/debugger/ipc/recorder.cpp +++ b/src/citra_qt/debugger/ipc/recorder.cpp @@ -76,7 +76,7 @@ void IPCRecorderWidget::OnEntryUpdated(IPCDebugger::RequestRecord record) { QTreeWidgetItem entry{ {QString::number(record.id), GetStatusStr(record), service, GetFunctionName(record)}}; - const int row_id = record.id - id_offset; + const std::size_t row_id = record.id - id_offset; if (ui->main->invisibleRootItem()->childCount() > row_id) { records[row_id] = record; (*ui->main->invisibleRootItem()->child(row_id)) = entry; diff --git a/src/citra_qt/debugger/ipc/recorder.h b/src/citra_qt/debugger/ipc/recorder.h index 63b190790..06cf7c1ba 100644 --- a/src/citra_qt/debugger/ipc/recorder.h +++ b/src/citra_qt/debugger/ipc/recorder.h @@ -45,7 +45,7 @@ private: // The offset between record id and row id, assuming record ids are assigned // continuously and only the 'Clear' action can be performed, this is enough. // The initial value is 1, which means record 1 = row 0. 
- int id_offset = 1; + std::size_t id_offset = 1; std::vector records; }; diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 56fb77568..0b1509fed 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -60,6 +60,7 @@ add_library(common STATIC detached_tasks.cpp detached_tasks.h bit_field.h + bit_field_array.h bit_set.h cityhash.cpp cityhash.h @@ -72,6 +73,7 @@ add_library(common STATIC file_util.h flag.h hash.h + intrusive_ptr.h linear_disk_cache.h logging/backend.cpp logging/backend.h @@ -87,6 +89,8 @@ add_library(common STATIC microprofile.h microprofileui.h misc.cpp + object_pool.cpp + object_pool.h param_package.cpp param_package.h quaternion.h diff --git a/src/common/bit_field.h b/src/common/bit_field.h index 4ec3af1af..ae3c5753d 100644 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h @@ -36,6 +36,18 @@ #include "common/common_funcs.h" #include "common/swap.h" +// User defined types to need to specialize this +template +struct MakeUnsigned { + using type = std::make_unsigned_t; +}; + +// Ensure that user defined types are sane +template +concept ValidType = requires(T t) { + static_cast::type>(t); +}; + /* * Abstract bitfield class * @@ -110,6 +122,7 @@ */ #pragma pack(1) template + requires ValidType struct BitField { private: // UnderlyingType is T for non-enum types and the underlying type of T if @@ -120,7 +133,7 @@ private: std::enable_if>::type; // We store the value as the unsigned type to avoid undefined behaviour on value shifting - using StorageType = std::make_unsigned_t; + using StorageType = typename MakeUnsigned::type; using StorageTypeWithEndian = typename AddEndian::type; @@ -199,3 +212,38 @@ private: template using BitFieldBE = BitField; + +/** + * Abstract bit flag class. This is basically a specialization of BitField for single-bit fields. + * Instead of being cast to the underlying type, it acts like a boolean. 
+ */ +#pragma pack(1) +template +struct BitFlag : protected BitField { +private: + BitFlag(T val) = delete; + + using ParentType = BitField; + +public: + BitFlag() = default; + BitFlag& operator=(const BitFlag&) = delete; + + constexpr BitFlag& operator=(bool val) { + Assign(val); + return *this; + } + + constexpr void Assign(bool value) { + ParentType::Assign(value); + } + + [[nodiscard]] constexpr operator bool() const { + return Value(); + } + + [[nodiscard]] constexpr bool Value() const { + return ParentType::Value() != 0; + } +}; +#pragma pack() diff --git a/src/common/bit_field_array.h b/src/common/bit_field_array.h new file mode 100644 index 000000000..326f21eb9 --- /dev/null +++ b/src/common/bit_field_array.h @@ -0,0 +1,287 @@ +#pragma once + +#include +#include +#include +#include "common/swap.h" + +// Language limitations require the following to make these formattable +// (formatter::Ref> is not legal) +template +class BitFieldArrayConstRef; +template +class BitFieldArrayRef; +template +class BitFieldArrayConstIterator; +template +class BitFieldArrayIterator; + +#pragma pack(1) +template ::type directly. + typename StorageType = typename std::conditional_t< + std::is_enum::value, std::underlying_type, std::enable_if>::type> +struct BitFieldArray +{ + using Ref = BitFieldArrayRef; + using ConstRef = BitFieldArrayConstRef; + using Iterator = BitFieldArrayIterator; + using ConstIterator = BitFieldArrayConstIterator; + +private: + // This constructor might be considered ambiguous: + // Would it initialize the storage or just the bitfield? + // Hence, delete it. Use the assignment operator to set bitfield values! 
+ BitFieldArray(T val) = delete; + +public: + // Force default constructor to be created + // so that we can use this within unions + constexpr BitFieldArray() = default; + + // Initializer list constructor + constexpr BitFieldArray(std::initializer_list items) : storage(StorageType{}) { + u32 index = 0; + for (auto& item : items) { + SetValue(index++, item); + } + } + + // We explicitly delete the copy assignment operator here, because the + // default copy assignment would copy the full storage value, rather than + // just the bits relevant to this particular bit field. + // Ideally, we would just implement the copy assignment to copy only the + // relevant bits, but we're prevented from doing that because the savestate + // code expects that this class is trivially copyable. + BitFieldArray& operator=(const BitFieldArray&) = delete; + +public: + constexpr bool IsSigned() const { return std::is_signed(); } + constexpr std::size_t StartBit() const { return position; } + constexpr std::size_t NumBits() const { return bits; } + constexpr std::size_t Size() const { return size; } + constexpr std::size_t TotalNumBits() const { return bits * size; } + + constexpr T Value(size_t index) const { return Value(std::is_signed(), index); } + constexpr void SetValue(size_t index, T value) { + const size_t pos = position + bits * index; + storage = (storage & ~GetElementMask(index)) | + ((static_cast(value) << pos) & GetElementMask(index)); + } + Ref operator[](size_t index) { return Ref(this, index); } + constexpr const ConstRef operator[](size_t index) const { return ConstRef(this, index); } + + constexpr Iterator begin() { return Iterator(this, 0); } + constexpr Iterator end() { return Iterator(this, size); } + constexpr ConstIterator begin() const { return ConstIterator(this, 0); } + constexpr ConstIterator end() const { return ConstIterator(this, size); } + constexpr ConstIterator cbegin() const { return begin(); } + constexpr ConstIterator cend() const { return end(); } + 
+private: + // Unsigned version of StorageType + using StorageTypeU = std::make_unsigned_t; + + constexpr T Value(std::true_type, size_t index) const + { + const size_t pos = position + bits * index; + const size_t shift_amount = 8 * sizeof(StorageType) - bits; + return static_cast((storage << (shift_amount - pos)) >> shift_amount); + } + + constexpr T Value(std::false_type, size_t index) const + { + const size_t pos = position + bits * index; + return static_cast((storage & GetElementMask(index)) >> pos); + } + + static constexpr StorageType GetElementMask(size_t index) + { + const size_t pos = position + bits * index; + return (std::numeric_limits::max() >> (8 * sizeof(StorageType) - bits)) << pos; + } + + StorageType storage; + + static_assert(bits * size + position <= 8 * sizeof(StorageType), "Bitfield array out of range"); + static_assert(sizeof(T) <= sizeof(StorageType), "T must fit in StorageType"); + + // And, you know, just in case people specify something stupid like bits=position=0x80000000 + static_assert(position < 8 * sizeof(StorageType), "Invalid position"); + static_assert(bits <= 8 * sizeof(T), "Invalid number of bits"); + static_assert(bits > 0, "Invalid number of bits"); + static_assert(size <= 8 * sizeof(StorageType), "Invalid size"); + static_assert(size > 0, "Invalid size"); +}; +#pragma pack() + +template +class BitFieldArrayConstRef +{ + friend struct BitFieldArray; + friend class BitFieldArrayConstIterator; + +public: + constexpr T Value() const { return m_array->Value(m_index); }; + constexpr operator T() const { return Value(); } + +private: + constexpr BitFieldArrayConstRef(const BitFieldArray* array, + size_t index) + : m_array(array), m_index(index) + { + } + + const BitFieldArray* const m_array; + const size_t m_index; +}; + +template +class BitFieldArrayRef +{ + friend struct BitFieldArray; + friend class BitFieldArrayIterator; + +public: + constexpr T Value() const { return m_array->Value(m_index); }; + constexpr operator T() const 
{ return Value(); } + T operator=(const BitFieldArrayRef& value) const + { + m_array->SetValue(m_index, value); + return value; + } + T operator=(T value) const + { + m_array->SetValue(m_index, value); + return value; + } + +private: + constexpr BitFieldArrayRef(BitFieldArray* array, size_t index) + : m_array(array), m_index(index) + { + } + + BitFieldArray* const m_array; + const size_t m_index; +}; + +// Satisfies LegacyOutputIterator / std::output_iterator. +// Does not satisfy LegacyInputIterator / std::input_iterator as std::output_iterator_tag does not +// extend std::input_iterator_tag. +// Does not satisfy LegacyForwardIterator / std::forward_iterator, as that requires use of real +// references instead of proxy objects. +// This iterator allows use of BitFieldArray in range-based for loops, and with fmt::join. +template +class BitFieldArrayIterator +{ + friend struct BitFieldArray; + +public: + using iterator_category = std::output_iterator_tag; + using value_type = T; + using difference_type = ptrdiff_t; + using pointer = void; + using reference = BitFieldArrayRef; + +private: + constexpr BitFieldArrayIterator(BitFieldArray* array, size_t index) + : m_array(array), m_index(index) + { + } + +public: + // Required by std::input_or_output_iterator + constexpr BitFieldArrayIterator() = default; + // Required by LegacyIterator + constexpr BitFieldArrayIterator(const BitFieldArrayIterator& other) = default; + // Required by LegacyIterator + BitFieldArrayIterator& operator=(const BitFieldArrayIterator& other) = default; + // Move constructor and assignment operators, explicitly defined for completeness + constexpr BitFieldArrayIterator(BitFieldArrayIterator&& other) = default; + BitFieldArrayIterator& operator=(BitFieldArrayIterator&& other) = default; + +public: + BitFieldArrayIterator& operator++() + { + m_index++; + return *this; + } + BitFieldArrayIterator operator++(int) + { + BitFieldArrayIterator other(*this); + ++*this; + return other; + } + constexpr 
reference operator*() const { return reference(m_array, m_index); } + constexpr bool operator==(BitFieldArrayIterator other) const { return m_index == other.m_index; } + constexpr bool operator!=(BitFieldArrayIterator other) const { return m_index != other.m_index; } + +private: + BitFieldArray* m_array; + size_t m_index; +}; + +// Satisfies LegacyInputIterator / std::input_iterator. +// Does not satisfy LegacyForwardIterator / std::forward_iterator, as that requires use of real +// references instead of proxy objects. +// This iterator allows use of BitFieldArray in range-based for loops, and with fmt::join. +template +class BitFieldArrayConstIterator +{ + friend struct BitFieldArray; + +public: + using iterator_category = std::input_iterator_tag; + using value_type = T; + using difference_type = ptrdiff_t; + using pointer = void; + using reference = BitFieldArrayConstRef; + +private: + constexpr BitFieldArrayConstIterator(const BitFieldArray* array, + size_t index) + : m_array(array), m_index(index) + { + } + +public: + // Required by std::input_or_output_iterator + constexpr BitFieldArrayConstIterator() = default; + // Required by LegacyIterator + constexpr BitFieldArrayConstIterator(const BitFieldArrayConstIterator& other) = default; + // Required by LegacyIterator + BitFieldArrayConstIterator& operator=(const BitFieldArrayConstIterator& other) = default; + // Move constructor and assignment operators, explicitly defined for completeness + constexpr BitFieldArrayConstIterator(BitFieldArrayConstIterator&& other) = default; + BitFieldArrayConstIterator& operator=(BitFieldArrayConstIterator&& other) = default; + +public: + BitFieldArrayConstIterator& operator++() + { + m_index++; + return *this; + } + BitFieldArrayConstIterator operator++(int) + { + BitFieldArrayConstIterator other(*this); + ++*this; + return other; + } + constexpr reference operator*() const { return reference(m_array, m_index); } + constexpr bool operator==(BitFieldArrayConstIterator other) 
const + { + return m_index == other.m_index; + } + constexpr bool operator!=(BitFieldArrayConstIterator other) const + { + return m_index != other.m_index; + } + +private: + const BitFieldArray* m_array; + size_t m_index; +}; diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 1420675d4..f616cb548 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -15,11 +15,19 @@ #define CONCAT2(x, y) DO_CONCAT2(x, y) #define DO_CONCAT2(x, y) x##y -// helper macro to properly align structure members. -// Calling INSERT_PADDING_BYTES will add a new member variable with a name like "pad121", -// depending on the current source line to make sure variable names are unique. -#define INSERT_PADDING_BYTES(num_bytes) u8 CONCAT2(pad, __LINE__)[(num_bytes)] -#define INSERT_PADDING_WORDS(num_words) u32 CONCAT2(pad, __LINE__)[(num_words)] +/// Helper macros to insert unused bytes or words to properly align structs. These values will be +/// zero-initialized. +#define INSERT_PADDING_BYTES(num_bytes) \ + [[maybe_unused]] std::array CONCAT2(pad, __LINE__) {} +#define INSERT_PADDING_WORDS(num_words) \ + [[maybe_unused]] std::array CONCAT2(pad, __LINE__) {} + +/// These are similar to the INSERT_PADDING_* macros but do not zero-initialize the contents. +/// This keeps the structure trivial to construct. 
+#define INSERT_PADDING_BYTES_NOINIT(num_bytes) \ + [[maybe_unused]] std::array CONCAT2(pad, __LINE__) +#define INSERT_PADDING_WORDS_NOINIT(num_words) \ + [[maybe_unused]] std::array CONCAT2(pad, __LINE__) // Inlining #ifdef _WIN32 diff --git a/src/common/hash.h b/src/common/hash.h index bcdcc4c5f..ac246fbb4 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -11,6 +11,15 @@ namespace Common { +/** + * Disables rehashing for std::unordered_map + */ +struct IdentityHash { + u64 operator()(const u64 hash) const { + return hash; + } +}; + /** * Computes a 64-bit hash over the specified block of data * @param data Block of data to compute hash over @@ -33,6 +42,14 @@ static inline u64 ComputeStructHash64(const T& data) noexcept { return ComputeHash64(&data, sizeof(data)); } +/** + * Combines hash lhs with hash rhs providing a unique result. + */ +static inline std::size_t HashCombine(std::size_t lhs, std::size_t rhs) noexcept { + lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2); + return lhs; +} + /// A helper template that ensures the padding in a struct is initialized by memsetting to 0. template struct HashableStruct { diff --git a/src/common/intrusive_ptr.h b/src/common/intrusive_ptr.h new file mode 100644 index 000000000..b05d89a5b --- /dev/null +++ b/src/common/intrusive_ptr.h @@ -0,0 +1,261 @@ +/* Copyright (c) 2017-2022 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include +#include +#include +#include +#include + +/// Simple reference counter for single threaded environments +class SingleThreadCounter { +public: + inline void AddRef() { + count++; + } + + inline bool Release() { + return --count == 0; + } + +private: + std::size_t count = 1; +}; + +/// Thread-safe reference counter with atomics +class MultiThreadCounter { +public: + MultiThreadCounter() { + count.store(1, std::memory_order_relaxed); + } + + inline void AddRef() { + count.fetch_add(1, std::memory_order_relaxed); + } + + inline bool Release() { + auto result = count.fetch_sub(1, std::memory_order_acq_rel); + return result == 1; + } + +private: + std::atomic_size_t count; +}; + +template +class IntrusivePtr; + +template , + typename ReferenceOps = SingleThreadCounter> +class IntrusivePtrEnabled { +public: + using IntrusivePtrType = IntrusivePtr; + using EnabledBase = T; + using EnabledDeleter = Deleter; + using EnabledReferenceOp = ReferenceOps; + + IntrusivePtrEnabled() = default; + IntrusivePtrEnabled(const IntrusivePtrEnabled &) = delete; + void operator=(const IntrusivePtrEnabled &) = delete; + + /// Decrement the reference counter and optionally free the memory + inline void ReleaseRef() { + if (ref_counter.Release()) { + Deleter()(static_cast(this)); + } + } + + /// Increment the reference counter + inline void AddRef() { + ref_counter.AddRef(); + } + +protected: + IntrusivePtr RefFromThis(); + +private: + ReferenceOps ref_counter; +}; + +/** + * 
Lightweight alternative to std::shared_ptr for reference counting + * usecases + */ +template +class IntrusivePtr { + using ReferenceBase = IntrusivePtrEnabled< + typename T::EnabledBase, + typename T::EnabledDeleter, + typename T::EnabledReferenceOp>; + + template + friend class IntrusivePtr; +public: + IntrusivePtr() = default; + explicit IntrusivePtr(T *handle) : data(handle) {} + + template + IntrusivePtr(const IntrusivePtr &other) { + *this = other; + } + + IntrusivePtr(const IntrusivePtr &other) { + *this = other; + } + + template + IntrusivePtr(IntrusivePtr &&other) noexcept { + *this = std::move(other); + } + + IntrusivePtr(IntrusivePtr &&other) noexcept { + *this = std::move(other); + } + + ~IntrusivePtr() { + Reset(); + } + + /// Returns a reference to the underlying data + T& operator*() { + return *data; + } + + /// Returns an immutable reference to the underlying data + const T& operator*() const { + return *data; + } + + /// Returns a pointer to the underlying data + T* operator->() { + return data; + } + + /// Returns an immutable pointer to the underlying data + const T* operator->() const { + return data; + } + + /// Returns true if the underlaying pointer it valid + bool IsValid() const { + return data != nullptr; + } + + /// Default comparison operators + auto operator<=>(const IntrusivePtr& other) const = default; + + /// Returns the raw pointer to the data + T* Get() { + return data; + } + + /// Returns an immutable raw pointer to the data + const T* Get() const { + return data; + } + + void Reset() { + // Static up-cast here to avoid potential issues with multiple intrusive inheritance. + // Also makes sure that the pointer type actually inherits from this type. 
+ if (data) + static_cast(data)->ReleaseRef(); + data = nullptr; + } + + template + IntrusivePtr& operator=(const IntrusivePtr& other) { + static_assert(std::is_base_of_v, "Cannot safely assign downcasted intrusive pointers."); + + Reset(); + data = static_cast(other.data); + + // Static up-cast here to avoid potential issues with multiple intrusive inheritance. + // Also makes sure that the pointer type actually inherits from this type. + if (data) { + static_cast(data)->AddRef(); + } + + return *this; + } + + IntrusivePtr& operator=(const IntrusivePtr& other) { + if (this != &other) { + Reset(); + data = other.data; + if (data) + static_cast(data)->AddRef(); + } + + return *this; + } + + template + IntrusivePtr &operator=(IntrusivePtr &&other) noexcept { + Reset(); + data = std::exchange(other.data, nullptr); + return *this; + } + + IntrusivePtr &operator=(IntrusivePtr &&other) noexcept { + if (this != &other) { + Reset(); + data = std::exchange(other.data, nullptr); + } + + return *this; + } + + T* Release() & { + return std::exchange(data, nullptr); + } + + T* Release() && { + return std::exchange(data, nullptr); + } + +private: + T* data = nullptr; +}; + +template +IntrusivePtr IntrusivePtrEnabled::RefFromThis() { + AddRef(); + return IntrusivePtr(static_cast(this)); +} + +template +using DerivedIntrusivePtrType = IntrusivePtr; + +template +DerivedIntrusivePtrType MakeHandle(P &&... p) { + return DerivedIntrusivePtrType(new T(std::forward

(p)...)); +} + +template +typename Base::IntrusivePtrType MakeDerivedHandle(P &&... p) { + return typename Base::IntrusivePtrType(new Derived(std::forward

(p)...)); +} + +template +using ThreadSafeIntrusivePtrEnabled = IntrusivePtrEnabled, MultiThreadCounter>; diff --git a/src/common/object_pool.cpp b/src/common/object_pool.cpp new file mode 100644 index 000000000..c59373f57 --- /dev/null +++ b/src/common/object_pool.cpp @@ -0,0 +1,70 @@ +/* Copyright (c) 2017-2022 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "common/object_pool.h" +#include +#include +#ifdef _WIN32 +#include +#endif + +void* memalign_alloc(size_t boundary, size_t size) { +#if defined(_WIN32) + return _aligned_malloc(size, boundary); +#elif defined(_ISOC11_SOURCE) + return aligned_alloc(boundary, size); +#elif (_POSIX_C_SOURCE >= 200112L) || (_XOPEN_SOURCE >= 600) + void *ptr = nullptr; + if (posix_memalign(&ptr, boundary, size) != 0) { + return nullptr; + } + return ptr; +#else + // Align stuff ourselves. Kinda ugly, but will work anywhere.
+ void **place; + uintptr_t addr = 0; + void *ptr = malloc(boundary + size + sizeof(uintptr_t)); + + if (ptr == nullptr) { + return nullptr; + } + + addr = ((uintptr_t)ptr + sizeof(uintptr_t) + boundary) & ~(boundary - 1); + place = (void **) addr; + place[-1] = ptr; + + return (void *) addr; +#endif +} + +void memalign_free(void *ptr) { +#if defined(_WIN32) + _aligned_free(ptr); +#elif !defined(_ISOC11_SOURCE) && !((_POSIX_C_SOURCE >= 200112L) || (_XOPEN_SOURCE >= 600)) + if (ptr != nullptr) { + void **p = (void **) ptr; + free(p[-1]); + } +#else + free(ptr); +#endif +} diff --git a/src/common/object_pool.h b/src/common/object_pool.h new file mode 100644 index 000000000..fdec8eaae --- /dev/null +++ b/src/common/object_pool.h @@ -0,0 +1,148 @@ +/* Copyright (c) 2017-2022 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +void *memalign_alloc(size_t boundary, size_t size); +void memalign_free(void *ptr); + +template +struct AlignedAllocation { + static void* operator new(size_t size) { + void* ret = memalign_alloc(alignof(T), size); + if (!ret) throw std::bad_alloc(); + return ret; + } + + static void* operator new[](size_t size) { + void* ret = memalign_alloc(alignof(T), size); + if (!ret) throw std::bad_alloc(); + return ret; + } + + static void operator delete(void *ptr) { + return memalign_free(ptr); + } + + static void operator delete[](void *ptr) { + return memalign_free(ptr); + } +}; + +/** + * Allocates objects of type T in batches of 64 * n where + * n is the number of times the pool has grown. So the first + * time it will allocate 64, then 128 objects etc. + */ +template +class ObjectPool { +public: + template + T* Allocate(P&&... p) { +#ifndef OBJECT_POOL_DEBUG + if (vacants.empty()) { + unsigned num_objects = 64u << memory.size(); + T *ptr = static_cast(memalign_alloc(std::max(64, alignof(T)), + num_objects * sizeof(T))); + if (!ptr) { + return nullptr; + } + + for (unsigned i = 0; i < num_objects; i++) { + vacants.push_back(&ptr[i]); + } + + memory.emplace_back(ptr); + } + + T *ptr = vacants.back(); + vacants.pop_back(); + new(ptr) T(std::forward

(p)...); + return ptr; +#else + return new T(std::forward

(p)...); +#endif + } + + void Free(T *ptr) { +#ifndef OBJECT_POOL_DEBUG + ptr->~T(); + vacants.push_back(ptr); +#else + delete ptr; +#endif + } + + void Clear() { +#ifndef OBJECT_POOL_DEBUG + vacants.clear(); + memory.clear(); +#endif + } + +protected: +#ifndef OBJECT_POOL_DEBUG + std::vector vacants; + + struct MallocDeleter { + void operator()(T *ptr) { + memalign_free(ptr); + } + }; + + std::vector> memory; +#endif +}; + +template +class ThreadSafeObjectPool : private ObjectPool { +public: + template + T* Allocate(P &&... p) { + std::lock_guard holder{lock}; + return ObjectPool::Allocate(std::forward

(p)...); + } + + void Free(T *ptr) { +#ifndef OBJECT_POOL_DEBUG + ptr->~T(); + std::lock_guard holder{lock}; + this->vacants.push_back(ptr); +#else + delete ptr; +#endif + } + + void Clear() { + std::lock_guard holder{lock}; + ObjectPool::Clear(); + } + +private: + std::mutex lock; +}; diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 0139db232..58412d6fc 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -158,10 +158,10 @@ struct ABIFrameInfo { inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, std::size_t rsp_alignment, std::size_t needed_frame_size) { - int count = (regs & ABI_ALL_GPRS).count(); + std::size_t count = (regs & ABI_ALL_GPRS).count(); rsp_alignment -= count * 8; std::size_t subtraction = 0; - int xmm_count = (regs & ABI_ALL_XMMS).count(); + std::size_t xmm_count = (regs & ABI_ALL_XMMS).count(); if (xmm_count) { // If we have any XMMs to save, we must align the stack here. subtraction = rsp_alignment & 0xF; diff --git a/src/core/settings.h b/src/core/settings.h index 398016148..6e95d7f1d 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -29,11 +29,9 @@ enum class LayoutOption { SingleScreen, LargeScreen, SideScreen, - // Similiar to default, but better for mobile devices in portrait mode. Top screen in clamped to // the top of the frame, and the bottom screen is enlarged to match the top screen. MobilePortrait, - // Similiar to LargeScreen, but better for mobile devices in landscape mode. The screens are // clamped to the top of the frame, and the bottom screen is a bit bigger. 
MobileLandscape, @@ -116,7 +114,6 @@ namespace NativeAnalog { enum Values { CirclePad, CStick, - NumAnalogs, }; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2e2d47c0d..67866f1ef 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -9,12 +9,12 @@ add_library(video_core STATIC pica.cpp pica.h pica_state.h - pica_types.h primitive_assembly.cpp primitive_assembly.h rasterizer_interface.h - regs.cpp - regs.h + pica_regs.inc + pica.cpp + pica.h regs_framebuffer.h regs_lighting.h regs_pipeline.h @@ -23,6 +23,15 @@ add_library(video_core STATIC regs_texturing.h renderer_base.cpp renderer_base.h + common/backend.h + common/buffer.h + common/framebuffer.h + common/pica_types.h + common/shader_gen.cpp + common/shader_gen.h + common/shader.h + common/texture.h + common/pipeline.h renderer_opengl/frame_dumper_opengl.cpp renderer_opengl/frame_dumper_opengl.h renderer_opengl/gl_rasterizer.cpp @@ -73,16 +82,21 @@ add_library(video_core STATIC renderer_vulkan/pica_to_vulkan.h renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/renderer_vulkan.h + renderer_vulkan/vk_backend.cpp + renderer_vulkan/vk_backend.h renderer_vulkan/vk_buffer.cpp renderer_vulkan/vk_buffer.h renderer_vulkan/vk_common.cpp renderer_vulkan/vk_common.h renderer_vulkan/vk_format_reinterpreter.cpp renderer_vulkan/vk_format_reinterpreter.h + renderer_vulkan/vk_format_util.cpp + renderer_vulkan/vk_format_util.h renderer_vulkan/vk_instance.cpp renderer_vulkan/vk_instance.h - renderer_vulkan/vk_pipeline_builder.cpp - renderer_vulkan/vk_pipeline_builder.h + renderer_vulkan/vk_pipeline.cpp + renderer_vulkan/vk_pipeline.h + renderer_vulkan/vk_platform.h renderer_vulkan/vk_rasterizer_cache.cpp renderer_vulkan/vk_rasterizer_cache.h renderer_vulkan/vk_rasterizer.cpp @@ -90,6 +104,8 @@ add_library(video_core STATIC renderer_vulkan/vk_shader_state.h renderer_vulkan/vk_shader_gen.cpp renderer_vulkan/vk_shader_gen.h + renderer_vulkan/vk_shader.cpp + 
renderer_vulkan/vk_shader.h renderer_vulkan/vk_state.cpp renderer_vulkan/vk_state.h renderer_vulkan/vk_surface_params.cpp @@ -180,7 +196,7 @@ target_link_libraries(video_core PRIVATE glad::glad glm::glm nihstro-headers Boo # Include Vulkan headers target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include) target_include_directories(video_core PRIVATE ../../externals/vma/include) -target_link_libraries(video_core PRIVATE glslang SPIRV glslang-default-resource-limits OGLCompiler) +target_link_libraries(video_core PRIVATE glslang SPIRV robin_hood::robin_hood) if (ARCHITECTURE_x86_64) target_link_libraries(video_core PUBLIC xbyak::xbyak) diff --git a/src/video_core/common/backend.h b/src/video_core/common/backend.h new file mode 100644 index 000000000..19f5a8f32 --- /dev/null +++ b/src/video_core/common/backend.h @@ -0,0 +1,60 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/object_pool.h" +#include "common/vector_math.h" +#include "video_core/common/pipeline.h" +#include "video_core/common/framebuffer.h" + +namespace Frontend { +class EmuWindow; +} + +namespace VideoCore { + +/// Common interface of a video backend +class BackendBase { +public: + BackendBase(Frontend::EmuWindow& window) : window(window) {} + virtual ~BackendBase() = default; + + // Triggers a swapchain buffer swap + virtual void SwapBuffers(); + + // Creates a backend specific texture handle + virtual TextureHandle CreateTexture(TextureInfo info) = 0; + + // Creates a backend specific buffer handle + virtual BufferHandle CreateBuffer(BufferInfo info) = 0; + + // Creates a backend specific framebuffer handle + virtual FramebufferHandle CreateFramebuffer(FramebufferInfo info) = 0; + + // Creates a backend specific pipeline handle + virtual PipelineHandle CreatePipeline(PipelineType type, PipelineInfo info) = 0; + + // Creates a backend specific sampler 
object + virtual SamplerHandle CreateSampler(SamplerInfo info) = 0; + + // Start a draw operation + virtual void Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer, + BufferHandle vertex_buffer, + u32 base_vertex, u32 num_vertices) = 0; + + // Start an indexed draw operation + virtual void DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer, + BufferHandle vertex_buffer, BufferHandle index_buffer, + u32 base_index, u32 num_indices, u32 base_vertex) = 0; + + // Executes a compute shader + virtual void DispatchCompute(PipelineHandle pipeline, Common::Vec3 groupsize, + Common::Vec3 groups) = 0; + +private: + Frontend::EmuWindow& window; +}; + +} // namespace VideoCore diff --git a/src/video_core/common/buffer.h b/src/video_core/common/buffer.h new file mode 100644 index 000000000..c17b1662a --- /dev/null +++ b/src/video_core/common/buffer.h @@ -0,0 +1,102 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "common/hash.h" +#include "common/intrusive_ptr.h" + +namespace VideoCore { + +enum class BufferUsage : u8 { + Vertex = 0, + Index = 1, + Uniform = 2, + Texel = 3, + Staging = 4, + Undefined = 255 +}; + +enum class ViewFormat : u8 { + R32Float = 0, + R32G32Float = 1, + R32G32B32Float = 2, + R32G32B32A32Float = 3, + Undefined = 255 +}; + +constexpr u32 MAX_BUFFER_VIEWS = 3; + +struct BufferInfo { + u32 capacity = 0; + BufferUsage usage = BufferUsage::Undefined; + std::array views{ViewFormat::Undefined}; + + const u64 Hash() const { + return Common::ComputeStructHash64(*this); + } +}; + +static_assert(sizeof(BufferInfo) == 8, "BufferInfo not packed!"); +static_assert(std::is_standard_layout_v, "BufferInfo is not a standard layout!"); + +class BufferBase : public IntrusivePtrEnabled { +public: + BufferBase() = default; + BufferBase(const BufferInfo& info) : info(info) {} + virtual ~BufferBase() = default; + + /// Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes + /// and the optional alignment requirement. + /// The actual used size must be specified on unmapping the chunk. 
+ virtual std::span Map(u32 size, u32 alignment = 0) {}; + + /// Flushes write to buffer memory + virtual void Commit(u32 size = 0) {}; + + /// Returns the size of the buffer in bytes + u32 GetCapacity() const { + return info.capacity; + } + + /// Returns the usage of the buffer + BufferUsage GetUsage() const { + return info.usage; + } + + /// Returns the starting offset of the currently mapped buffer slice + u64 GetCurrentOffset() const { + return buffer_offset; + } + + /// Returns whether the buffer was invalidated by the most recent Map call + bool IsInvalid() const { + return invalid; + } + + /// Invalidates the buffer + void Invalidate() { + buffer_offset = 0; + invalid = true; + } + +protected: + BufferInfo info{}; + u32 buffer_offset = 0; + bool invalid = false; +}; + +using BufferHandle = IntrusivePtr; + +} // namespace VideoCore + +namespace std { +template <> +struct hash { + std::size_t operator()(const VideoCore::BufferInfo& info) const noexcept { + return info.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/common/framebuffer.h b/src/video_core/common/framebuffer.h new file mode 100644 index 000000000..86a8e7219 --- /dev/null +++ b/src/video_core/common/framebuffer.h @@ -0,0 +1,69 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "video_core/common/texture.h" + +namespace VideoCore { + +enum class MSAASamples : u32 { + x1, + x2, + x4, + x8 +}; + +/** + * Information about a framebuffer + */ +struct FramebufferInfo { + TextureHandle color; + TextureHandle depth_stencil; + MSAASamples samples = MSAASamples::x1; + Rect2D draw_rect{}; + + /// Hashes the framebuffer object and returns a unique identifier + const u64 Hash() const { + // The only member IntrusivePtr has is a pointer to the + // handle so it's fine hash it + return Common::ComputeStructHash64(*this); + } +}; + +/** + * A framebuffer is a collection of render targets and their configuration + */ +class FramebufferBase : public IntrusivePtrEnabled { +public: + FramebufferBase(const FramebufferInfo& info) : info(info) {} + virtual ~FramebufferBase() = default; + + /// Returns an immutable reference to the color attachment + const TextureHandle& GetColorAttachment() const { + return info.color; + } + + /// Returns an immutable reference to the depth/stencil attachment + const TextureHandle& GetDepthStencilAttachment() const { + return info.depth_stencil; + } + + /// Returns how many samples the framebuffer takes + MSAASamples GetMSAASamples() const { + return info.samples; + } + + /// Returns the rendering area + Rect2D GetDrawRectangle() const { + return info.draw_rect; + } + +protected: + FramebufferInfo info; +}; + +using FramebufferHandle = IntrusivePtr; + +} // namespace VideoCore diff --git a/src/video_core/common/pica_types.h b/src/video_core/common/pica_types.h new file mode 100644 index 000000000..99a8248ac --- /dev/null +++ b/src/video_core/common/pica_types.h @@ -0,0 +1,157 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include "common/common_types.h" + +namespace Pica { + +/** + * Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision + * floating point. + * + * When decoding, format is as follows: + * - The first `M` bits are the mantissa + * - The next `E` bits are the exponent + * - The last bit is the sign bit + * + * @todo Verify on HW if this conversion is sufficiently accurate. + */ +template +struct Float { + static constexpr u32 width = M + E + 1; + static constexpr u32 bias = 128 - (1 << (E - 1)); + static constexpr u32 exponent_mask = (1 << E) - 1; + static constexpr u32 mantissa_mask = (1 << M) - 1; + static constexpr u32 sign_mask = 1 << (E + M); +public: + static Float FromFloat32(float val) { + Float ret; + ret.value = val; + return ret; + } + + static Float FromRaw(u32 hex) { + Float res; + + u32 exponent = (hex >> M) & exponent_mask; + const u32 mantissa = hex & mantissa_mask; + const u32 sign = (hex & sign_mask) << (31 - M - E); + + if (hex & (mantissa_mask | (exponent_mask << M))) { + if (exponent == exponent_mask) { + exponent = 255; + } else { + exponent += bias; + } + + hex = sign | (mantissa << (23 - M)) | (exponent << 23); + } else { + hex = sign; + } + + std::memcpy(&res.value, &hex, sizeof(float)); + return res; + } + + static Float Zero() { + return FromFloat32(0.f); + } + + // Not recommended for anything but logging + float ToFloat32() const { + return value; + } + + Float operator*(const Float& flt) const { + float result = value * flt.ToFloat32(); + // PICA gives 0 instead of NaN when multiplying by inf + if (std::isnan(result) && !std::isnan(value) && !std::isnan(flt.ToFloat32())) { + result = 0.f; + } + + return Float::FromFloat32(result); + } + + Float operator/(const Float& flt) const { + return Float::FromFloat32(ToFloat32() / flt.ToFloat32()); + } + + Float operator+(const Float& flt) const { + return Float::FromFloat32(ToFloat32() + flt.ToFloat32()); + } + 
+ Float operator-(const Float& flt) const { + return Float::FromFloat32(ToFloat32() - flt.ToFloat32()); + } + + Float& operator*=(const Float& flt) { + value = operator*(flt).value; + return *this; + } + + Float& operator/=(const Float& flt) { + value /= flt.ToFloat32(); + return *this; + } + + Float& operator+=(const Float& flt) { + value += flt.ToFloat32(); + return *this; + } + + Float& operator-=(const Float& flt) { + value -= flt.ToFloat32(); + return *this; + } + + Float operator-() const { + return Float::FromFloat32(-ToFloat32()); + } + + bool operator<(const Float& flt) const { + return ToFloat32() < flt.ToFloat32(); + } + + bool operator>(const Float& flt) const { + return ToFloat32() > flt.ToFloat32(); + } + + bool operator>=(const Float& flt) const { + return ToFloat32() >= flt.ToFloat32(); + } + + bool operator<=(const Float& flt) const { + return ToFloat32() <= flt.ToFloat32(); + } + + bool operator==(const Float& flt) const { + return ToFloat32() == flt.ToFloat32(); + } + + bool operator!=(const Float& flt) const { + return ToFloat32() != flt.ToFloat32(); + } + +private: + // Stored as a regular float, merely for convenience + // TODO: Perform proper arithmetic on this! + float value; + + friend class boost::serialization::access; + template + void serialize(Archive& ar, const unsigned int file_version) { + ar& value; + } +}; + +using Float24 = Float<16, 7>; +using Float20 = Float<12, 7>; +using Float16 = Float<10, 5>; + +} // namespace Pica diff --git a/src/video_core/common/pipeline.h b/src/video_core/common/pipeline.h new file mode 100644 index 000000000..c159f7f98 --- /dev/null +++ b/src/video_core/common/pipeline.h @@ -0,0 +1,223 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/bit_field.h" +#include "common/bit_field_array.h" +#include "common/hash.h" +#include "video_core/common/buffer.h" +#include "video_core/common/texture.h" +#include "video_core/common/shader.h" +#include "video_core/regs_framebuffer.h" +#include "video_core/regs_rasterizer.h" +#include "video_core/regs_pipeline.h" + +namespace VideoCore { + +constexpr u32 MAX_SHADER_STAGES = 3; +constexpr u32 MAX_VERTEX_ATTRIBUTES = 8; +constexpr u32 MAX_BINDINGS_IN_GROUP = 7; +constexpr u32 MAX_BINDING_GROUPS = 6; + +enum class PipelineType : u8 { + Compute = 0, + Graphics = 1 +}; + +enum class BindingType : u32 { + None = 0, + Uniform = 1, + UniformDynamic = 2, + TexelBuffer = 3, + Texture = 4, + Sampler = 5, + StorageImage = 6 +}; + +using BindingGroup = BitFieldArray<0, 3, MAX_BINDINGS_IN_GROUP, BindingType>; + +/** + * Describes all the resources used in the pipeline + */ +struct PipelineLayoutInfo { + u8 group_count = 0; + std::array binding_groups{}; + u8 push_constant_block_size = 0; +}; + +/** + * The pipeline state is tightly packed with bitfields to reduce + * the overhead of hashing as much as possible + */ +union RasterizationState { + u8 value = 0; + BitField<0, 2, Pica::TriangleTopology> topology; + BitField<4, 2, Pica::CullMode> cull_mode; +}; + +union DepthStencilState { + u64 value = 0; + BitField<0, 1, u64> depth_test_enable; + BitField<1, 1, u64> depth_write_enable; + BitField<2, 1, u64> stencil_test_enable; + BitField<3, 3, Pica::CompareFunc> depth_compare_op; + BitField<6, 3, Pica::StencilAction> stencil_fail_op; + BitField<9, 3, Pica::StencilAction> stencil_pass_op; + BitField<12, 3, Pica::StencilAction> stencil_depth_fail_op; + BitField<15, 3, Pica::CompareFunc> stencil_compare_op; + BitField<18, 8, u64> stencil_reference; + BitField<26, 8, u64> stencil_compare_mask; + BitField<34, 8, u64> stencil_write_mask; +}; + +union BlendState { + u32 value = 0; + BitField<0, 4, Pica::BlendFactor> src_color_blend_factor; + 
BitField<4, 4, Pica::BlendFactor> dst_color_blend_factor; + BitField<8, 3, Pica::BlendEquation> color_blend_eq; + BitField<11, 4, Pica::BlendFactor> src_alpha_blend_factor; + BitField<15, 4, Pica::BlendFactor> dst_alpha_blend_factor; + BitField<19, 3, Pica::BlendEquation> alpha_blend_eq; + BitField<22, 4, u32> color_write_mask; +}; + +enum class AttribType : u8 { + Float = 0, + Int = 1, + Short = 2 +}; + +union VertexAttribute { + u8 value = 0; + BitField<0, 2, AttribType> type; + BitField<2, 3, u8> components; +}; + +#pragma pack(1) +struct VertexLayout { + u8 stride = 0; + std::array attributes; +}; +#pragma pack() + +/** + * Information about a graphics/compute pipeline + */ +#pragma pack(1) +struct PipelineInfo { + std::array shaders{}; + VertexLayout vertex_layout{}; + PipelineLayoutInfo layout{}; + BlendState blending{}; + DepthStencilState depth_stencil{}; + RasterizationState rasterization{}; + + const u64 Hash() const { + return Common::ComputeStructHash64(*this); + } +}; +#pragma pack() + +class PipelineBase : public IntrusivePtrEnabled { +public: + PipelineBase(PipelineType type, PipelineInfo info) : + type(type), info(info) {} + virtual ~PipelineBase() = default; + + // Disable copy constructor + PipelineBase(const PipelineBase&) = delete; + PipelineBase& operator=(const PipelineBase&) = delete; + + // Binds the texture in the specified slot + virtual void BindTexture(u32 group, u32 slot, TextureHandle handle) = 0; + + // Binds the texture in the specified slot + virtual void BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view = 0) = 0; + + // Binds the sampler in the specified slot + virtual void BindSampler(u32 group, u32 slot, SamplerHandle handle) = 0; + + /// Sets the primitive topology + void SetTopology(Pica::TriangleTopology topology) { + info.rasterization.topology.Assign(topology); + } + + /// Sets the culling mode + void SetCullMode(Pica::CullMode mode) { + info.rasterization.cull_mode.Assign(mode); + } + + /// Configures the color 
blending function + void SetColorBlendFunc(Pica::BlendFactor src_color_factor, + Pica::BlendFactor dst_color_factor, + Pica::BlendEquation color_eq) { + info.blending.src_color_blend_factor.Assign(src_color_factor); + info.blending.dst_color_blend_factor.Assign(dst_color_factor); + info.blending.color_blend_eq.Assign(color_eq); + } + + /// Configures the alpha blending function + void SetAlphaBlendFunc(Pica::BlendFactor src_alpha_factor, + Pica::BlendFactor dst_alpha_factor, + Pica::BlendEquation alpha_eq) { + info.blending.src_alpha_blend_factor.Assign(src_alpha_factor); + info.blending.dst_alpha_blend_factor.Assign(dst_alpha_factor); + info.blending.alpha_blend_eq.Assign(alpha_eq); + } + + /// Sets the color write mask + void SetColorWriteMask(u32 mask) { + info.blending.color_write_mask.Assign(mask); + } + + /// Configures the depth test + void SetDepthTest(bool enable, Pica::CompareFunc compare_op) { + info.depth_stencil.depth_test_enable.Assign(enable); + info.depth_stencil.depth_compare_op.Assign(compare_op); + } + + /// Enables or disables depth writes + void SetDepthWrites(bool enable) { + info.depth_stencil.depth_write_enable.Assign(enable); + } + + /// Configures the stencil test + void SetStencilTest(bool enable, Pica::StencilAction fail, Pica::StencilAction pass, + Pica::StencilAction depth_fail, Pica::CompareFunc compare, u32 ref) { + info.depth_stencil.stencil_test_enable.Assign(enable); + info.depth_stencil.stencil_fail_op.Assign(fail); + info.depth_stencil.stencil_pass_op.Assign(pass); + info.depth_stencil.stencil_depth_fail_op.Assign(depth_fail); + info.depth_stencil.stencil_compare_op.Assign(compare); + info.depth_stencil.stencil_reference.Assign(ref); + } + + /// Selects the bits of the stencil values participating in the stencil test + void SetStencilCompareMask(u32 mask) { + info.depth_stencil.stencil_compare_mask.Assign(mask); + } + + /// Selects the bits of the stencil values updated by the stencil test + void SetStencilWriteMask(u32 mask) { 
+ info.depth_stencil.stencil_write_mask.Assign(mask); + } + +protected: + PipelineType type = PipelineType::Graphics; + PipelineInfo info{}; +}; + +using PipelineHandle = IntrusivePtr; + +} // namespace VideoCore + +namespace std { +template <> +struct hash { + std::size_t operator()(const VideoCore::PipelineInfo& info) const noexcept { + return info.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/common/shader.h b/src/video_core/common/shader.h new file mode 100644 index 000000000..6020322b9 --- /dev/null +++ b/src/video_core/common/shader.h @@ -0,0 +1,62 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include "common/common_types.h" +#include "common/intrusive_ptr.h" + +namespace VideoCore { + +enum class ShaderStage : u32 { + Vertex = 0, + Geometry = 1, + Fragment = 2, + Compute = 3, + Undefined = 4 +}; + +// Tells the module how much to optimize the bytecode +enum class ShaderOptimization : u32 { + High = 0, + Debug = 1 +}; + +/// Compiles shader source to backend representation +class ShaderBase : public IntrusivePtrEnabled { +public: + ShaderBase(ShaderStage stage, std::string_view name, std::string&& source) : + name(name), stage(stage), source(source) {} + virtual ~ShaderBase() = default; + + /// Compiles the shader source code + virtual bool Compile(ShaderOptimization level) = 0; + + /// Returns the API specific shader bytecode + std::string_view GetSource() const { + return source; + } + + /// Returns the name given the shader module + std::string_view GetName() const { + return name; + } + + /// Returns the pipeline stage the shader is assigned to + ShaderStage GetStage() const { + return stage; + } + +protected: + std::string_view name = "None"; + ShaderStage stage = ShaderStage::Undefined; + std::string source; +}; + +using ShaderHandle = IntrusivePtr; + +} // namespace VideoCore diff --git 
a/src/video_core/common/shader_gen.cpp b/src/video_core/common/shader_gen.cpp new file mode 100644 index 000000000..035fd0a6b --- /dev/null +++ b/src/video_core/common/shader_gen.cpp @@ -0,0 +1,179 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_set.h" +#include "video_core/video_core.h" +#include "video_core/common/shader_gen.h" + +namespace VideoCore { + +PicaFSConfig::PicaFSConfig(const Pica::Regs& regs) { + scissor_test_mode = regs.rasterizer.scissor_test.mode; + depthmap_enable = regs.rasterizer.depthmap_enable; + alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable + ? regs.framebuffer.output_merger.alpha_test.func.Value() + : Pica::CompareFunc::Always; + texture0_type = regs.texturing.texture0.type; + texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0; + + // We don't need these otherwise, reset them to avoid unnecessary shader generation + alphablend_enable = {}; + logic_op = {}; + + // Copy relevant tev stages fields. + // We don't sync const_color here because of the high variance, it is a + // shader uniform instead. 
+ const auto stages = regs.texturing.GetTevStages(); + DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); + for (std::size_t i = 0; i < stages.size(); i++) { + const auto& tev_stage = stages[i]; + tev_stages[i].sources_raw = tev_stage.sources_raw; + tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; + tev_stages[i].ops_raw = tev_stage.ops_raw; + tev_stages[i].scales_raw = tev_stage.scales_raw; + } + + fog_mode = regs.texturing.fog_mode; + fog_flip = regs.texturing.fog_flip != 0; + + combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() | + regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() + << 4; + + // Fragment lighting + lighting.enable = !regs.lighting.disable; + lighting.src_num = regs.lighting.max_light_index + 1; + + for (u32 light_index = 0; light_index < lighting.src_num; ++light_index) { + u32 num = regs.lighting.light_enable.GetNum(light_index); + const auto& light = regs.lighting.light[num]; + auto& dst_light = lighting.light[light_index]; + + dst_light.num = num; + dst_light.directional = light.config.directional != 0; + dst_light.two_sided_diffuse = light.config.two_sided_diffuse != 0; + dst_light.geometric_factor_0 = light.config.geometric_factor_0 != 0; + dst_light.geometric_factor_1 = light.config.geometric_factor_1 != 0; + dst_light.dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); + dst_light.spot_atten_enable = !regs.lighting.IsSpotAttenDisabled(num); + dst_light.shadow_enable = !regs.lighting.IsShadowDisabled(num); + } + + lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; + lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; + lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); + lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); + + lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0; + lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; + 
lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); + lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); + + // This is a dummy field due to lack of the corresponding register + lighting.lut_sp.enable = true; + lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0; + lighting.lut_sp.type = regs.lighting.lut_input.sp.Value(); + lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp); + + lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0; + lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; + lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); + lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); + + lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0; + lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; + lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); + lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); + + lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0; + lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; + lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); + lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); + + lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0; + lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; + lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); + lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); + + lighting.config = regs.lighting.config0.config; + lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha; + lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha; + lighting.bump_mode = regs.lighting.config0.bump_mode; + lighting.bump_selector = regs.lighting.config0.bump_selector; + 
lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0; + lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0; + + lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0; + lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0; + lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0; + lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0; + lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0; + lighting.shadow_selector = regs.lighting.config0.shadow_selector; + + proctex.enable = regs.texturing.main_config.texture3_enable; + if (proctex.enable) { + proctex.coord = regs.texturing.main_config.texture3_coordinates; + proctex.u_clamp = regs.texturing.proctex.u_clamp; + proctex.v_clamp = regs.texturing.proctex.v_clamp; + proctex.color_combiner = regs.texturing.proctex.color_combiner; + proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner; + proctex.separate_alpha = regs.texturing.proctex.separate_alpha; + proctex.noise_enable = regs.texturing.proctex.noise_enable; + proctex.u_shift = regs.texturing.proctex.u_shift; + proctex.v_shift = regs.texturing.proctex.v_shift; + proctex.lut_width = regs.texturing.proctex_lut.width; + proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0; + proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1; + proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2; + proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3; + proctex.lod_min = regs.texturing.proctex_lut.lod_min; + proctex.lod_max = regs.texturing.proctex_lut.lod_max; + proctex.lut_filter = regs.texturing.proctex_lut.filter; + } + + shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == + Pica::FragmentOperationMode::Shadow; + + shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0; +} + +PicaVSConfig::PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { + program_hash = 
setup.GetProgramCodeHash(); + swizzle_hash = setup.GetSwizzleDataHash(); + main_offset = regs.main_offset; + sanitize_mul = VideoCore::g_hw_shader_accurate_mul; + + num_outputs = 0; + output_map.fill(16); + + for (int reg : Common::BitSet(regs.output_mask)) { + output_map[reg] = num_outputs++; + } +} + +PicaFixedGSConfig::PicaFixedGSConfig(const Pica::Regs& regs) { + vs_output_attributes = Common::BitSet(regs.vs.output_mask).Count(); + gs_output_attributes = vs_output_attributes; + + semantic_maps.fill({16, 0}); + for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) { + const std::array semantics = { + regs.rasterizer.vs_output_attributes[attrib].map_x.Value(), + regs.rasterizer.vs_output_attributes[attrib].map_y.Value(), + regs.rasterizer.vs_output_attributes[attrib].map_z.Value(), + regs.rasterizer.vs_output_attributes[attrib].map_w.Value(), + }; + + for (u32 comp = 0; comp < 4; ++comp) { + const std::size_t semantic = static_cast(semantics[comp]); + if (semantic < 24) { + semantic_maps[semantic] = {attrib, comp}; + } else if (semantic != Pica::RasterizerRegs::VSOutputAttributes::INVALID) { + LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic); + } + } + } +} + +} // namespace VideoCore diff --git a/src/video_core/common/shader_gen.h b/src/video_core/common/shader_gen.h new file mode 100644 index 000000000..334f44ce7 --- /dev/null +++ b/src/video_core/common/shader_gen.h @@ -0,0 +1,227 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include "common/hash.h" +#include "video_core/regs.h" +#include "video_core/shader/shader.h" + +namespace VideoCore { + +enum Attributes { + ATTRIBUTE_POSITION, + ATTRIBUTE_COLOR, + ATTRIBUTE_TEXCOORD0, + ATTRIBUTE_TEXCOORD1, + ATTRIBUTE_TEXCOORD2, + ATTRIBUTE_TEXCOORD0_W, + ATTRIBUTE_NORMQUAT, + ATTRIBUTE_VIEW, +}; + +// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs() +struct TevStageConfigRaw { + u32 sources_raw; + u32 modifiers_raw; + u32 ops_raw; + u32 scales_raw; + + explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept { + Pica::TexturingRegs::TevStageConfig stage; + stage.sources_raw = sources_raw; + stage.modifiers_raw = modifiers_raw; + stage.ops_raw = ops_raw; + stage.const_color = 0; + stage.scales_raw = scales_raw; + return stage; + } +}; + +/** + * This struct contains all state used to generate the GLSL fragment shader that emulates the + * current Pica register configuration. This struct is used as a cache key for generated GLSL shader + * programs. The functions in gl_shader_gen.cpp should retrieve state from this struct only, not by + * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where + * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) + * two separate shaders sharing the same key. 
+ */ +struct PicaFSConfig { + explicit PicaFSConfig(const Pica::Regs& regs); + + /// Returns the hash of the VS config + const u64 Hash() const noexcept { + return Common::ComputeStructHash64(*this); + } + + bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { + return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index)); + } + + bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { + return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index)); + } + + Pica::CompareFunc alpha_test_func; + Pica::RasterizerRegs::ScissorMode scissor_test_mode; + Pica::TexturingRegs::TextureConfig::TextureType texture0_type; + std::array tev_stages; + bool texture2_use_coord1; + u8 combiner_buffer_input; + + Pica::RasterizerRegs::DepthBuffering depthmap_enable; + Pica::TexturingRegs::FogMode fog_mode; + bool fog_flip; + bool alphablend_enable; + Pica::LogicOp logic_op; + + struct { + struct { + unsigned num; + bool directional; + bool two_sided_diffuse; + bool dist_atten_enable; + bool spot_atten_enable; + bool geometric_factor_0; + bool geometric_factor_1; + bool shadow_enable; + } light[8]; + + bool enable; + unsigned src_num; + Pica::LightingRegs::LightingBumpMode bump_mode; + unsigned bump_selector; + bool bump_renorm; + bool clamp_highlights; + + Pica::LightingRegs::LightingConfig config; + bool enable_primary_alpha; + bool enable_secondary_alpha; + + bool enable_shadow; + bool shadow_primary; + bool shadow_secondary; + bool shadow_invert; + bool shadow_alpha; + unsigned shadow_selector; + + struct { + bool enable; + bool abs_input; + Pica::LightingRegs::LightingLutInput type; + float scale; + } lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb; + } lighting; + + struct { + bool enable; + u32 coord; + Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp; + Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner; + bool separate_alpha; + bool noise_enable; + Pica::TexturingRegs::ProcTexShift u_shift, 
v_shift; + u32 lut_width; + u32 lut_offset0; + u32 lut_offset1; + u32 lut_offset2; + u32 lut_offset3; + u32 lod_min; + u32 lod_max; + Pica::TexturingRegs::ProcTexFilter lut_filter; + } proctex; + + bool shadow_rendering; + bool shadow_texture_orthographic; +}; + +/** + * This struct contains information to identify a host vertex shader generated from PICA vertex + * shader. + */ +struct PicaVSConfig { + explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup); + + /// Returns the hash of the VS config + const u64 Hash() const noexcept { + return Common::ComputeStructHash64(*this); + } + + u64 program_hash = 0; + u64 swizzle_hash = 0; + u32 main_offset = 0; + bool sanitize_mul = false; + + // output_map[output register index] -> output attribute index + u32 num_outputs = 0; + std::array output_map{}; +}; + +/** + * This struct contains information to identify a GL geometry shader generated from PICA no-geometry + * shader pipeline + */ +struct PicaFixedGSConfig { + explicit PicaFixedGSConfig(const Pica::Regs& regs); + + /// Returns the hash of the GS config + const u64 Hash() const noexcept { + return Common::ComputeStructHash64(*this); + } + + u32 vs_output_attributes = 0; + u32 gs_output_attributes = 0; + + struct SemanticMap { + u32 attribute_index = 0; + u32 component_index = 0; + }; + + // semantic_maps[semantic name] -> GS output attribute index + component index + std::array semantic_maps{}; +}; + +/** + * Generates backend specific shader modules using the Pica state configuration + * @todo Be replaced with a unified shader compiler + */ +class ShaderGeneratorBase { +public: + ShaderGeneratorBase() = default; + virtual ~ShaderGeneratorBase() = default; + + /** + * Generates the GLSL vertex shader program source code that accepts vertices from software shader + * and directly passes them to the fragment shader. 
+ * @param separable_shader generates shader that can be used for separate shader object + * @returns String of the shader source code + */ + virtual std::string GenerateTrivialVertexShader(bool separable_shader) = 0; + + /** + * Generates the GLSL vertex shader program source code for the given VS program + * @returns String of the shader source code + */ + virtual std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, + bool separable_shader) = 0; + + /** + * Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline + * @returns String of the shader source code + */ + virtual std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) = 0; + + /** + * Generates the GLSL fragment shader program source code for the current Pica state + * @param config ShaderCacheKey object generated for the current Pica state, used for the shader + * configuration (NOTE: Use state in this struct only, not the Pica registers!) + * @param separable_shader generates shader that can be used for separate shader object + * @returns String of the shader source code + */ + virtual std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) = 0; +}; + +} // namespace VideoCore diff --git a/src/video_core/common/texture.h b/src/video_core/common/texture.h new file mode 100644 index 000000000..2a5bf9ce7 --- /dev/null +++ b/src/video_core/common/texture.h @@ -0,0 +1,171 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "common/hash.h" +#include "common/intrusive_ptr.h" +#include "video_core/regs_texturing.h" + +namespace VideoCore { + +constexpr u32 MAX_COLOR_FORMATS = 5; +constexpr u32 MAX_DEPTH_FORMATS = 3; + +enum class TextureFormat : u8 { + RGBA8 = 0, + RGB8 = 1, + RGB5A1 = 2, + RGB565 = 3, + RGBA4 = 4, + D16 = 5, + D24 = 6, + D24S8 = 7, + Undefined = 255 +}; + +enum class TextureType : u8 { + Texture1D = 0, + Texture2D = 1, + Texture3D = 2, + Undefined = 255 +}; + +enum class TextureViewType : u8 { + View1D = 0, + View2D = 1, + View3D = 2, + ViewCube = 3, + View1DArray = 4, + View2DArray = 5, + ViewCubeArray = 6, + Undefined = 255 +}; + +/** + * A rectangle describing part of a texture + * @param x, y are the offset from the bottom left corner + * @param width, height are the extent of the rectangle + */ +struct Rect2D { + s32 x = 0; + s32 y = 0; + u32 width = 0; + u32 height = 0; +}; + +/** + * Information about a texture packed to 8 bytes + */ +struct TextureInfo { + u16 width = 0; + u16 height = 0; + u8 levels = 0; + TextureType type = TextureType::Undefined; + TextureViewType view_type = TextureViewType::Undefined; + TextureFormat format = TextureFormat::Undefined; + + const u64 Hash() const { + return Common::ComputeStructHash64(*this); + } +}; + +static_assert(sizeof(TextureInfo) == 8, "TextureInfo not packed!"); +static_assert(std::is_standard_layout_v, "TextureInfo is not a standard layout!"); + +class TextureBase; +using TextureHandle = IntrusivePtr; + +class TextureBase : public IntrusivePtrEnabled { +public: + TextureBase() = default; + TextureBase(const TextureInfo& info) : info(info) {} + virtual ~TextureBase() = default; + + /// Uploads pixel data to the GPU memory + virtual void Upload(Rect2D rectangle, u32 stride, std::span data, + u32 level = 0) {}; + + /// Downloads pixel data from GPU memory + virtual void Download(Rect2D rectangle, u32 stride, std::span data, + u32 level = 0) {}; + + /// Copies the rectangle area 
specified to the destination texture
+ +#include +#include +#include "core/core.h" +#include "video_core/pica.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" + +std::unique_ptr CreateRenderer(Core::System& system, + Frontend::EmuWindow& emu_window) { + auto& telemetry_session = system.TelemetrySession(); + auto& cpu_memory = system.Memory(); + + switch (Settings::values.renderer_backend) { + case Settings::RendererBackend::OpenGL: + return std::make_unique(emu_window); + case Settings::RendererBackend::Vulkan: + return std::make_unique(emu_window); + default: + return nullptr; + } +} + +namespace Pica { + +GPU::GPU(Core::System& system, Memory::MemorySystem& memory) : + system(system), memory(memory) { + //renderer = CreateRenderer(system, ) + rasterizer = renderer->Rasterizer(); +} + +void GPU::SwapBuffers() { + renderer->SwapBuffers(); +} + +void GPU::FlushAll() { + rasterizer->FlushAll(); +} + +void GPU::FlushRegion(PAddr addr, u32 size) { + rasterizer->FlushRegion(addr, size); +} + +void GPU::InvalidateRegion(PAddr addr, u32 size) { + rasterizer->InvalidateRegion(addr, size); +} + +void GPU::FlushAndInvalidateRegion(PAddr addr, u32 size) { + rasterizer->FlushAndInvalidateRegion(addr, size); +} + +void GPU::ClearAll(bool flush) { + rasterizer->ClearAll(flush); +} + +} // namespace Pica diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h new file mode 100644 index 000000000..bb3128cfd --- /dev/null +++ b/src/video_core/gpu.h @@ -0,0 +1,81 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "core/frontend/framebuffer_layout.h" +#include "video_core/maestro.h" + +namespace Core { +class System; +} + +namespace Memory { +class MemorySystem; +} + +namespace Frontend { +class EmuWindow; +} + +namespace VideoCore { +class RendererBase; +class RasterizerInterface; +} + +namespace Pica { + +class Maestro; + +enum class ResultStatus { + Success, + ErrorGenericDrivers, + ErrorUnsupportedGL, +}; + +/** + * Interface for the PICA GPU + */ +class GPU { +public: + GPU(Core::System& system, Memory::MemorySystem& memory); + ~GPU() = default; + + /// Swap buffers (render frame) + void SwapBuffers(); + + /// Notify rasterizer that all caches should be flushed to 3DS memory + void FlushAll(); + + /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory + void FlushRegion(PAddr addr, u32 size); + + /// Notify rasterizer that any caches of the specified region should be invalidated + void InvalidateRegion(PAddr addr, u32 size); + + /// Notify rasterizer that any caches of the specified region should be flushed and invalidated + void FlushAndInvalidateRegion(PAddr addr, u32 size); + + /// Removes as much state as possible from the rasterizer in preparation for a save/load state + void ClearAll(bool flush); + + /// Request a screenshot of the next frame + void RequestScreenshot(u8* data, std::function callback, + const Layout::FramebufferLayout& layout); + + /// Returns the resolution scale factor + u16 GetResolutionScaleFactor(); + +private: + Core::System& system; + Memory::MemorySystem& memory; + + // Renderer + VideoCore::RasterizerInterface* rasterizer = nullptr; + std::unique_ptr renderer = nullptr; + std::unique_ptr maestro = nullptr; +}; + +} // namespace VideoCore diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index 283cb330d..d2f8874fe 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -34,13 +34,13 @@ template void Zero(T& o) { 
static_assert(std::is_trivially_copyable_v, "It's undefined behavior to memset a non-trivially copyable type"); - std::memset(&o, 0, sizeof(o)); + memset(&o, 0, sizeof(o)); } State::State() : geometry_pipeline(*this) { auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) { using Pica::Shader::OutputVertex; - auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, + auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); }; diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 6d6ff0f63..689d989b7 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -4,7 +4,6 @@ #pragma once -#include "video_core/regs_texturing.h" namespace Pica { /// Initialize Pica state diff --git a/src/video_core/pica_regs.inc b/src/video_core/pica_regs.inc new file mode 100644 index 000000000..5844d5c64 --- /dev/null +++ b/src/video_core/pica_regs.inc @@ -0,0 +1,400 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +//#define PICA_REG(name, address) +PICA_REG(FINALIZE, 0x010) +PICA_REG(FACECULLING_CONFIG, 0x040) +PICA_REG(VIEWPORT_WIDTH, 0x041) +PICA_REG(VIEWPORT_INVW, 0x042) +PICA_REG(VIEWPORT_HEIGHT, 0x043) +PICA_REG(VIEWPORT_INVH, 0x044) +PICA_REG(FRAGOP_CLIP, 0x047) +PICA_REG(FRAGOP_CLIP_DATA0, 0x048) +PICA_REG(FRAGOP_CLIP_DATA1, 0x049) +PICA_REG(FRAGOP_CLIP_DATA2, 0x04A) +PICA_REG(FRAGOP_CLIP_DATA3, 0x04B) +PICA_REG(DEPTHMAP_SCALE, 0x04D) +PICA_REG(DEPTHMAP_OFFSET, 0x04E) +PICA_REG(SH_OUTMAP_TOTAL, 0x04F) +PICA_REG(SH_OUTMAP_O0, 0x050) +PICA_REG(SH_OUTMAP_O1, 0x051) +PICA_REG(SH_OUTMAP_O2, 0x052) +PICA_REG(SH_OUTMAP_O3, 0x053) +PICA_REG(SH_OUTMAP_O4, 0x054) +PICA_REG(SH_OUTMAP_O5, 0x055) +PICA_REG(SH_OUTMAP_O6, 0x056) +PICA_REG(EARLYDEPTH_FUNC, 0x061) +PICA_REG(EARLYDEPTH_TEST1, 0x062) +PICA_REG(EARLYDEPTH_CLEAR, 0x063) +PICA_REG(SH_OUTATTR_MODE, 0x064) +PICA_REG(SCISSORTEST_MODE, 0x065) +PICA_REG(SCISSORTEST_POS, 0x066) +PICA_REG(SCISSORTEST_DIM, 0x067) +PICA_REG(VIEWPORT_XY, 0x068) +PICA_REG(EARLYDEPTH_DATA, 0x06A) +PICA_REG(DEPTHMAP_ENABLE, 0x06D) +PICA_REG(RENDERBUF_DIM, 0x06E) +PICA_REG(SH_OUTATTR_CLOCK, 0x06F) +PICA_REG(TEXUNIT_CONFIG, 0x080) +PICA_REG(TEXUNIT0_BORDER_COLOR, 0x081) +PICA_REG(TEXUNIT0_DIM, 0x082) +PICA_REG(TEXUNIT0_PARAM, 0x083) +PICA_REG(TEXUNIT0_LOD, 0x084) +PICA_REG(TEXUNIT0_ADDR1, 0x085) +PICA_REG(TEXUNIT0_ADDR2, 0x086) +PICA_REG(TEXUNIT0_ADDR3, 0x087) +PICA_REG(TEXUNIT0_ADDR4, 0x088) +PICA_REG(TEXUNIT0_ADDR5, 0x089) +PICA_REG(TEXUNIT0_ADDR6, 0x08A) +PICA_REG(TEXUNIT0_SHADOW, 0x08B) +PICA_REG(TEXUNIT0_TYPE, 0x08E) +PICA_REG(LIGHTING_ENABLE0, 0x08F) +PICA_REG(TEXUNIT1_BORDER_COLOR, 0x091) +PICA_REG(TEXUNIT1_DIM, 0x092) +PICA_REG(TEXUNIT1_PARAM, 0x093) +PICA_REG(TEXUNIT1_LOD, 0x094) +PICA_REG(TEXUNIT1_ADDR, 0x095) +PICA_REG(TEXUNIT1_TYPE, 0x096) +PICA_REG(TEXUNIT2_BORDER_COLOR, 0x099) +PICA_REG(TEXUNIT2_DIM, 0x09A) +PICA_REG(TEXUNIT2_PARAM, 0x09B) +PICA_REG(TEXUNIT2_LOD, 0x09C) +PICA_REG(TEXUNIT2_ADDR, 0x09D) +PICA_REG(TEXUNIT2_TYPE, 0x09E) 
+PICA_REG(TEXUNIT3_PROCTEX0, 0x0A8) +PICA_REG(TEXUNIT3_PROCTEX1, 0x0A9) +PICA_REG(TEXUNIT3_PROCTEX2, 0x0AA) +PICA_REG(TEXUNIT3_PROCTEX3, 0x0AB) +PICA_REG(TEXUNIT3_PROCTEX4, 0x0AC) +PICA_REG(TEXUNIT3_PROCTEX5, 0x0AD) +PICA_REG(PROCTEX_LUT, 0x0AF) +PICA_REG(PROCTEX_LUT_DATA0, 0x0B0) +PICA_REG(PROCTEX_LUT_DATA1, 0x0B1) +PICA_REG(PROCTEX_LUT_DATA2, 0x0B2) +PICA_REG(PROCTEX_LUT_DATA3, 0x0B3) +PICA_REG(PROCTEX_LUT_DATA4, 0x0B4) +PICA_REG(PROCTEX_LUT_DATA5, 0x0B5) +PICA_REG(PROCTEX_LUT_DATA6, 0x0B6) +PICA_REG(PROCTEX_LUT_DATA7, 0x0B7) +PICA_REG(TEXENV0_SOURCE, 0x0C0) +PICA_REG(TEXENV0_OPERAND, 0x0C1) +PICA_REG(TEXENV0_COMBINER, 0x0C2) +PICA_REG(TEXENV0_COLOR, 0x0C3) +PICA_REG(TEXENV0_SCALE, 0x0C4) +PICA_REG(TEXENV1_SOURCE, 0x0C8) +PICA_REG(TEXENV1_OPERAND, 0x0C9) +PICA_REG(TEXENV1_COMBINER, 0x0CA) +PICA_REG(TEXENV1_COLOR, 0x0CB) +PICA_REG(TEXENV1_SCALE, 0x0CC) +PICA_REG(TEXENV2_SOURCE, 0x0D0) +PICA_REG(TEXENV2_OPERAND, 0x0D1) +PICA_REG(TEXENV2_COMBINER, 0x0D2) +PICA_REG(TEXENV2_COLOR, 0x0D3) +PICA_REG(TEXENV2_SCALE, 0x0D4) +PICA_REG(TEXENV3_SOURCE, 0x0D8) +PICA_REG(TEXENV3_OPERAND, 0x0D9) +PICA_REG(TEXENV3_COMBINER, 0x0DA) +PICA_REG(TEXENV3_COLOR, 0x0DB) +PICA_REG(TEXENV3_SCALE, 0x0DC) +PICA_REG(TEXENV_UPDATE_BUFFER, 0x0E0) +PICA_REG(FOG_COLOR, 0x0E1) +PICA_REG(GAS_ATTENUATION, 0x0E4) +PICA_REG(GAS_ACCMAX, 0x0E5) +PICA_REG(FOG_LUT_INDEX, 0x0E6) +PICA_REG(FOG_LUT_DATA0, 0x0E8) +PICA_REG(FOG_LUT_DATA1, 0x0E9) +PICA_REG(FOG_LUT_DATA2, 0x0EA) +PICA_REG(FOG_LUT_DATA3, 0x0EB) +PICA_REG(FOG_LUT_DATA4, 0x0EC) +PICA_REG(FOG_LUT_DATA5, 0x0ED) +PICA_REG(FOG_LUT_DATA6, 0x0EE) +PICA_REG(FOG_LUT_DATA7, 0x0EF) +PICA_REG(TEXENV4_SOURCE, 0x0F0) +PICA_REG(TEXENV4_OPERAND, 0x0F1) +PICA_REG(TEXENV4_COMBINER, 0x0F2) +PICA_REG(TEXENV4_COLOR, 0x0F3) +PICA_REG(TEXENV4_SCALE, 0x0F4) +PICA_REG(TEXENV5_SOURCE, 0x0F8) +PICA_REG(TEXENV5_OPERAND, 0x0F9) +PICA_REG(TEXENV5_COMBINER, 0x0FA) +PICA_REG(TEXENV5_COLOR, 0x0FB) +PICA_REG(TEXENV5_SCALE, 0x0FC) +PICA_REG(TEXENV_BUFFER_COLOR, 0x0FD) 
+PICA_REG(COLOR_OPERATION, 0x100) +PICA_REG(BLEND_FUNC, 0x101) +PICA_REG(LOGIC_OP, 0x102) +PICA_REG(BLEND_COLOR, 0x103) +PICA_REG(FRAGOP_ALPHA_TEST, 0x104) +PICA_REG(STENCIL_TEST, 0x105) +PICA_REG(STENCIL_OP, 0x106) +PICA_REG(DEPTH_COLOR_MASK, 0x107) +PICA_REG(FRAMEBUFFER_INVALIDATE, 0x110) +PICA_REG(FRAMEBUFFER_FLUSH, 0x111) +PICA_REG(COLORBUFFER_READ, 0x112) +PICA_REG(COLORBUFFER_WRITE, 0x113) +PICA_REG(DEPTHBUFFER_READ, 0x114) +PICA_REG(DEPTHBUFFER_WRITE, 0x115) +PICA_REG(DEPTHBUFFER_FORMAT, 0x116) +PICA_REG(COLORBUFFER_FORMAT, 0x117) +PICA_REG(EARLYDEPTH_TEST2, 0x118) +PICA_REG(FRAMEBUFFER_BLOCK32, 0x11B) +PICA_REG(DEPTHBUFFER_LOC, 0x11C) +PICA_REG(COLORBUFFER_LOC, 0x11D) +PICA_REG(FRAMEBUFFER_DIM, 0x11E) +PICA_REG(GAS_LIGHT_XY, 0x120) +PICA_REG(GAS_LIGHT_Z, 0x121) +PICA_REG(GAS_LIGHT_Z_COLOR, 0x122) +PICA_REG(GAS_LUT_INDEX, 0x123) +PICA_REG(GAS_LUT_DATA, 0x124) +PICA_REG(GAS_DELTAZ_DEPTH, 0x126) +PICA_REG(FRAGOP_SHADOW, 0x130) +PICA_REG(LIGHT0_SPECULAR0, 0x140) +PICA_REG(LIGHT0_SPECULAR1, 0x141) +PICA_REG(LIGHT0_DIFFUSE, 0x142) +PICA_REG(LIGHT0_AMBIENT, 0x143) +PICA_REG(LIGHT0_XY, 0x144) +PICA_REG(LIGHT0_Z, 0x145) +PICA_REG(LIGHT0_SPOTDIR_XY, 0x146) +PICA_REG(LIGHT0_SPOTDIR_Z, 0x147) +PICA_REG(LIGHT0_CONFIG, 0x149) +PICA_REG(LIGHT0_ATTENUATION_BIAS, 0x14A) +PICA_REG(LIGHT0_ATTENUATION_SCALE, 0x14B) +PICA_REG(LIGHT1_SPECULAR0, 0x150) +PICA_REG(LIGHT1_SPECULAR1, 0x151) +PICA_REG(LIGHT1_DIFFUSE, 0x152) +PICA_REG(LIGHT1_AMBIENT, 0x153) +PICA_REG(LIGHT1_XY, 0x154) +PICA_REG(LIGHT1_Z, 0x155) +PICA_REG(LIGHT1_SPOTDIR_XY, 0x156) +PICA_REG(LIGHT1_SPOTDIR_Z, 0x157) +PICA_REG(LIGHT1_CONFIG, 0x159) +PICA_REG(LIGHT1_ATTENUATION_BIAS, 0x15A) +PICA_REG(LIGHT1_ATTENUATION_SCALE, 0x15B) +PICA_REG(LIGHT2_SPECULAR0, 0x160) +PICA_REG(LIGHT2_SPECULAR1, 0x161) +PICA_REG(LIGHT2_DIFFUSE, 0x162) +PICA_REG(LIGHT2_AMBIENT, 0x163) +PICA_REG(LIGHT2_XY, 0x164) +PICA_REG(LIGHT2_Z, 0x165) +PICA_REG(LIGHT2_SPOTDIR_XY, 0x166) +PICA_REG(LIGHT2_SPOTDIR_Z, 0x167) +PICA_REG(LIGHT2_CONFIG, 0x169) 
+PICA_REG(LIGHT2_ATTENUATION_BIAS, 0x16A) +PICA_REG(LIGHT2_ATTENUATION_SCALE, 0x16B) +PICA_REG(LIGHT3_SPECULAR0, 0x170) +PICA_REG(LIGHT3_SPECULAR1, 0x171) +PICA_REG(LIGHT3_DIFFUSE, 0x172) +PICA_REG(LIGHT3_AMBIENT, 0x173) +PICA_REG(LIGHT3_XY, 0x174) +PICA_REG(LIGHT3_Z, 0x175) +PICA_REG(LIGHT3_SPOTDIR_XY, 0x176) +PICA_REG(LIGHT3_SPOTDIR_Z, 0x177) +PICA_REG(LIGHT3_CONFIG, 0x179) +PICA_REG(LIGHT3_ATTENUATION_BIAS, 0x17A) +PICA_REG(LIGHT3_ATTENUATION_SCALE, 0x17B) +PICA_REG(LIGHT4_SPECULAR0, 0x180) +PICA_REG(LIGHT4_SPECULAR1, 0x181) +PICA_REG(LIGHT4_DIFFUSE, 0x182) +PICA_REG(LIGHT4_AMBIENT, 0x183) +PICA_REG(LIGHT4_XY, 0x184) +PICA_REG(LIGHT4_Z, 0x185) +PICA_REG(LIGHT4_SPOTDIR_XY, 0x186) +PICA_REG(LIGHT4_SPOTDIR_Z, 0x187) +PICA_REG(LIGHT4_CONFIG, 0x189) +PICA_REG(LIGHT4_ATTENUATION_BIAS, 0x18A) +PICA_REG(LIGHT4_ATTENUATION_SCALE, 0x18B) +PICA_REG(LIGHT5_SPECULAR0, 0x190) +PICA_REG(LIGHT5_SPECULAR1, 0x191) +PICA_REG(LIGHT5_DIFFUSE, 0x192) +PICA_REG(LIGHT5_AMBIENT, 0x193) +PICA_REG(LIGHT5_XY, 0x194) +PICA_REG(LIGHT5_Z, 0x195) +PICA_REG(LIGHT5_SPOTDIR_XY, 0x196) +PICA_REG(LIGHT5_SPOTDIR_Z, 0x197) +PICA_REG(LIGHT5_CONFIG, 0x199) +PICA_REG(LIGHT5_ATTENUATION_BIAS, 0x19A) +PICA_REG(LIGHT5_ATTENUATION_SCALE, 0x19B) +PICA_REG(LIGHT6_SPECULAR0, 0x1A0) +PICA_REG(LIGHT6_SPECULAR1, 0x1A1) +PICA_REG(LIGHT6_DIFFUSE, 0x1A2) +PICA_REG(LIGHT6_AMBIENT, 0x1A3) +PICA_REG(LIGHT6_XY, 0x1A4) +PICA_REG(LIGHT6_Z, 0x1A5) +PICA_REG(LIGHT6_SPOTDIR_XY, 0x1A6) +PICA_REG(LIGHT6_SPOTDIR_Z, 0x1A7) +PICA_REG(LIGHT6_CONFIG, 0x1A9) +PICA_REG(LIGHT6_ATTENUATION_BIAS, 0x1AA) +PICA_REG(LIGHT6_ATTENUATION_SCALE, 0x1AB) +PICA_REG(LIGHT7_SPECULAR0, 0x1B0) +PICA_REG(LIGHT7_SPECULAR1, 0x1B1) +PICA_REG(LIGHT7_DIFFUSE, 0x1B2) +PICA_REG(LIGHT7_AMBIENT, 0x1B3) +PICA_REG(LIGHT7_XY, 0x1B4) +PICA_REG(LIGHT7_Z, 0x1B5) +PICA_REG(LIGHT7_SPOTDIR_XY, 0x1B6) +PICA_REG(LIGHT7_SPOTDIR_Z, 0x1B7) +PICA_REG(LIGHT7_CONFIG, 0x1B9) +PICA_REG(LIGHT7_ATTENUATION_BIAS, 0x1BA) +PICA_REG(LIGHT7_ATTENUATION_SCALE, 0x1BB) 
+PICA_REG(LIGHTING_AMBIENT, 0x1C0) +PICA_REG(LIGHTING_NUM_LIGHTS, 0x1C2) +PICA_REG(LIGHTING_CONFIG0, 0x1C3) +PICA_REG(LIGHTING_CONFIG1, 0x1C4) +PICA_REG(LIGHTING_LUT_INDEX, 0x1C5) +PICA_REG(LIGHTING_ENABLE1, 0x1C6) +PICA_REG(LIGHTING_LUT_DATA0, 0x1C8) +PICA_REG(LIGHTING_LUT_DATA1, 0x1C9) +PICA_REG(LIGHTING_LUT_DATA2, 0x1CA) +PICA_REG(LIGHTING_LUT_DATA3, 0x1CB) +PICA_REG(LIGHTING_LUT_DATA4, 0x1CC) +PICA_REG(LIGHTING_LUT_DATA5, 0x1CD) +PICA_REG(LIGHTING_LUT_DATA6, 0x1CE) +PICA_REG(LIGHTING_LUT_DATA7, 0x1CF) +PICA_REG(LIGHTING_LUTINPUT_ABS, 0x1D0) +PICA_REG(LIGHTING_LUTINPUT_SELECT, 0x1D1) +PICA_REG(LIGHTING_LUTINPUT_SCALE, 0x1D2) +PICA_REG(LIGHTING_LIGHT_PERMUTATION, 0x1D9) +PICA_REG(ATTRIBBUFFERS_LOC, 0x200) +PICA_REG(ATTRIBBUFFERS_FORMAT_LOW, 0x201) +PICA_REG(ATTRIBBUFFERS_FORMAT_HIGH, 0x202) +PICA_REG(ATTRIBBUFFER0_OFFSET, 0x203) +PICA_REG(ATTRIBBUFFER0_CONFIG1, 0x204) +PICA_REG(ATTRIBBUFFER0_CONFIG2, 0x205) +PICA_REG(ATTRIBBUFFER1_OFFSET, 0x206) +PICA_REG(ATTRIBBUFFER1_CONFIG1, 0x207) +PICA_REG(ATTRIBBUFFER1_CONFIG2, 0x208) +PICA_REG(ATTRIBBUFFER2_OFFSET, 0x209) +PICA_REG(ATTRIBBUFFER2_CONFIG1, 0x20A) +PICA_REG(ATTRIBBUFFER2_CONFIG2, 0x20B) +PICA_REG(ATTRIBBUFFER3_OFFSET, 0x20C) +PICA_REG(ATTRIBBUFFER3_CONFIG1, 0x20D) +PICA_REG(ATTRIBBUFFER3_CONFIG2, 0x20E) +PICA_REG(ATTRIBBUFFER4_OFFSET, 0x20F) +PICA_REG(ATTRIBBUFFER4_CONFIG1, 0x210) +PICA_REG(ATTRIBBUFFER4_CONFIG2, 0x211) +PICA_REG(ATTRIBBUFFER5_OFFSET, 0x212) +PICA_REG(ATTRIBBUFFER5_CONFIG1, 0x213) +PICA_REG(ATTRIBBUFFER5_CONFIG2, 0x214) +PICA_REG(ATTRIBBUFFER6_OFFSET, 0x215) +PICA_REG(ATTRIBBUFFER6_CONFIG1, 0x216) +PICA_REG(ATTRIBBUFFER6_CONFIG2, 0x217) +PICA_REG(ATTRIBBUFFER7_OFFSET, 0x218) +PICA_REG(ATTRIBBUFFER7_CONFIG1, 0x219) +PICA_REG(ATTRIBBUFFER7_CONFIG2, 0x21A) +PICA_REG(ATTRIBBUFFER8_OFFSET, 0x21B) +PICA_REG(ATTRIBBUFFER8_CONFIG1, 0x21C) +PICA_REG(ATTRIBBUFFER8_CONFIG2, 0x21D) +PICA_REG(ATTRIBBUFFER9_OFFSET, 0x21E) +PICA_REG(ATTRIBBUFFER9_CONFIG1, 0x21F) +PICA_REG(ATTRIBBUFFER9_CONFIG2, 0x220) 
+PICA_REG(ATTRIBBUFFER10_OFFSET, 0x221) +PICA_REG(ATTRIBBUFFER10_CONFIG1, 0x222) +PICA_REG(ATTRIBBUFFER10_CONFIG2, 0x223) +PICA_REG(ATTRIBBUFFER11_OFFSET, 0x224) +PICA_REG(ATTRIBBUFFER11_CONFIG1, 0x225) +PICA_REG(ATTRIBBUFFER11_CONFIG2, 0x226) +PICA_REG(INDEXBUFFER_CONFIG, 0x227) +PICA_REG(NUMVERTICES, 0x228) +PICA_REG(GEOSTAGE_CONFIG, 0x229) +PICA_REG(VERTEX_OFFSET, 0x22A) +PICA_REG(POST_VERTEX_CACHE_NUM, 0x22D) +PICA_REG(DRAWARRAYS, 0x22E) +PICA_REG(DRAWELEMENTS, 0x22F) +PICA_REG(VTX_FUNC, 0x231) +PICA_REG(FIXEDATTRIB_INDEX, 0x232) +PICA_REG(FIXEDATTRIB_DATA0, 0x233) +PICA_REG(FIXEDATTRIB_DATA1, 0x234) +PICA_REG(FIXEDATTRIB_DATA2, 0x235) +PICA_REG(CMDBUF_SIZE0, 0x238) +PICA_REG(CMDBUF_SIZE1, 0x239) +PICA_REG(CMDBUF_ADDR0, 0x23A) +PICA_REG(CMDBUF_ADDR1, 0x23B) +PICA_REG(CMDBUF_JUMP0, 0x23C) +PICA_REG(CMDBUF_JUMP1, 0x23D) +PICA_REG(VSH_NUM_ATTR, 0x242) +PICA_REG(VSH_COM_MODE, 0x244) +PICA_REG(START_DRAW_FUNC0, 0x245) +PICA_REG(VSH_OUTMAP_TOTAL1, 0x24A) +PICA_REG(VSH_OUTMAP_TOTAL2, 0x251) +PICA_REG(GSH_MISC0, 0x252) +PICA_REG(GEOSTAGE_CONFIG2, 0x253) +PICA_REG(GSH_MISC1, 0x254) +PICA_REG(PRIMITIVE_CONFIG, 0x25E) +PICA_REG(RESTART_PRIMITIVE, 0x25F) +PICA_REG(GSH_BOOLUNIFORM, 0x280) +PICA_REG(GSH_INTUNIFORM_I0, 0x281) +PICA_REG(GSH_INTUNIFORM_I1, 0x282) +PICA_REG(GSH_INTUNIFORM_I2, 0x283) +PICA_REG(GSH_INTUNIFORM_I3, 0x284) +PICA_REG(GSH_INPUTBUFFER_CONFIG, 0x289) +PICA_REG(GSH_ENTRYPOINT, 0x28A) +PICA_REG(GSH_ATTRIBUTES_PERMUTATION_LOW, 0x28B) +PICA_REG(GSH_ATTRIBUTES_PERMUTATION_HIGH, 0x28C) +PICA_REG(GSH_OUTMAP_MASK, 0x28D) +PICA_REG(GSH_CODETRANSFER_END, 0x28F) +PICA_REG(GSH_FLOATUNIFORM_INDEX, 0x290) +PICA_REG(GSH_FLOATUNIFORM_DATA0, 0x291) +PICA_REG(GSH_FLOATUNIFORM_DATA1, 0x292) +PICA_REG(GSH_FLOATUNIFORM_DATA2, 0x293) +PICA_REG(GSH_FLOATUNIFORM_DATA3, 0x294) +PICA_REG(GSH_FLOATUNIFORM_DATA4, 0x295) +PICA_REG(GSH_FLOATUNIFORM_DATA5, 0x296) +PICA_REG(GSH_FLOATUNIFORM_DATA6, 0x297) +PICA_REG(GSH_FLOATUNIFORM_DATA7, 0x298) +PICA_REG(GSH_CODETRANSFER_INDEX, 0x29B) 
+PICA_REG(GSH_CODETRANSFER_DATA0, 0x29C) +PICA_REG(GSH_CODETRANSFER_DATA1, 0x29D) +PICA_REG(GSH_CODETRANSFER_DATA2, 0x29E) +PICA_REG(GSH_CODETRANSFER_DATA3, 0x29F) +PICA_REG(GSH_CODETRANSFER_DATA4, 0x2A0) +PICA_REG(GSH_CODETRANSFER_DATA5, 0x2A1) +PICA_REG(GSH_CODETRANSFER_DATA6, 0x2A2) +PICA_REG(GSH_CODETRANSFER_DATA7, 0x2A3) +PICA_REG(GSH_OPDESCS_INDEX, 0x2A5) +PICA_REG(GSH_OPDESCS_DATA0, 0x2A6) +PICA_REG(GSH_OPDESCS_DATA1, 0x2A7) +PICA_REG(GSH_OPDESCS_DATA2, 0x2A8) +PICA_REG(GSH_OPDESCS_DATA3, 0x2A9) +PICA_REG(GSH_OPDESCS_DATA4, 0x2AA) +PICA_REG(GSH_OPDESCS_DATA5, 0x2AB) +PICA_REG(GSH_OPDESCS_DATA6, 0x2AC) +PICA_REG(GSH_OPDESCS_DATA7, 0x2AD) +PICA_REG(VSH_BOOLUNIFORM, 0x2B0) +PICA_REG(VSH_INTUNIFORM_I0, 0x2B1) +PICA_REG(VSH_INTUNIFORM_I1, 0x2B2) +PICA_REG(VSH_INTUNIFORM_I2, 0x2B3) +PICA_REG(VSH_INTUNIFORM_I3, 0x2B4) +PICA_REG(VSH_INPUTBUFFER_CONFIG, 0x2B9) +PICA_REG(VSH_ENTRYPOINT, 0x2BA) +PICA_REG(VSH_ATTRIBUTES_PERMUTATION_LOW, 0x2BB) +PICA_REG(VSH_ATTRIBUTES_PERMUTATION_HIGH, 0x2BC) +PICA_REG(VSH_OUTMAP_MASK, 0x2BD) +PICA_REG(VSH_CODETRANSFER_END, 0x2BF) +PICA_REG(VSH_FLOATUNIFORM_INDEX, 0x2C0) +PICA_REG(VSH_FLOATUNIFORM_DATA0, 0x2C1) +PICA_REG(VSH_FLOATUNIFORM_DATA1, 0x2C2) +PICA_REG(VSH_FLOATUNIFORM_DATA2, 0x2C3) +PICA_REG(VSH_FLOATUNIFORM_DATA3, 0x2C4) +PICA_REG(VSH_FLOATUNIFORM_DATA4, 0x2C5) +PICA_REG(VSH_FLOATUNIFORM_DATA5, 0x2C6) +PICA_REG(VSH_FLOATUNIFORM_DATA6, 0x2C7) +PICA_REG(VSH_FLOATUNIFORM_DATA7, 0x2C8) +PICA_REG(VSH_CODETRANSFER_INDEX, 0x2CB) +PICA_REG(VSH_CODETRANSFER_DATA0, 0x2CC) +PICA_REG(VSH_CODETRANSFER_DATA1, 0x2CD) +PICA_REG(VSH_CODETRANSFER_DATA2, 0x2CE) +PICA_REG(VSH_CODETRANSFER_DATA3, 0x2CF) +PICA_REG(VSH_CODETRANSFER_DATA4, 0x2D0) +PICA_REG(VSH_CODETRANSFER_DATA5, 0x2D1) +PICA_REG(VSH_CODETRANSFER_DATA6, 0x2D2) +PICA_REG(VSH_CODETRANSFER_DATA7, 0x2D3) +PICA_REG(VSH_OPDESCS_INDEX, 0x2D5) +PICA_REG(VSH_OPDESCS_DATA0, 0x2D6) +PICA_REG(VSH_OPDESCS_DATA1, 0x2D7) +PICA_REG(VSH_OPDESCS_DATA2, 0x2D8) +PICA_REG(VSH_OPDESCS_DATA3, 0x2D9) 
+PICA_REG(VSH_OPDESCS_DATA4, 0x2DA) +PICA_REG(VSH_OPDESCS_DATA5, 0x2DB) +PICA_REG(VSH_OPDESCS_DATA6, 0x2DC) +PICA_REG(VSH_OPDESCS_DATA7, 0x2DD) diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h index 33012c259..e964bdd81 100644 --- a/src/video_core/pica_types.h +++ b/src/video_core/pica_types.h @@ -1,4 +1,4 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2022 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -22,40 +22,44 @@ namespace Pica { * * @todo Verify on HW if this conversion is sufficiently accurate. */ -template +template struct Float { + static constexpr u32 width = M + E + 1; + static constexpr u32 bias = 128 - (1 << (E - 1)); + static constexpr u32 exponent_mask = (1 << E) - 1; + static constexpr u32 mantissa_mask = (1 << M) - 1; + static constexpr u32 sign_mask = 1 << (E + M); public: - static Float FromFloat32(float val) { - Float ret; + static Float FromFloat32(float val) { + Float ret; ret.value = val; return ret; } - static Float FromRaw(u32 hex) { - Float res; + static Float FromRaw(u32 hex) { + Float res; - const int width = M + E + 1; - const int bias = 128 - (1 << (E - 1)); - int exponent = (hex >> M) & ((1 << E) - 1); - const unsigned mantissa = hex & ((1 << M) - 1); - const unsigned sign = (hex >> (E + M)) << 31; + u32 exponent = (hex >> M) & exponent_mask; + const u32 mantissa = hex & mantissa_mask; + const u32 sign = (hex & sign_mask) << (31 - M - E); - if (hex & ((1 << (width - 1)) - 1)) { - if (exponent == (1 << E) - 1) + if (hex & (mantissa_mask | (exponent_mask << M))) { + if (exponent == exponent_mask) { exponent = 255; - else + } else { exponent += bias; + } + hex = sign | (mantissa << (23 - M)) | (exponent << 23); } else { hex = sign; } std::memcpy(&res.value, &hex, sizeof(float)); - return res; } - static Float Zero() { + static Float Zero() { return FromFloat32(0.f); } @@ -64,80 +68,77 @@ public: return value; } - Float operator*(const 
Float& flt) const { + Float operator*(const Float& flt) const { float result = value * flt.ToFloat32(); // PICA gives 0 instead of NaN when multiplying by inf - if (std::isnan(result)) - if (!std::isnan(value) && !std::isnan(flt.ToFloat32())) - result = 0.f; - return Float::FromFloat32(result); + if (std::isnan(result) && !std::isnan(value) && !std::isnan(flt.ToFloat32())) { + result = 0.f; + } + + return Float::FromFloat32(result); } - Float operator/(const Float& flt) const { - return Float::FromFloat32(ToFloat32() / flt.ToFloat32()); + Float operator/(const Float& flt) const { + return Float::FromFloat32(ToFloat32() / flt.ToFloat32()); } - Float operator+(const Float& flt) const { - return Float::FromFloat32(ToFloat32() + flt.ToFloat32()); + Float operator+(const Float& flt) const { + return Float::FromFloat32(ToFloat32() + flt.ToFloat32()); } - Float operator-(const Float& flt) const { - return Float::FromFloat32(ToFloat32() - flt.ToFloat32()); + Float operator-(const Float& flt) const { + return Float::FromFloat32(ToFloat32() - flt.ToFloat32()); } - Float& operator*=(const Float& flt) { + Float& operator*=(const Float& flt) { value = operator*(flt).value; return *this; } - Float& operator/=(const Float& flt) { + Float& operator/=(const Float& flt) { value /= flt.ToFloat32(); return *this; } - Float& operator+=(const Float& flt) { + Float& operator+=(const Float& flt) { value += flt.ToFloat32(); return *this; } - Float& operator-=(const Float& flt) { + Float& operator-=(const Float& flt) { value -= flt.ToFloat32(); return *this; } - Float operator-() const { - return Float::FromFloat32(-ToFloat32()); + Float operator-() const { + return Float::FromFloat32(-ToFloat32()); } - bool operator<(const Float& flt) const { + bool operator<(const Float& flt) const { return ToFloat32() < flt.ToFloat32(); } - bool operator>(const Float& flt) const { + bool operator>(const Float& flt) const { return ToFloat32() > flt.ToFloat32(); } - bool operator>=(const Float& flt) const 
{ + bool operator>=(const Float& flt) const { return ToFloat32() >= flt.ToFloat32(); } - bool operator<=(const Float& flt) const { + bool operator<=(const Float& flt) const { return ToFloat32() <= flt.ToFloat32(); } - bool operator==(const Float& flt) const { + bool operator==(const Float& flt) const { return ToFloat32() == flt.ToFloat32(); } - bool operator!=(const Float& flt) const { + bool operator!=(const Float& flt) const { return ToFloat32() != flt.ToFloat32(); } private: - static const unsigned MASK = (1 << (M + E + 1)) - 1; - static const unsigned MANTISSA_MASK = (1 << M) - 1; - static const unsigned EXPONENT_MASK = (1 << E) - 1; - // Stored as a regular float, merely for convenience // TODO: Perform proper arithmetic on this! float value; @@ -153,4 +154,4 @@ using float24 = Float<16, 7>; using float20 = Float<12, 7>; using float16 = Float<10, 5>; -} // namespace Pica +} // namespace Pica \ No newline at end of file diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index e8367dcbd..873e4273e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -13,10 +13,6 @@ namespace OpenGL { struct ScreenInfo; } -namespace Vulkan { -struct ScreenInfo; -} - namespace Pica::Shader { struct OutputVertex; } // namespace Pica::Shader @@ -84,13 +80,6 @@ public: return false; } - /// Attempt to use a faster method to display the framebuffer to screen - virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, - PAddr framebuffer_addr, u32 pixel_stride, - Vulkan::ScreenInfo& screen_info) { - return false; - } - /// Attempt to draw using hardware shaders virtual bool AccelerateDrawBatch(bool is_indexed) { return false; diff --git a/src/video_core/regs_framebuffer.h b/src/video_core/regs_framebuffer.h index 596682cbb..07d9c8cb4 100644 --- a/src/video_core/regs_framebuffer.h +++ b/src/video_core/regs_framebuffer.h @@ -14,80 +14,80 @@ namespace Pica { +enum class 
FragmentOperationMode : u32 { + Default = 0, + Gas = 1, + Shadow = 3, +}; + +enum class LogicOp : u32 { + Clear = 0, + And = 1, + AndReverse = 2, + Copy = 3, + Set = 4, + CopyInverted = 5, + NoOp = 6, + Invert = 7, + Nand = 8, + Or = 9, + Nor = 10, + Xor = 11, + Equiv = 12, + AndInverted = 13, + OrReverse = 14, + OrInverted = 15, +}; + +enum class BlendEquation : u32 { + Add = 0, + Subtract = 1, + ReverseSubtract = 2, + Min = 3, + Max = 4, +}; + +enum class BlendFactor : u32 { + Zero = 0, + One = 1, + SourceColor = 2, + OneMinusSourceColor = 3, + DestColor = 4, + OneMinusDestColor = 5, + SourceAlpha = 6, + OneMinusSourceAlpha = 7, + DestAlpha = 8, + OneMinusDestAlpha = 9, + ConstantColor = 10, + OneMinusConstantColor = 11, + ConstantAlpha = 12, + OneMinusConstantAlpha = 13, + SourceAlphaSaturate = 14, +}; + +enum class CompareFunc : u32 { + Never = 0, + Always = 1, + Equal = 2, + NotEqual = 3, + LessThan = 4, + LessThanOrEqual = 5, + GreaterThan = 6, + GreaterThanOrEqual = 7, +}; + +enum class StencilAction : u32 { + Keep = 0, + Zero = 1, + Replace = 2, + Increment = 3, + Decrement = 4, + Invert = 5, + IncrementWrap = 6, + DecrementWrap = 7, +}; + struct FramebufferRegs { - enum class FragmentOperationMode : u32 { - Default = 0, - Gas = 1, - Shadow = 3, - }; - - enum class LogicOp : u32 { - Clear = 0, - And = 1, - AndReverse = 2, - Copy = 3, - Set = 4, - CopyInverted = 5, - NoOp = 6, - Invert = 7, - Nand = 8, - Or = 9, - Nor = 10, - Xor = 11, - Equiv = 12, - AndInverted = 13, - OrReverse = 14, - OrInverted = 15, - }; - - enum class BlendEquation : u32 { - Add = 0, - Subtract = 1, - ReverseSubtract = 2, - Min = 3, - Max = 4, - }; - - enum class BlendFactor : u32 { - Zero = 0, - One = 1, - SourceColor = 2, - OneMinusSourceColor = 3, - DestColor = 4, - OneMinusDestColor = 5, - SourceAlpha = 6, - OneMinusSourceAlpha = 7, - DestAlpha = 8, - OneMinusDestAlpha = 9, - ConstantColor = 10, - OneMinusConstantColor = 11, - ConstantAlpha = 12, - OneMinusConstantAlpha = 13, - 
SourceAlphaSaturate = 14, - }; - - enum class CompareFunc : u32 { - Never = 0, - Always = 1, - Equal = 2, - NotEqual = 3, - LessThan = 4, - LessThanOrEqual = 5, - GreaterThan = 6, - GreaterThanOrEqual = 7, - }; - - enum class StencilAction : u32 { - Keep = 0, - Zero = 1, - Replace = 2, - Increment = 3, - Decrement = 4, - Invert = 5, - IncrementWrap = 6, - DecrementWrap = 7, - }; - struct { union { BitField<0, 2, FragmentOperationMode> fragment_operation_mode; diff --git a/src/video_core/regs_pipeline.h b/src/video_core/regs_pipeline.h index a693d7a05..6a18e7cbf 100644 --- a/src/video_core/regs_pipeline.h +++ b/src/video_core/regs_pipeline.h @@ -12,6 +12,13 @@ namespace Pica { +enum class TriangleTopology : u32 { + List = 0, + Strip = 1, + Fan = 2, + Shader = 3, // Programmable setup unit implemented in a geometry shader +}; + struct PipelineRegs { enum class VertexAttributeFormat : u32 { BYTE = 0, @@ -250,13 +257,6 @@ struct PipelineRegs { INSERT_PADDING_WORDS(0x9); - enum class TriangleTopology : u32 { - List = 0, - Strip = 1, - Fan = 2, - Shader = 3, // Programmable setup unit implemented in a geometry shader - }; - BitField<8, 2, TriangleTopology> triangle_topology; u32 restart_primitive; diff --git a/src/video_core/regs_rasterizer.h b/src/video_core/regs_rasterizer.h index 94b9f7502..4767339b6 100644 --- a/src/video_core/regs_rasterizer.h +++ b/src/video_core/regs_rasterizer.h @@ -6,21 +6,20 @@ #include #include "common/bit_field.h" -#include "common/common_funcs.h" -#include "common/common_types.h" +#include "common/vector_math.h" #include "video_core/pica_types.h" namespace Pica { -struct RasterizerRegs { - enum class CullMode : u32 { - // Select which polygons are considered to be "frontfacing". - KeepAll = 0, - KeepClockWise = 1, - KeepCounterClockWise = 2, - // TODO: What does the third value imply? - }; +// Select which polygons are considered to be "frontfacing". 
+enum class CullMode : u32 { + KeepAll = 0, + KeepClockWise = 1, + KeepCounterClockWise = 2, + KeepAll2 = 3 // Same as KeepAll +}; +struct RasterizerRegs { union { BitField<0, 2, CullMode> cull_mode; }; diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h index 3954e13b4..8c64586bc 100644 --- a/src/video_core/regs_texturing.h +++ b/src/video_core/regs_texturing.h @@ -13,6 +13,23 @@ namespace Pica { +enum WrapMode : u32 { + ClampToEdge = 0, + ClampToBorder = 1, + Repeat = 2, + MirroredRepeat = 3, + // Mode 4-7 produces some weird result and may be just invalid: + ClampToEdge2 = 4, // Positive coord: clamp to edge; negative coord: repeat + ClampToBorder2 = 5, // Positive coord: clamp to border; negative coord: repeat + Repeat2 = 6, // Same as Repeat + Repeat3 = 7, // Same as Repeat +}; + +enum TextureFilter : u32 { + Nearest = 0, + Linear = 1, +}; + struct TexturingRegs { struct TextureConfig { enum TextureType : u32 { @@ -24,23 +41,6 @@ struct TexturingRegs { Disabled = 5, }; - enum WrapMode : u32 { - ClampToEdge = 0, - ClampToBorder = 1, - Repeat = 2, - MirroredRepeat = 3, - // Mode 4-7 produces some weird result and may be just invalid: - ClampToEdge2 = 4, // Positive coord: clamp to edge; negative coord: repeat - ClampToBorder2 = 5, // Positive coord: clamp to border; negative coord: repeat - Repeat2 = 6, // Same as Repeat - Repeat3 = 7, // Same as Repeat - }; - - enum TextureFilter : u32 { - Nearest = 0, - Linear = 1, - }; - union { u32 raw; BitField<0, 8, u32> r; diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 6444b2e3b..3dc569b73 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -5,8 +5,8 @@ #include #include "core/frontend/emu_window.h" #include "video_core/renderer_base.h" -#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" #include 
"video_core/swrasterizer/swrasterizer.h" #include "video_core/video_core.h" diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0e04a9fc6..d66045818 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -414,7 +414,7 @@ bool RasterizerOpenGL::SetupGeometryShader() { MICROPROFILE_SCOPE(OpenGL_GS); const auto& regs = Pica::g_state.regs; - if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { + if (regs.pipeline.use_gs != Pica::UseGS::No) { LOG_ERROR(Render_OpenGL, "Accelerate draw doesn't support geometry shader"); return false; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index b285f4d78..a6e1eb156 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -9,18 +9,17 @@ #include #include #include -#include #include "common/assert.h" #include "common/common_types.h" +#include "video_core/shader_compiler/frontend/opcode.h" +#include "video_core/shader_compiler/frontned/instruction.h" +#include "video_core/shader_compiler/frontend/register.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" -namespace OpenGL::ShaderDecompiler { +using Pica::Shader::OpCode; +using Pica::Shader::DestRegister; -using nihstro::Instruction; -using nihstro::OpCode; -using nihstro::RegisterType; -using nihstro::SourceRegister; -using nihstro::SwizzlePattern; +namespace OpenGL::ShaderDecompiler { constexpr u32 PROGRAM_END = Pica::Shader::MAX_PROGRAM_CODE_LENGTH; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 453edf956..527efca24 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -9,7 +9,7 @@ #include #include #include 
"common/common_types.h" -#include "video_core/shader/shader.h" +#include "video_core/shader_compiler/shader.h" namespace OpenGL::ShaderDecompiler { diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 7f49e9b73..fd9c01b07 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -175,11 +175,11 @@ public: void Create(const char* source, GLenum type) { if (shader_or_program.which() == 0) { - boost::get(shader_or_program).Create(source, type); + std::get(shader_or_program).Create(source, type); } else { OGLShader shader; shader.Create(source, type); - OGLProgram& program = boost::get(shader_or_program); + OGLProgram& program = std::get(shader_or_program); program.Create(true, {shader.handle}); SetShaderUniformBlockBindings(program.handle); @@ -191,9 +191,9 @@ public: GLuint GetHandle() const { if (shader_or_program.which() == 0) { - return boost::get(shader_or_program).handle; + return std::get(shader_or_program).handle; } else { - return boost::get(shader_or_program).handle; + return std::get(shader_or_program).handle; } } @@ -204,7 +204,7 @@ public: } private: - boost::variant shader_or_program; + std::variant shader_or_program; }; class TrivialVertexShader { diff --git a/src/video_core/renderer_vulkan/pica_to_vulkan.h b/src/video_core/renderer_vulkan/pica_to_vulkan.h index 1e0aeba6b..a2a7ec93c 100644 --- a/src/video_core/renderer_vulkan/pica_to_vulkan.h +++ b/src/video_core/renderer_vulkan/pica_to_vulkan.h @@ -1,4 +1,4 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2022 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
@@ -8,28 +8,32 @@ #include #include "common/logging/log.h" #include "core/core.h" -#include "video_core/regs_framebuffer.h" -#include "video_core/regs_lighting.h" -#include "video_core/regs_texturing.h" +#include "video_core/regs.h" #include "video_core/renderer_vulkan/vk_common.h" namespace PicaToVK { -using TextureFilter = Pica::TexturingRegs::TextureConfig::TextureFilter; - struct FilterInfo { vk::Filter mag_filter, min_filter; vk::SamplerMipmapMode mip_mode; }; -inline FilterInfo TextureFilterMode(TextureFilter mag, TextureFilter min, TextureFilter mip) { - std::array filter_table = { vk::Filter::eNearest, vk::Filter::eLinear }; - std::array mipmap_table = { vk::SamplerMipmapMode::eNearest, vk::SamplerMipmapMode::eLinear }; +inline FilterInfo TextureFilterMode(Pica::TextureFilter mag, Pica::TextureFilter min, + Pica::TextureFilter mip) { + constexpr std::array filter_table = { + vk::Filter::eNearest, + vk::Filter::eLinear + }; - return FilterInfo{filter_table[mag], filter_table[min], mipmap_table[mip]}; + constexpr std::array mipmap_table = { + vk::SamplerMipmapMode::eNearest, + vk::SamplerMipmapMode::eLinear + }; + + return FilterInfo{filter_table.at(mag), filter_table.at(min), mipmap_table.at(mip)}; } -inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) { +inline vk::SamplerAddressMode WrapMode(Pica::WrapMode mode) { static constexpr std::array wrap_mode_table{{ vk::SamplerAddressMode::eClampToEdge, vk::SamplerAddressMode::eClampToBorder, @@ -63,7 +67,7 @@ inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapM return wrap_mode_table[index]; } -inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation) { +inline vk::BlendOp BlendEquation(Pica::BlendEquation equation) { static constexpr std::array blend_equation_table{{ vk::BlendOp::eAdd, vk::BlendOp::eSubtract, @@ -85,7 +89,7 @@ inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation) return 
blend_equation_table[index]; } -inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) { +inline vk::BlendFactor BlendFunc(Pica::BlendFactor factor) { static constexpr std::array blend_func_table{{ vk::BlendFactor::eZero, // BlendFactor::Zero vk::BlendFactor::eOne, // BlendFactor::One @@ -117,7 +121,7 @@ inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) { return blend_func_table[index]; } -inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) { +inline vk::LogicOp LogicOp(Pica::LogicOp op) { static constexpr std::array logic_op_table{{ vk::LogicOp::eClear, // Clear vk::LogicOp::eAnd, // And @@ -150,7 +154,7 @@ inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) { return logic_op_table[index]; } -inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) { +inline vk::CompareOp CompareFunc(Pica::CompareFunc func) { static constexpr std::array compare_func_table{{ vk::CompareOp::eNever, // CompareFunc::Never vk::CompareOp::eAlways, // CompareFunc::Always @@ -175,7 +179,7 @@ inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) { return compare_func_table[index]; } -inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) { +inline vk::StencilOp StencilOp(Pica::StencilAction action) { static constexpr std::array stencil_op_table{{ vk::StencilOp::eKeep, // StencilAction::Keep vk::StencilOp::eZero, // StencilAction::Zero @@ -200,6 +204,30 @@ inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) { return stencil_op_table[index]; } +inline vk::PrimitiveTopology PrimitiveTopology(Pica::TriangleTopology topology) { + switch (topology) { + case Pica::TriangleTopology::Fan: + return vk::PrimitiveTopology::eTriangleFan; + case Pica::TriangleTopology::List: + case Pica::TriangleTopology::Shader: + return vk::PrimitiveTopology::eTriangleList; + case Pica::TriangleTopology::Strip: + return vk::PrimitiveTopology::eTriangleStrip; + } +} + 
+inline vk::CullModeFlags CullMode(Pica::CullMode mode) { + switch (mode) { + case Pica::CullMode::KeepAll: + case Pica::CullMode::KeepAll2: + return vk::CullModeFlagBits::eNone; + case Pica::CullMode::KeepClockWise: + return vk::CullModeFlagBits::eBack; + case Pica::CullMode::KeepCounterClockWise: + return vk::CullModeFlagBits::eFront; + } +} + inline glm::vec4 ColorRGBA8(const u32 color) { return glm::vec4{ (color >> 0 & 0xFF) / 255.0f, diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index ca05ed14f..3a5b80b60 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -2,22 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -// Enable vulkan platforms -#if defined(ANDROID) || defined (__ANDROID__) - #define VK_USE_PLATFORM_ANDROID_KHR 1 -#elif defined(_WIN32) - #define VK_USE_PLATFORM_WIN32_KHR 1 -#elif defined(__APPLE__) - #define VK_USE_PLATFORM_MACOS_MVK 1 - #define VK_USE_PLATFORM_METAL_EXT 1 -#else - #ifdef WAYLAND_DISPLAY - #define VK_USE_PLATFORM_WAYLAND_KHR 1 - #else // wayland - #define VK_USE_PLATFORM_XLIB_KHR 1 - #endif -#endif - #include #include "common/assert.h" #include "common/logging/log.h" @@ -56,83 +40,6 @@ namespace Vulkan { -vk::SurfaceKHR CreateSurface(const vk::Instance& instance, - const Frontend::EmuWindow& emu_window) { - const auto& window_info = emu_window.GetWindowInfo(); - vk::SurfaceKHR surface; - -#if VK_USE_PLATFORM_WIN32_KHR - if (window_info.type == Frontend::WindowSystemType::Windows) { - const HWND hWnd = static_cast(window_info.render_surface); - const vk::Win32SurfaceCreateInfoKHR win32_ci{{}, nullptr, hWnd}; - if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) { - LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface"); - UNREACHABLE(); - } - } -#elif VK_USE_PLATFORM_XLIB_KHR - if (window_info.type == 
Frontend::WindowSystemType::X11) { - const vk::XlibSurfaceCreateInfoKHR xlib_ci{{}, - static_cast(window_info.display_connection), - reinterpret_cast(window_info.render_surface)}; - if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) { - LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); - UNREACHABLE(); - } - } - -#elif VK_USE_PLATFORM_WAYLAND_KHR - if (window_info.type == Frontend::WindowSystemType::Wayland) { - const vk::WaylandSurfaceCreateInfoKHR wayland_ci{{}, - static_cast(window_info.display_connection), - static_cast(window_info.render_surface)}; - if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) { - LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); - UNREACHABLE(); - } - } -#endif - if (!surface) { - LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); - UNREACHABLE(); - } - - return surface; -} - -std::vector RequiredExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) { - std::vector extensions; - extensions.reserve(6); - switch (window_type) { - case Frontend::WindowSystemType::Headless: - break; -#ifdef _WIN32 - case Frontend::WindowSystemType::Windows: - extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); - break; -#endif -#if !defined(_WIN32) && !defined(__APPLE__) - case Frontend::WindowSystemType::X11: - extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); - break; - case Frontend::WindowSystemType::Wayland: - extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); - break; -#endif - default: - LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); - break; - } - if (window_type != Frontend::WindowSystemType::Headless) { - extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); - } - if (enable_debug_utils) { - extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - } - extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); - return 
extensions; -} - RendererVulkan::RendererVulkan(Frontend::EmuWindow& window) : RendererBase{window} { diff --git a/src/video_core/renderer_vulkan/vk_backend.cpp b/src/video_core/renderer_vulkan/vk_backend.cpp new file mode 100644 index 000000000..5200f7bcb --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_backend.cpp @@ -0,0 +1,178 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "core/core.h" +#include "common/object_pool.h" +#include "video_core/renderer_vulkan/vk_backend.h" +#include "video_core/renderer_vulkan/vk_buffer.h" +#include "video_core/renderer_vulkan/vk_texture.h" + +namespace VideoCore::Vulkan { + +Backend::Backend(Frontend::EmuWindow& window) : BackendBase(window), + instance(window), swapchain(instance, instance.GetSurface()), + scheduler(instance) { + + // TODO: Properly report GPU hardware + auto& telemetry_session = Core::System::GetInstance().TelemetrySession(); + constexpr auto user_system = Common::Telemetry::FieldType::UserSystem; + telemetry_session.AddField(user_system, "GPU_Vendor", "NVIDIA"); + telemetry_session.AddField(user_system, "GPU_Model", "GTX 1650"); + telemetry_session.AddField(user_system, "GPU_Vulkan_Version", "Vulkan 1.3"); + + // Pre-create all needed renderpasses by the renderer + constexpr std::array color_formats = { + vk::Format::eR8G8B8A8Unorm, + vk::Format::eR8G8B8Unorm, + vk::Format::eR5G5B5A1UnormPack16, + vk::Format::eR5G6B5UnormPack16, + vk::Format::eR4G4B4A4UnormPack16 + }; + + constexpr std::array depth_stencil_formats = { + vk::Format::eD16Unorm, + vk::Format::eX8D24UnormPack32, + vk::Format::eD24UnormS8Uint, + }; + + // Create all required renderpasses + for (u32 color = 0; color < MAX_COLOR_FORMATS; color++) { + for (u32 depth = 0; depth < MAX_DEPTH_FORMATS; depth++) { + u32 index = color * MAX_COLOR_FORMATS + depth; + renderpass_cache[index] = 
CreateRenderPass(color_formats[color], depth_stencil_formats[depth]); + } + } +} + +Backend::~Backend() { + vk::Device device = instance.GetDevice(); + for (auto& renderpass : renderpass_cache) { + device.destroyRenderPass(renderpass); + } +} + +/** + * To avoid many small heap allocations during handle creation, each resource has a dedicated pool + * associated with it that batch allocates memory. + */ +BufferHandle Backend::CreateBuffer(BufferInfo info) { + static ObjectPool buffer_pool; + return IntrusivePtr{buffer_pool.Allocate(info)}; +} + +FramebufferHandle Backend::CreateFramebuffer(FramebufferInfo info) { +} + +TextureHandle Backend::CreateTexture(TextureInfo info) { + static ObjectPool texture_pool; + return IntrusivePtr{texture_pool.Allocate(info)}; +} + +PipelineHandle Backend::CreatePipeline(PipelineType type, PipelineInfo info) { + static ObjectPool pipeline_pool; + + // Find a pipeline layout first + if (auto iter = pipeline_layouts.find(info.layout); iter != pipeline_layouts.end()) { + PipelineLayout& layout = iter->second; + + return IntrusivePtr{pipeline_pool.Allocate(instance, layout, type, info, cache)}; + } + + // Create the layout + auto result = pipeline_layouts.emplace(info.layout, PipelineLayout{instance, info.layout}); + return IntrusivePtr{pipeline_pool.Allocate(instance, result.first->second, type, info, cache)}; +} + +SamplerHandle Backend::CreateSampler(SamplerInfo info) { + static ObjectPool sampler_pool; + return IntrusivePtr{sampler_pool.Allocate(info)}; +} + +void Backend::Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer, + BufferHandle vertex_buffer, + u32 base_vertex, u32 num_vertices) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + + Buffer* vertex = static_cast(vertex_buffer.Get()); + command_buffer.bindVertexBuffers(0, vertex->GetHandle(), {0}); + + // Submit draw + command_buffer.draw(num_vertices, 1, base_vertex, 0); +} + +void Backend::DrawIndexed(PipelineHandle pipeline, 
FramebufferHandle draw_framebuffer, + BufferHandle vertex_buffer, BufferHandle index_buffer, + u32 base_index, u32 num_indices, u32 base_vertex) { + +} + + +vk::RenderPass Backend::CreateRenderPass(vk::Format color, vk::Format depth) const { + // Define attachments + const std::array attachments = { + vk::AttachmentDescription{ + .format = color, + .stencilLoadOp = vk::AttachmentLoadOp::eDontCare, + .stencilStoreOp = vk::AttachmentStoreOp::eDontCare, + .initialLayout = vk::ImageLayout::eShaderReadOnlyOptimal, + .finalLayout = vk::ImageLayout::eColorAttachmentOptimal + }, + vk::AttachmentDescription{ + .format = depth, + .initialLayout = vk::ImageLayout::eShaderReadOnlyOptimal, + .finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal + } + }; + + // Our renderpasses only defines one color and depth attachment + const vk::AttachmentReference color_attachment_ref = { + .attachment = 0, + .layout = vk::ImageLayout::eColorAttachmentOptimal + }; + + const vk::AttachmentReference depth_attachment_ref = { + .attachment = 1, + .layout = vk::ImageLayout::eDepthStencilAttachmentOptimal + }; + + const vk::SubpassDependency subpass_dependency = { + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput | + vk::PipelineStageFlagBits::eEarlyFragmentTests, + .dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput | + vk::PipelineStageFlagBits::eEarlyFragmentTests, + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dependencyFlags = vk::DependencyFlagBits::eByRegion + }; + + // We also require only one subpass + const vk::SubpassDescription subpass = { + .pipelineBindPoint = vk::PipelineBindPoint::eGraphics, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = 1, + .pColorAttachments = &color_attachment_ref, + .pResolveAttachments = 0, + 
.pDepthStencilAttachment = &depth_attachment_ref + }; + + const vk::RenderPassCreateInfo renderpass_info = { + .attachmentCount = 2, + .pAttachments = attachments.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 1, + .pDependencies = &subpass_dependency + }; + + // Create the renderpass + vk::Device device = instance.GetDevice(); + return device.createRenderPass(renderpass_info); +} + +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_backend.h b/src/video_core/renderer_vulkan/vk_backend.h new file mode 100644 index 000000000..2ca8b3b22 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_backend.h @@ -0,0 +1,75 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "video_core/common/backend.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_pipeline.h" + +namespace VideoCore::Vulkan { + +class Texture; + +constexpr u32 RENDERPASS_COUNT = MAX_COLOR_FORMATS * MAX_DEPTH_FORMATS; + +class Backend : public VideoCore::BackendBase { +public: + Backend(Frontend::EmuWindow& window); + ~Backend(); + + void SwapBuffers() override; + + BufferHandle CreateBuffer(BufferInfo info) override; + + FramebufferHandle CreateFramebuffer(FramebufferInfo info) override; + + TextureHandle CreateTexture(TextureInfo info) override; + + PipelineHandle CreatePipeline(PipelineType type, PipelineInfo info) override; + + SamplerHandle CreateSampler(SamplerInfo info) override; + + void Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer, + BufferHandle vertex_buffer, + u32 base_vertex, u32 num_vertices) override; + + void DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer, + BufferHandle vertex_buffer, BufferHandle index_buffer, + 
u32 base_index, u32 num_indices, u32 base_vertex) override; + + void DispatchCompute(PipelineHandle pipeline, Common::Vec3 groupsize, + Common::Vec3 groups) override; + + // Returns the vulkan instance + inline const Instance& GetInstance() const { + return instance; + } + + // Returns the vulkan command buffer scheduler + inline CommandScheduler& GetScheduler() { + return scheduler; + } + +private: + vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth) const; + +private: + Instance instance; + Swapchain swapchain; + CommandScheduler scheduler; + + // The formats Citra uses are limited so we can pre-create + // all the renderpasses we will need + std::array renderpass_cache; + vk::PipelineCache cache; + + // Pipeline layout cache + std::unordered_map pipeline_layouts; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer.cpp b/src/video_core/renderer_vulkan/vk_buffer.cpp index e5d682927..ea9144e82 100644 --- a/src/video_core/renderer_vulkan/vk_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer.cpp @@ -2,165 +2,181 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#define VULKAN_HPP_NO_CONSTRUCTORS #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" #include "video_core/renderer_vulkan/vk_buffer.h" #include "video_core/renderer_vulkan/vk_task_scheduler.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include -namespace Vulkan { +namespace VideoCore::Vulkan { + +inline vk::BufferUsageFlags ToVkBufferUsage(BufferUsage usage) { + constexpr std::array vk_buffer_usages = { + vk::BufferUsageFlagBits::eVertexBuffer, + vk::BufferUsageFlagBits::eIndexBuffer, + vk::BufferUsageFlagBits::eUniformBuffer, + vk::BufferUsageFlagBits::eUniformTexelBuffer, + vk::BufferUsageFlagBits::eTransferSrc + }; + + return vk::BufferUsageFlagBits::eTransferDst | + vk_buffer_usages.at(static_cast(usage)); +} + +inline vk::Format ToVkViewFormat(ViewFormat format) { + constexpr std::array vk_view_formats = { + vk::Format::eR32Sfloat, + vk::Format::eR32G32Sfloat, + vk::Format::eR32G32B32Sfloat, + vk::Format::eR32G32B32A32Sfloat + }; + + return vk_view_formats.at(static_cast(format)); +} + +Buffer::Buffer(Instance& instance, CommandScheduler& scheduler, const BufferInfo& info) : + BufferBase(info), instance(instance), scheduler(scheduler) { + + vk::BufferCreateInfo buffer_info = { + .size = info.capacity, + .usage = ToVkBufferUsage(info.usage) + }; + + VmaAllocationCreateInfo alloc_create_info = { + .flags = info.usage == BufferUsage::Staging ? 
+ (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT) : + VmaAllocationCreateFlags{}, + .usage = VMA_MEMORY_USAGE_AUTO + }; + + VkBuffer unsafe_buffer = VK_NULL_HANDLE; + VkBufferCreateInfo unsafe_buffer_info = static_cast(buffer_info); + VmaAllocationInfo alloc_info; + VmaAllocator allocator = instance.GetAllocator(); + + // Allocate texture memory + vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, + &unsafe_buffer, &allocation, &alloc_info); + buffer = vk::Buffer{unsafe_buffer}; + + u32 view = 0; + vk::Device device = instance.GetDevice(); + while (info.views[view] != ViewFormat::Undefined) { + const vk::BufferViewCreateInfo view_info = { + .buffer = buffer, + .format = ToVkViewFormat(info.views[view]), + .range = info.capacity + }; + + views[view++] = device.createBufferView(view_info); + } + + // Map memory + if (info.usage == BufferUsage::Staging) { + mapped_ptr = alloc_info.pMappedData; + } +} Buffer::~Buffer() { - Destroy(); -} - -void Buffer::Create(const Buffer::Info& info) { - auto device = g_vk_instace->GetDevice(); - buffer_info = info; - - vk::BufferCreateInfo bufferInfo({}, info.size, info.usage); - buffer = device.createBuffer(bufferInfo); - - auto mem_requirements = device.getBufferMemoryRequirements(buffer); - - auto memory_type_index = FindMemoryType(mem_requirements.memoryTypeBits, info.properties); - vk::MemoryAllocateInfo alloc_info(mem_requirements.size, memory_type_index); - - memory = device.allocateMemory(alloc_info); - device.bindBufferMemory(buffer, memory, 0); - - // Optionally map the buffer to CPU memory - if (info.properties & vk::MemoryPropertyFlagBits::eHostVisible) { - host_ptr = device.mapMemory(memory, 0, info.size); - } - - for (auto& format : info.view_formats) { - if (format != vk::Format::eUndefined) { - views[view_count++] = device.createBufferView({{}, buffer, format, 0, info.size}); - } - } -} - -void Buffer::Recreate() { - Destroy(); - Create(buffer_info); -} - 
-void Buffer::Destroy() { if (buffer) { - if (host_ptr != nullptr) { - g_vk_instace->GetDevice().unmapMemory(memory); - } + auto deleter = [allocation = allocation, + buffer = buffer, + views = views](vk::Device device, VmaAllocator allocator) { + vmaDestroyBuffer(allocator, static_cast(buffer), allocation); - auto deleter = [buffer = buffer, - memory = memory, - view_count = view_count, - views = views]() { - auto device = g_vk_instace->GetDevice(); - device.destroyBuffer(buffer); - device.freeMemory(memory); - - for (u32 i = 0; i < view_count; i++) { - device.destroyBufferView(views[i]); + u32 view_index = 0; + while (views[view_index]) { + device.destroyBufferView(views[view_index++]); } }; - g_vk_task_scheduler->Schedule(deleter); - } -} - -u32 Buffer::FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties) { - vk::PhysicalDeviceMemoryProperties mem_properties = g_vk_instace->GetPhysicalDevice().getMemoryProperties(); - - for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++) - { - auto flags = mem_properties.memoryTypes[i].propertyFlags; - if ((type_filter & (1 << i)) && (flags & properties) == properties) - return i; - } - - LOG_CRITICAL(Render_Vulkan, "Failed to find suitable memory type."); - UNREACHABLE(); -} - -void Buffer::Upload(std::span data, u32 offset, - vk::AccessFlags access_to_block, - vk::PipelineStageFlags stage_to_block) { - auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer(); - // For small data uploads use vkCmdUpdateBuffer - if (data.size_bytes() < 1024) { - cmdbuffer.updateBuffer(buffer, 0, data.size_bytes(), data.data()); - } - else { - auto [ptr, staging_offset] = g_vk_task_scheduler->RequestStaging(data.size()); - if (!ptr) { - LOG_ERROR(Render_Vulkan, "Cannot upload data without staging buffer!"); + // Delete the buffer immediately if it's allocated in host memory + if (info.usage == BufferUsage::Staging) { + vk::Device device = instance.GetDevice(); + VmaAllocator allocator = instance.GetAllocator(); + 
deleter(device, allocator); + } else { + scheduler.Schedule(deleter); } - - // Copy pixels to staging buffer - std::memcpy(ptr, data.data(), data.size_bytes()); - - auto region = vk::BufferCopy{staging_offset, offset, data.size_bytes()}; - auto& staging = g_vk_task_scheduler->GetStaging(); - cmdbuffer.copyBuffer(staging.GetBuffer(), buffer, region); } - - vk::BufferMemoryBarrier barrier{ - vk::AccessFlagBits::eTransferWrite, access_to_block, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - buffer, offset, data.size_bytes() - }; - - // Add a pipeline barrier for the region modified - cmdbuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_to_block, - vk::DependencyFlagBits::eByRegion, - 0, nullptr, 1, &barrier, 0, nullptr); } -std::tuple StreamBuffer::Map(u32 size, u32 alignment) { - ASSERT(size <= buffer_info.size); - ASSERT(alignment <= buffer_info.size); +std::span Buffer::Map(u32 size, u32 alignment) { + ASSERT(size <= info.capacity && alignment <= info.capacity); if (alignment > 0) { - buffer_pos = Common::AlignUp(buffer_pos, alignment); + buffer_offset = Common::AlignUp(buffer_offset, alignment); } - bool invalidate = false; - if (buffer_pos + size > buffer_info.size) { - buffer_pos = 0; - invalidate = true; + // If the buffer is full, invalidate it + if (buffer_offset + size > info.capacity) { + Invalidate(); } - auto [staging_ptr, staging_offset] = g_vk_task_scheduler->RequestStaging(size); - mapped_chunk = vk::BufferCopy{staging_offset, buffer_pos, size}; - - return std::make_tuple(staging_ptr, buffer_pos, invalidate); + if (info.usage == BufferUsage::Staging) { + return std::span{reinterpret_cast(mapped_ptr) + buffer_offset, size}; + } else { + Buffer& staging = scheduler.GetCommandUploadBuffer(); + return staging.Map(size, alignment); + } } -void StreamBuffer::Commit(u32 size, vk::AccessFlags access_to_block, - vk::PipelineStageFlags stage_to_block) { - if (size > 0) { - mapped_chunk.size = size; +void Buffer::Commit(u32 size) { + 
VmaAllocator allocator = instance.GetAllocator(); + if (info.usage == BufferUsage::Staging && size > 0) { + vmaFlushAllocation(allocator, allocation, buffer_offset, size); + } else { + vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer(); + Buffer& staging = scheduler.GetCommandUploadBuffer(); - auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer(); - auto& staging = g_vk_task_scheduler->GetStaging(); - cmdbuffer.copyBuffer(staging.GetBuffer(), buffer, mapped_chunk); + const vk::BufferCopy copy_region = { + .srcOffset = staging.GetCurrentOffset(), + .dstOffset = buffer_offset, + .size = size + }; - vk::BufferMemoryBarrier barrier{ - vk::AccessFlagBits::eTransferWrite, access_to_block, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - buffer, mapped_chunk.dstOffset, mapped_chunk.size + // Copy staging buffer to device local buffer + command_buffer.copyBuffer(staging.GetHandle(), buffer, copy_region); + + vk::AccessFlags access_mask; + vk::PipelineStageFlags stage_mask; + switch (info.usage) { + case BufferUsage::Vertex: + access_mask = vk::AccessFlagBits::eVertexAttributeRead; + stage_mask = vk::PipelineStageFlagBits::eVertexInput; + break; + case BufferUsage::Index: + access_mask = vk::AccessFlagBits::eIndexRead; + stage_mask = vk::PipelineStageFlagBits::eVertexInput; + break; + case BufferUsage::Uniform: + case BufferUsage::Texel: + access_mask = vk::AccessFlagBits::eUniformRead; + stage_mask = vk::PipelineStageFlagBits::eVertexShader | + vk::PipelineStageFlagBits::eFragmentShader; + break; + default: + LOG_CRITICAL(Render_Vulkan, "Unknown BufferUsage flag!"); + } + + const vk::BufferMemoryBarrier buffer_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = access_mask, + .buffer = buffer, + .offset = buffer_offset, + .size = size }; // Add a pipeline barrier for the region modified - cmdbuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_to_block, - vk::DependencyFlagBits::eByRegion, - 
0, nullptr, 1, &barrier, 0, nullptr); + command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask, + vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {}); - buffer_pos += size; } + + buffer_offset += size; } } diff --git a/src/video_core/renderer_vulkan/vk_buffer.h b/src/video_core/renderer_vulkan/vk_buffer.h index 09701031c..ac1241ab2 100644 --- a/src/video_core/renderer_vulkan/vk_buffer.h +++ b/src/video_core/renderer_vulkan/vk_buffer.h @@ -4,80 +4,47 @@ #pragma once -#include -#include -#include -#include -#include "common/common_types.h" +#include +#include "common/assert.h" +#include "video_core/common/buffer.h" #include "video_core/renderer_vulkan/vk_common.h" -namespace Vulkan { +namespace VideoCore::Vulkan { -constexpr u32 MAX_BUFFER_VIEWS = 5; -constexpr u32 MAX_COMMIT_CHUNKS = 6; +class Instance; +class CommandScheduler; -/// Generic Vulkan buffer object used by almost every resource -class Buffer : public NonCopyable { +class Buffer : public VideoCore::BufferBase { public: - struct Info { - u32 size; - vk::MemoryPropertyFlags properties; - vk::BufferUsageFlags usage; - std::array view_formats{}; - }; + Buffer(Instance& instance, CommandScheduler& scheduler, const BufferInfo& info); + ~Buffer() override; - Buffer() = default; - ~Buffer(); + std::span Map(u32 size, u32 alignment = 0) override; - /// Enable move operations - Buffer(Buffer&&) = default; - Buffer& operator=(Buffer&&) = default; + /// Flushes write to buffer memory + void Commit(u32 size = 0) override; - /// Create a new Vulkan buffer object - void Create(const Info& info); - void Recreate(); - void Destroy(); + /// Returns the Vulkan buffer handle + vk::Buffer GetHandle() const { + return buffer; + } - /// Global utility functions used by other objects - static u32 FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties); - - /// Return a pointer to the mapped memory if the buffer is host mapped - u8* GetHostPointer() const { return 
reinterpret_cast(host_ptr); } - const vk::BufferView& GetView(u32 i = 0) const { return views[i]; } - const vk::Buffer& GetBuffer() const { return buffer; } - u32 GetSize() const { return buffer_info.size; } - - void Upload(std::span data, u32 offset, - vk::AccessFlags access_to_block = vk::AccessFlagBits::eVertexAttributeRead, - vk::PipelineStageFlags stage_to_block = vk::PipelineStageFlagBits::eVertexInput); + /// Returns an immutable reference to the requested buffer view + const vk::BufferView& GetView(u32 index = 0) const { + ASSERT(index < view_count); + return views[index]; + } protected: - Info buffer_info; - vk::Buffer buffer; - vk::DeviceMemory memory; - void* host_ptr = nullptr; - std::array views; - u32 view_count{}; -}; + Instance& instance; + CommandScheduler& scheduler; -class StreamBuffer : public Buffer { -public: - /* - * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes - * and the optional alignment requirement. - * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. - * The return values are the pointer to the new chunk, the offset within the buffer, - * and the invalidation flag for previous chunks. - * The actual used size must be specified on unmapping the chunk. 
- */ - std::tuple Map(u32 size, u32 alignment = 0); - void Commit(u32 size, vk::AccessFlags access_to_block = vk::AccessFlagBits::eUniformRead, - vk::PipelineStageFlags stage_to_block = vk::PipelineStageFlagBits::eVertexShader | - vk::PipelineStageFlagBits::eFragmentShader); - -private: - u32 buffer_pos{}; - vk::BufferCopy mapped_chunk; + // Vulkan buffer handle + void* mapped_ptr = nullptr; + vk::Buffer buffer = VK_NULL_HANDLE; + VmaAllocation allocation = VK_NULL_HANDLE; + std::array views{}; + u32 view_count = 0; }; } diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h index b345a59f5..d75ec5d43 100644 --- a/src/video_core/renderer_vulkan/vk_common.h +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -14,3 +14,74 @@ #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 #define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1 #include + +namespace VideoCore::Vulkan { + +/// Returns the aligned byte size of each pixel in the specified format +constexpr float GetFormatSize(vk::Format format) { + switch (format) { + case vk::Format::eR8G8B8A8Unorm: + case vk::Format::eD24UnormS8Uint: + return 4; + case vk::Format::eR8G8B8Unorm: + return 3; + case vk::Format::eR5G5B5A1UnormPack16: + case vk::Format::eR5G6B5UnormPack16: + case vk::Format::eR4G4B4A4UnormPack16: + case vk::Format::eD16Unorm: + return 2; + default: + return 0; + }; +} + +/// Return the image aspect associated on the provided format +constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) { + vk::ImageAspectFlags flags; + switch (format) { + case vk::Format::eD16UnormS8Uint: + case vk::Format::eD24UnormS8Uint: + case vk::Format::eX8D24UnormPack32: + case vk::Format::eD32SfloatS8Uint: + flags = vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth; + break; + case vk::Format::eD16Unorm: + case vk::Format::eD32Sfloat: + flags = vk::ImageAspectFlagBits::eDepth; + break; + default: + flags = vk::ImageAspectFlagBits::eColor; + } + + return flags; +} + +/// 
Returns a bit mask with the required usage of a format with a particular aspect +constexpr vk::ImageUsageFlags GetImageUsage(vk::ImageAspectFlags aspect) { + auto usage = vk::ImageUsageFlagBits::eSampled | + vk::ImageUsageFlagBits::eTransferDst | + vk::ImageUsageFlagBits::eTransferSrc; + + if (aspect & vk::ImageAspectFlagBits::eDepth) { + return usage | vk::ImageUsageFlagBits::eDepthStencilAttachment; + } else { + return usage | vk::ImageUsageFlagBits::eColorAttachment; + } +}; + +/// Returns a bit mask with the required features of a format with a particular aspect +constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::ImageAspectFlags aspect) { + auto usage = vk::FormatFeatureFlagBits::eSampledImage | + vk::FormatFeatureFlagBits::eTransferDst | + vk::FormatFeatureFlagBits::eTransferSrc | + vk::FormatFeatureFlagBits::eBlitSrc | + vk::FormatFeatureFlagBits::eBlitDst; + + if (aspect & vk::ImageAspectFlagBits::eDepth) { + return usage | vk::FormatFeatureFlagBits::eDepthStencilAttachment; + } else { + return usage | vk::FormatFeatureFlagBits::eColorAttachment; + } +}; + +} diff --git a/src/video_core/renderer_vulkan/vk_format_util.cpp b/src/video_core/renderer_vulkan/vk_format_util.cpp new file mode 100644 index 000000000..1b44fd8ea --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_format_util.cpp @@ -0,0 +1,6 @@ +#include "vk_format_util.h" + +vk_format_util::vk_format_util() +{ + +} diff --git a/src/video_core/renderer_vulkan/vk_format_util.h b/src/video_core/renderer_vulkan/vk_format_util.h new file mode 100644 index 000000000..0a847fb8f --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_format_util.h @@ -0,0 +1,436 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace VideoCore::Vulkan { + +enum class SIMD : u8 { + None = 0, + SSE4 = 1, + AVX2 = 2, + NEON = 3 +}; + +/** + * A Pixel holds a pixel value or a SIMD lane holding multiple "real" pixels + */ +#pragma pack(1) +template +struct Pixel { + using StorageType = std::conditional_t>>; + Pixel() = default; + + // Memory load/store + constexpr void Load(u8* memory) { + std::memcpy(&storage, memory, bytes); + } + + constexpr void Store(u8* memory) const { + std::memcpy(memory, &storage, bytes); + } + + // Returns the number of bytes until the next pixel + constexpr u8 GetStride() const { + return bytes; + } + + // Bitwise operators + constexpr Pixel RotateRight(int n) const { + return std::rotr(storage, n); + } + + constexpr StorageType operator & (const StorageType mask) const { + return storage & mask; + } + + constexpr StorageType operator | (const StorageType mask) const { + return storage | mask; + } + + constexpr StorageType operator >>(const int n) const { + return storage >> n; + } + + constexpr StorageType operator <<(const int n) const { + return storage << n; + } + +private: + StorageType storage; +}; +#pragma pack() + +/** + * Information about a pixel format + */ +template +struct FormatInfo { + constexpr FormatInfo(vk::Format format) { + for (int i = 0; i < components; i++) { + name[i] = vk::componentName(format, i)[0]; + is_float[i] = std::string_view{vk::componentNumericFormat(format, i)} + == "SFLOAT"; + bits[i] = vk::componentBits(format, i); + bit_offset[i] = (i > 0 ? bit_offset[i - 1] + bits[i - 1] : 0); + } + + bytes = (format == vk::Format::eD32SfloatS8Uint ? 
8 : + vk::blockSize(format)); + } + + static constexpr u32 components = Components; + std::array name; + std::array is_float; + std::array bit_offset; + std::array bits; + u8 bytes; // This includes the padding in D32S8 +}; + +/** + * Represents a mapping of components from one format to another + */ +template +struct Mapping { + static constexpr u32 component_map_bits = 4; + static constexpr u32 component_map_mask = (1 << component_map_bits) - 1; + + constexpr Mapping() { + for (int i = 0; i < source.names.size(); i++) { + constexpr char source_name = source.names[i]; + for (u8 j = 0; j < dest.names.size(); j++) { + constexpr char dest_name = dest.names[j]; + if constexpr (source_name == dest_name) { + storage |= ((j & component_map_mask) << component_map_bits * i); + break; + } + } + } + } + + constexpr u8 GetMapping(const int component) { + return (storage >> (component * component_map_bits)) & component_map_mask; + } + + // Returns the number of bits to rotate a pixel to the right + // to match the mapping of the destiation format. 
If it's not + // possible returns -1 + constexpr s32 TestMappingRotation() { + constexpr u16 identity = 0x3210; + + u32 total_bits_rotated = 0; + auto test_rotation = [&](s32 i) -> bool { + return (storage == std::rotr(identity, i * component_map_bits)); + }; + + for (s32 rot = 0; rot < 4; rot++) { + if (test_rotation(rot)) { + return total_bits_rotated; + } + + total_bits_rotated += source.bits[rot]; + } + + return -1; + } + + // Returns true if the each component of the source format has the + // same bit-width as the mapped destination format component + constexpr bool AreBitwiseEqual() { + bool result = source.bytes == dest.bytes; + for (int i = 0; i < source.components; i++) { + result &= (source.bits[i] == dest.bits[GetMapping(i)]); + } + + return result; + } + +private: + // Since there are at most 4 components we can use 4 bits for each component + u16 storage = 0xFFFF; +}; + +// Allows for loop like iteration at compile time +template +constexpr void ForEach(F&& f) { + if constexpr (Start < End) { + f(std::integral_constant()); + ForEach(f); + } +} + +// Copies pixel data from a source to a destionation buffer, performing +// format conversion at the same time +template +constexpr void Convert2(std::span source, std::span dest) { + constexpr u32 source_components = vk::componentCount(source_format); + constexpr u32 dest_components = vk::componentCount(dest_format); + + // Query vulkan hpp format traits for the info we need + constexpr FormatInfo source_info{source_format}; + constexpr FormatInfo dest_info{dest_format}; + + // Create a table with the required component mapping + constexpr Mapping mapping{}; + + // Begin conversion + u32 source_offset = 0; + u32 dest_offset = 0; + while (source_offset < source.size()) { + // Load source pixel + Pixel source_pixel; + Pixel dest_pixel{}; + + // Load data into the pixel + source_pixel.Load(source.data() + source_offset); + + // OPTIMIZATION: Some formats (RGB5A1, A1RGB5) are simply rotations + // of one 
another. We can use a faster path for these + if constexpr (s32 rot = mapping.TestMappingRotation(); + rot > -1 && mapping.AreBitwiseEqual()) { + dest_pixel = source_pixel.RotateRight(rot); + // RGB8 <-> RGBA8 is extrenely common on desktop GPUs + // so it deserves a special path + } else if constexpr (true) { + } else { + ForEach<0, source_components>([&](auto comp) { + constexpr u8 dest_comp = (mapping >> (2 * comp)) & 0x3; + + // If the component is not mapped skip it + if constexpr (dest_comp == 0xFF) { + return; + } + + // Retrieve component + u32 component = GetComponent(source_pixel); + + constexpr bool is_source_float = IsFloat(comp); + constexpr bool is_dest_float = IsFloat(dest_comp); + + // Perform float <-> int conversion (normalization) + if constexpr (is_source_float && !is_dest_float) { + float temp; + std::memcpy(&temp, &component, sizeof(float)); + + constexpr u64 mask = (1ull << vk::componentBits(dest_format, dest_comp)) - 1; + component = static_cast(temp * mask); + } else if constexpr (!is_source_float && is_dest_float) { + constexpr u64 mask = (1ull << vk::componentBits(source_format, comp)) - 1; + float temp = static_cast(component) / mask; + std::memcpy(&component, &temp, sizeof(float)); + } + + SetComponent(dest_pixel, component); + }); + } + + // Write destination pixel (dest_bytes includes the padding so we cannot use it here) + std::memcpy(dest.data() + dest_offset, DataPtr(dest_pixel), + vk::blockSize(dest_format)); + + // Copy next pixel + source_offset += source_pixel.GetStride(); + dest_offset += dest_pixel.GetStride(); + } +} + +// Asign the byte count with an integral type +template +struct PackedInt { using type = typename std::array; }; + +template <> +struct PackedInt<1> { using type = u8; }; + +template <> +struct PackedInt<2> { using type = u16; }; + +template <> +struct PackedInt<4> { using type = u32; }; + +template <> +struct PackedInt<8> { using type = u64; }; + +template +using PackedType = typename PackedInt::type; + +// 
Returns the pointer to the raw bytes respecting the underlying type +template +constexpr u8* DataPtr(PackedType& data) { + if constexpr (std::is_integral_v>) { + return reinterpret_cast(&data); + } else { + return data.data(); + } +} + +// Returns true when the specified component is of float type +template +constexpr bool IsFloat(u8 component) { + return std::string_view{vk::componentNumericFormat(format, component)} == "SFLOAT"; +} + +// Returns the offset in bits of the component from the start of the pixel +template +constexpr u32 GetComponentBitOffset() { + if constexpr (i == component) { + return 0; + } else { + return vk::componentBits(format, i) + + GetComponentBitOffset(); + } +} + +// Returns the data located at the specified component +template +constexpr u32 GetComponent(PackedType& pixel) { + constexpr u64 bit_offset = GetComponentBitOffset(); + constexpr u64 component_bits = vk::componentBits(format, component); + constexpr u64 mask = (1 << component_bits) - 1; + + // First process packed formats which are easy to extract from + if constexpr (std::is_integral_v>) { + return (pixel >> bit_offset) & mask; + } else { + // Assume component_bits and offset are byte aligned. Otherwise + // this would be extremely complicated + using ComponentType = PackedType<(component_bits >> 3)>; + static_assert(component_bits % 8 == 0 && bit_offset % 8 == 0); + static_assert(std::is_integral_v); + + constexpr u64 byte_offset = bit_offset >> 3; + return *reinterpret_cast(DataPtr(pixel) + byte_offset); + } +} + +template +constexpr void SetComponent(PackedType& pixel, u32 data) { + constexpr u64 bit_offset = GetComponentBitOffset(); + constexpr u64 component_bits = vk::componentBits(format, component); + constexpr u64 mask = (1ull << component_bits) - 1; + + // First process packed formats which are easy to write + if constexpr (std::is_integral_v>) { + pixel |= (data & mask) << bit_offset; + } else { + // Assume component_bits and offset are byte aligned. 
Otherwise + // this would be extremely complicated + using ComponentType = PackedType<(component_bits >> 3)>; + static_assert(component_bits % 8 == 0 && bit_offset % 8 == 0); + static_assert(std::is_integral_v); + + constexpr u64 byte_offset = bit_offset >> 3; + *reinterpret_cast(DataPtr(pixel) + byte_offset) = data; + } +} + +constexpr bool CanUseRotation(); + +// Lookup table that maps component i of source format +// to component mapping[i] of the destination format +template +constexpr auto ComponentMapping() { + // Since there are at most 4 components we can use 2 bits for each index + u8 mapping = 0xFF; + for (u8 i = 0; i < source_components; i++) { + auto source_name = vk::componentName(source_format, i); + for (u8 j = 0; j < dest_components; j++) { + auto dest_name = vk::componentName(dest_format, j); + if (std::string_view{source_name} == std::string_view{dest_name}) { + mapping |= ((j & 0x3) << 2 * i); + break; + } + } + } + + return mapping; +} + +// Allows for loop like iteration at compile time +template +constexpr void ConstexprFor(F&& f) { + if constexpr (Start < End) { + f(std::integral_constant()); + ConstexprFor(f); + } +} + +// Copies pixel data from a source to a destionation buffer, performing +// format conversion at the same time +template +constexpr void Convert(std::span source, std::span dest) { + constexpr u32 source_components = vk::componentCount(source_format); + constexpr u32 dest_components = vk::componentCount(dest_format); + + // Create a table with the required component mapping + constexpr auto mapping = ComponentMapping(); + u32 source_offset = 0; + u32 dest_offset = 0; + while (source_offset < source.size()) { + // Load source pixel + PackedType source_pixel; + std::memcpy(DataPtr(source_pixel), + source.data() + source_offset, source_bytes); + + PackedType dest_pixel{}; + + // OPTIMIZATION: Some formats (RGB5A1, A1RGB5) are simply rotations + // of one another. 
We can use a faster path for these + + ConstexprFor<0, source_components>([&](auto comp) { + constexpr u8 dest_comp = (mapping >> (2 * comp)) & 0x3; + + // If the component is not mapped skip it + if constexpr (dest_comp == 0xFF) { + return; + } + + // Retrieve component + u32 component = GetComponent(source_pixel); + + constexpr bool is_source_float = IsFloat(comp); + constexpr bool is_dest_float = IsFloat(dest_comp); + + // Perform float <-> int conversion (normalization) + if constexpr (is_source_float && !is_dest_float) { + float temp; + std::memcpy(&temp, &component, sizeof(float)); + + constexpr u64 mask = (1ull << vk::componentBits(dest_format, dest_comp)) - 1; + component = static_cast(temp * mask); + } else if constexpr (!is_source_float && is_dest_float) { + constexpr u64 mask = (1ull << vk::componentBits(source_format, comp)) - 1; + float temp = static_cast(component) / mask; + std::memcpy(&component, &temp, sizeof(float)); + } + + SetComponent(dest_pixel, component); + }); + + // Write destination pixel (dest_bytes includes the padding so we cannot use it here) + std::memcpy(dest.data() + dest_offset, DataPtr(dest_pixel), + vk::blockSize(dest_format)); + + // Copy next pixel + source_offset += source_bytes; + dest_offset += dest_bytes; + } +} + +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 850c2e448..6fac4a4e6 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -2,52 +2,148 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include +#define VULKAN_HPP_NO_CONSTRUCTORS +#include #include -#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_platform.h" #include "video_core/renderer_vulkan/vk_instance.h" -namespace Vulkan { +namespace VideoCore::Vulkan { -std::unique_ptr g_vk_instace; +Instance::Instance(Frontend::EmuWindow& window) { + auto window_info = window.GetWindowInfo(); + + // Enable the instance extensions the backend uses + auto extensions = GetInstanceExtensions(window_info.type, true); + + // We require a Vulkan 1.1 driver + const u32 available_version = vk::enumerateInstanceVersion(); + if (available_version < VK_API_VERSION_1_1) { + LOG_CRITICAL(Render_Vulkan, "Vulkan 1.0 is not supported, 1.1 is required!"); + } + + const vk::ApplicationInfo application_info = { + .pApplicationName = "Citra", + .applicationVersion = VK_MAKE_VERSION(1, 0, 0), + .pEngineName = "Citra Vulkan", + .engineVersion = VK_MAKE_VERSION(1, 0, 0), + .apiVersion = available_version + }; + + const std::array layers = {"VK_LAYER_KHRONOS_validation"}; + const vk::InstanceCreateInfo instance_info = { + .pApplicationInfo = &application_info, + .enabledLayerCount = static_cast(layers.size()), + .ppEnabledLayerNames = layers.data(), + .enabledExtensionCount = static_cast(extensions.size()), + .ppEnabledExtensionNames = extensions.data() + }; + + // Create VkInstance + instance = vk::createInstance(instance_info); + surface = CreateSurface(instance, window); + + // TODO: GPU select dialog + physical_device = instance.enumeratePhysicalDevices()[0]; + device_limits = physical_device.getProperties().limits; + + // Create logical device + CreateDevice(true); +} Instance::~Instance() { device.waitIdle(); - device.destroy(); instance.destroy(); } -bool Instance::Create(vk::Instance new_instance, vk::PhysicalDevice gpu, - vk::SurfaceKHR surface, bool enable_validation_layer) { - instance = new_instance; - physical_device = gpu; - - // Get physical device limits - device_limits = 
physical_device.getProperties().limits; - +bool Instance::CreateDevice(bool validation_enabled) { // Determine required extensions and features - if (!FindExtensions() || !FindFeatures()) - return false; + auto feature_chain = physical_device.getFeatures2(); - // Create logical device - return CreateDevice(surface, enable_validation_layer); -} + // Not having geometry shaders or wide lines will cause issues with rendering. + const vk::PhysicalDeviceFeatures available = feature_chain.get().features; + if (!available.geometryShader && !available.wideLines) { + LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Accelerated rendering not possible!"); + } -bool Instance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) { - // Can't create an instance without a valid surface - if (!surface) { - LOG_CRITICAL(Render_Vulkan, "Invalid surface provided during instance creation!"); + // Enable some common features other emulators like Dolphin use + const vk::PhysicalDeviceFeatures2 features = { + .features = { + .robustBufferAccess = available.robustBufferAccess, + .geometryShader = available.geometryShader, + .sampleRateShading = available.sampleRateShading, + .dualSrcBlend = available.dualSrcBlend, + .logicOp = available.logicOp, + .depthClamp = available.depthClamp, + .largePoints = available.largePoints, + .samplerAnisotropy = available.samplerAnisotropy, + .occlusionQueryPrecise = available.occlusionQueryPrecise, + .fragmentStoresAndAtomics = available.fragmentStoresAndAtomics, + .shaderStorageImageMultisample = available.shaderStorageImageMultisample, + .shaderClipDistance = available.shaderClipDistance + } + }; + + // Enable newer Vulkan features + auto enabled_features = vk::StructureChain{ + features, + feature_chain.get(), + feature_chain.get(), + feature_chain.get() + }; + + auto extension_list = physical_device.enumerateDeviceExtensionProperties(); + if (extension_list.empty()) { + LOG_CRITICAL(Render_Vulkan, "No extensions supported by 
device."); return false; } + // List available device extensions + for (const auto& extension : extension_list) { + LOG_INFO(Render_Vulkan, "Vulkan extension: {}", extension.extensionName); + } + + // Helper lambda for adding extensions + std::array enabled_extensions; + u32 enabled_extension_count = 0; + + auto AddExtension = [&](std::string_view name, bool required) -> bool { + auto result = std::find_if(extension_list.begin(), extension_list.end(), [&](const auto& prop) { + return name.compare(prop.extensionName.data()); + }); + + if (result != extension_list.end()) { + LOG_INFO(Render_Vulkan, "Enabling extension: {}", name); + enabled_extensions[enabled_extension_count++] = name.data(); + return true; + } + + if (required) { + LOG_ERROR(Render_Vulkan, "Unable to find required extension {}.", name); + } + + return false; + }; + + // Add required extensions + AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true); + + // Check for optional features + dynamic_rendering = AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false); + extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); + push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false); + + // Search queue families for graphics and present queues auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { LOG_CRITICAL(Render_Vulkan, "Vulkan physical device reported no queues."); return false; } - // Search queue families for graphics and present queues graphics_queue_family_index = -1; present_queue_family_index = -1; for (int i = 0; i < family_properties.size(); i++) { @@ -68,24 +164,35 @@ bool Instance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) { } } - if (graphics_queue_family_index == -1 || - present_queue_family_index == -1) { + if (graphics_queue_family_index == -1 || present_queue_family_index == -1) { LOG_CRITICAL(Render_Vulkan, "Unable to find graphics and/or present queues."); 
return false; } static constexpr float queue_priorities[] = {1.0f}; - const std::array layers{"VK_LAYER_KHRONOS_validation"}; - const std::array queue_infos{ - vk::DeviceQueueCreateInfo{{}, graphics_queue_family_index, 1, queue_priorities}, - vk::DeviceQueueCreateInfo{{}, present_queue_family_index, 1, queue_priorities} + const std::array layers = {"VK_LAYER_KHRONOS_validation"}; + const std::array queue_infos = { + vk::DeviceQueueCreateInfo{ + .queueFamilyIndex = graphics_queue_family_index, + .queueCount = 1, + .pQueuePriorities = queue_priorities + }, + vk::DeviceQueueCreateInfo{ + .queueFamilyIndex = present_queue_family_index, + .queueCount = 1, + .pQueuePriorities = queue_priorities + } }; - vk::DeviceCreateInfo device_info({}, 1, queue_infos.data(), 0, nullptr, - extensions.size(), extensions.data(), nullptr, &features); + vk::DeviceCreateInfo device_info = { + .pNext = &enabled_features, + .queueCreateInfoCount = 1, + .pQueueCreateInfos = queue_infos.data(), + .enabledExtensionCount = enabled_extension_count, + .ppEnabledExtensionNames = enabled_extensions.data(), + }; - // Create queue create info structs if (graphics_queue_family_index != present_queue_family_index) { device_info.queueCreateInfoCount = 2; } @@ -104,87 +211,67 @@ bool Instance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) { graphics_queue = device.getQueue(graphics_queue_family_index, 0); present_queue = device.getQueue(present_queue_family_index, 0); - return true; -} - -bool Instance::FindFeatures() { - auto available = physical_device.getFeatures(); - - // Not having geometry shaders or wide lines will cause issues with rendering. - if (!available.geometryShader && !available.wideLines) { - LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! 
Rendering will be limited"); - } - - // Enable some common features other emulators like Dolphin use - vk_features.dualSrcBlend = available.dualSrcBlend; - vk_features.geometryShader = available.geometryShader; - vk_features.samplerAnisotropy = available.samplerAnisotropy; - vk_features.logicOp = available.logicOp; - vk_features.fragmentStoresAndAtomics = available.fragmentStoresAndAtomics; - vk_features.sampleRateShading = available.sampleRateShading; - vk_features.largePoints = available.largePoints; - vk_features.shaderStorageImageMultisample = available.shaderStorageImageMultisample; - vk_features.occlusionQueryPrecise = available.occlusionQueryPrecise; - vk_features.shaderClipDistance = available.shaderClipDistance; - vk_features.depthClamp = available.depthClamp; - vk_features.textureCompressionBC = available.textureCompressionBC; - - // Enable newer Vulkan features - vk12_features.timelineSemaphore = true; - vk13_features.dynamicRendering = true; - dynamic_state_features.extendedDynamicState = true; - dynamic_state2_features.extendedDynamicState2 = true; - - // Include features in device creation - vk12_features.pNext = &vk13_features; - vk13_features.pNext = &dynamic_state_features; - dynamic_state_features.pNext = &dynamic_state2_features; - features = vk::PhysicalDeviceFeatures2{vk_features, &vk12_features}; + // Create the VMA allocator + CreateAllocator(); return true; } -bool Instance::FindExtensions() { - auto available = physical_device.enumerateDeviceExtensionProperties(); - if (available.empty()) { - LOG_CRITICAL(Render_Vulkan, "No extensions supported by device."); - return false; - } - - // List available device extensions - for (const auto& prop : available) { - LOG_INFO(Render_Vulkan, "Vulkan extension: {}", prop.extensionName); - } - - // Helper lambda for adding extensions - auto AddExtension = [&](const char* name, bool required) { - auto result = std::find_if(available.begin(), available.end(), [&](const auto& prop) { - return 
!std::strcmp(name, prop.extensionName); - }); - - if (result != available.end()) { - LOG_INFO(Render_Vulkan, "Enabling extension: {}", name); - extensions.push_back(name); - return true; - } - - if (required) { - LOG_ERROR(Render_Vulkan, "Unable to find required extension {}.", name); - } - - return false; +void Instance::CreateAllocator() { + VmaVulkanFunctions functions = { + .vkGetInstanceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr, + .vkGetDeviceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr }; - // Add required extensions - if (!AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true) || - !AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, true) || - !AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, true) || - !AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME, true) || - !AddExtension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME, true)) { - return false; + VmaAllocatorCreateInfo allocator_info = { + .physicalDevice = physical_device, + .device = device, + .pVulkanFunctions = &functions, + .instance = instance, + .vulkanApiVersion = VK_API_VERSION_1_1 + }; + + vmaCreateAllocator(&allocator_info, &allocator); +} + +bool Instance::IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const { + static std::unordered_map supported; + if (auto iter = supported.find(format); iter != supported.end()) { + return (iter->second.optimalTilingFeatures & usage) == usage; } - return true; + // Cache format properties so we don't have to query the driver all the time + const vk::FormatProperties properties = physical_device.getFormatProperties(format); + supported.insert(std::make_pair(format, properties)); + + return (properties.optimalTilingFeatures & usage) == usage; +} + +vk::Format Instance::GetFormatAlternative(vk::Format format) const { + vk::FormatFeatureFlags features = GetFormatFeatures(GetImageAspect(format)); + if (IsFormatSupported(format, features)) { + return format; + } + + // Return the most 
supported alternative format preferably with the + // same block size according to the Vulkan spec. + // See 43.3. Required Format Support of the Vulkan spec + switch (format) { + case vk::Format::eD24UnormS8Uint: + return vk::Format::eD32SfloatS8Uint; + case vk::Format::eX8D24UnormPack32: + return vk::Format::eD32Sfloat; + case vk::Format::eR5G5B5A1UnormPack16: + return vk::Format::eA1R5G5B5UnormPack16; + case vk::Format::eR4G4B4A4UnormPack16: + return vk::Format::eB4G4R4A4UnormPack16; + case vk::Format::eR8G8B8Unorm: + return vk::Format::eR8G8B8A8Unorm; + default: + LOG_WARNING(Render_Vulkan, "Unable to find compatible alternative to format = {} with usage {}", + vk::to_string(format), vk::to_string(features)); + return vk::Format::eR8G8B8A8Unorm; + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index d54394ac6..898e8efa2 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -8,61 +8,104 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_common.h" -namespace Vulkan { +namespace Frontend { +class EmuWindow; +} + +namespace VideoCore::Vulkan { /// The global Vulkan instance class Instance { public: - Instance() = default; + Instance(Frontend::EmuWindow& window); ~Instance(); - /// Construct global Vulkan context - bool Create(vk::Instance instance, vk::PhysicalDevice gpu, - vk::SurfaceKHR surface, bool enable_validation_layer); + /// Returns the Vulkan instance + vk::Instance GetInstance() const { + return instance; + } - vk::Device GetDevice() const { return device; } - vk::PhysicalDevice GetPhysicalDevice() const { return physical_device; } - vk::Instance GetInstance() const { return instance; } + /// Returns the Vulkan surface + vk::SurfaceKHR GetSurface() const { + return surface; + } + + /// Returns the current physical device + vk::PhysicalDevice GetPhysicalDevice() const { + return 
physical_device; + } + + /// Returns the Vulkan device + vk::Device GetDevice() const { + return device; + } + + VmaAllocator GetAllocator() const { + return allocator; + } /// Retrieve queue information - u32 GetGraphicsQueueFamilyIndex() const { return graphics_queue_family_index; } - u32 GetPresentQueueFamilyIndex() const { return present_queue_family_index; } - vk::Queue GetGraphicsQueue() const { return graphics_queue; } - vk::Queue GetPresentQueue() const { return present_queue; } + u32 GetGraphicsQueueFamilyIndex() const { + return graphics_queue_family_index; + } + + u32 GetPresentQueueFamilyIndex() const { + return present_queue_family_index; + } + + vk::Queue GetGraphicsQueue() const { + return graphics_queue; + } + + vk::Queue GetPresentQueue() const { + return present_queue; + } /// Feature support - bool SupportsAnisotropicFiltering() const; - u32 UniformMinAlignment() const { return static_cast(device_limits.minUniformBufferOffsetAlignment); } + bool IsDynamicRenderingSupported() const { + return dynamic_rendering; + } + + bool IsExtendedDynamicStateSupported() const { + return extended_dynamic_state; + } + + bool IsPushDescriptorsSupported() const { + return push_descriptors; + } + + /// Returns the minimum required alignment for uniforms + vk::DeviceSize UniformMinAlignment() const { + return device_limits.minUniformBufferOffsetAlignment; + } + + /// Returns true when the format supports the provided feature flags + bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const; + + /// Returns the most compatible format that supports the provided feature flags + vk::Format GetFormatAlternative(vk::Format format) const; private: - bool CreateDevice(vk::SurfaceKHR surface, bool validation_enabled); - bool FindExtensions(); - bool FindFeatures(); + bool CreateDevice(bool validation_enabled); + void CreateAllocator(); -public: +private: // Queue family indexes - u32 present_queue_family_index{}, graphics_queue_family_index{}; + u32 
present_queue_family_index = 0, graphics_queue_family_index = 0; vk::Queue present_queue, graphics_queue; // Core vulkan objects + vk::Device device; vk::PhysicalDevice physical_device; vk::Instance instance; - vk::Device device; - - // Extensions and features - std::vector extensions; - vk::PhysicalDeviceFeatures2 features{}; + vk::SurfaceKHR surface; vk::PhysicalDeviceLimits device_limits; + VmaAllocator allocator; // Features per vulkan version - vk::PhysicalDeviceFeatures vk_features{}; - vk::PhysicalDeviceVulkan13Features vk13_features{}; - vk::PhysicalDeviceVulkan12Features vk12_features{}; - vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state_features{}; - vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT dynamic_state2_features{}; - vk::PhysicalDeviceColorWriteEnableFeaturesEXT color_write_features{}; + bool dynamic_rendering = false; + bool extended_dynamic_state = false; + bool push_descriptors = false; }; -extern std::unique_ptr g_vk_instace; - -} // namespace Vulkan +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline.cpp b/src/video_core/renderer_vulkan/vk_pipeline.cpp new file mode 100644 index 000000000..b5dd38ac1 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline.cpp @@ -0,0 +1,414 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/pica_to_vulkan.h" +#include "video_core/renderer_vulkan/vk_pipeline.h" +#include "video_core/renderer_vulkan/vk_shader.h" +#include "video_core/renderer_vulkan/vk_texture.h" +#include "video_core/renderer_vulkan/vk_buffer.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace VideoCore::Vulkan { + +// Maximum binding per descriptor set +constexpr u32 MAX_BINDING_SLOTS = 7; + +vk::ShaderStageFlags ToVkStageFlags(BindingType type) { + vk::ShaderStageFlags flags; + switch (type) { + case BindingType::Sampler: + case BindingType::Texture: + case BindingType::TexelBuffer: + flags = vk::ShaderStageFlagBits::eFragment; + break; + case BindingType::StorageImage: + case BindingType::Uniform: + case BindingType::UniformDynamic: + flags = vk::ShaderStageFlagBits::eFragment | + vk::ShaderStageFlagBits::eVertex | + vk::ShaderStageFlagBits::eGeometry | + vk::ShaderStageFlagBits::eCompute; + break; + default: + LOG_ERROR(Render_Vulkan, "Unknown descriptor type!"); + } + + return flags; +} + +vk::DescriptorType ToVkDescriptorType(BindingType type) { + switch (type) { + case BindingType::Uniform: + return vk::DescriptorType::eUniformBuffer; + case BindingType::UniformDynamic: + return vk::DescriptorType::eUniformBufferDynamic; + case BindingType::TexelBuffer: + return vk::DescriptorType::eUniformTexelBuffer; + case BindingType::Texture: + return vk::DescriptorType::eSampledImage; + case BindingType::Sampler: + return vk::DescriptorType::eSampler; + case BindingType::StorageImage: + return vk::DescriptorType::eStorageImage; + default: + LOG_CRITICAL(Render_Vulkan, "Unknown descriptor type!"); + UNREACHABLE(); + } +} + +u32 AttribBytes(VertexAttribute attrib) { + switch (attrib.type) { + case AttribType::Float: + return sizeof(float) * attrib.components; + case AttribType::Int: + return sizeof(u32) * attrib.components; + case AttribType::Short: + return 
sizeof(u16) * attrib.components; + } +} + +vk::Format ToVkAttributeFormat(VertexAttribute attrib) { + switch (attrib.type) { + case AttribType::Float: + switch (attrib.components) { + case 1: return vk::Format::eR32Sfloat; + case 2: return vk::Format::eR32G32Sfloat; + case 3: return vk::Format::eR32G32B32Sfloat; + case 4: return vk::Format::eR32G32B32A32Sfloat; + } + default: + LOG_CRITICAL(Render_Vulkan, "Unimplemented vertex attribute format!"); + UNREACHABLE(); + } +} + +vk::ShaderStageFlagBits ToVkShaderStage(ShaderStage stage) { + switch (stage) { + case ShaderStage::Vertex: + return vk::ShaderStageFlagBits::eVertex; + case ShaderStage::Fragment: + return vk::ShaderStageFlagBits::eFragment; + case ShaderStage::Geometry: + return vk::ShaderStageFlagBits::eGeometry; + case ShaderStage::Compute: + return vk::ShaderStageFlagBits::eCompute; + default: + LOG_CRITICAL(Render_Vulkan, "Undefined shader stage!"); + UNREACHABLE(); + } +} + +PipelineLayout::PipelineLayout(Instance& instance, PipelineLayoutInfo info) : + instance(instance), set_layout_count(info.group_count) { + + // Used as temp storage for CreateDescriptorSet + std::array set_bindings; + std::array update_entries; + + vk::Device device = instance.GetDevice(); + for (u32 set = 0; set < set_layout_count; set++) { + auto& group = info.binding_groups[set]; + + u32 binding = 0; + while (group[binding] != BindingType::None) { + const BindingType type = group[binding]; + set_bindings[binding] = vk::DescriptorSetLayoutBinding{ + .binding = binding, + .descriptorType = ToVkDescriptorType(type), + .descriptorCount = 1, + .stageFlags = ToVkStageFlags(type) + }; + + // Also create update template to speed up descriptor writes + update_entries[binding] = vk::DescriptorUpdateTemplateEntry{ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = ToVkDescriptorType(type), + .offset = binding * sizeof(DescriptorData), + .stride = sizeof(DescriptorData) + }; + + binding++; + } + + 
const vk::DescriptorSetLayoutCreateInfo layout_info = { + .bindingCount = binding, + .pBindings = set_bindings.data() + }; + + // Create descriptor set layout + set_layouts[set] = device.createDescriptorSetLayout(layout_info); + + const vk::DescriptorUpdateTemplateCreateInfo template_info = { + .descriptorUpdateEntryCount = binding, + .pDescriptorUpdateEntries = update_entries.data(), + .descriptorSetLayout = set_layouts[set] + }; + + // Create descriptor set update template + update_templates[set] = device.createDescriptorUpdateTemplate(template_info); + } + + // Create pipeline layout + const vk::PushConstantRange range = { + .offset = 0, + .size = info.push_constant_block_size + }; + + bool push_constants = info.push_constant_block_size > 0; + const u32 range_count = push_constants ? 1u : 0u; + + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = set_layout_count, + .pSetLayouts = set_layouts.data(), + .pushConstantRangeCount = range_count, + .pPushConstantRanges = &range + }; + + pipeline_layout = device.createPipelineLayout(layout_info); +} + +PipelineLayout::~PipelineLayout() { + vk::Device device = instance.GetDevice(); + device.destroyPipelineLayout(pipeline_layout); + + u32 i = 0; + while (set_layouts[i] && update_templates[i]) { + device.destroyDescriptorSetLayout(set_layouts[i]); + device.destroyDescriptorUpdateTemplate(update_templates[i]); + } +} + +Pipeline::Pipeline(Instance& instance, PipelineLayout& owner, PipelineType type, + PipelineInfo info, vk::PipelineCache cache) : PipelineBase(type, info), + instance(instance), owner(owner) { + + vk::Device device = instance.GetDevice(); + + u32 shader_count = 0; + std::array shader_stages; + for (int i = 0; i < info.shaders.size(); i++) { + auto& shader = info.shaders[i]; + if (!shader.IsValid()) { + shader_count = i; + break; + } + + Shader* vk_shader = static_cast(shader.Get()); + shader_stages[i] = vk::PipelineShaderStageCreateInfo{ + .stage = ToVkShaderStage(shader->GetStage()), + 
.module = vk_shader->GetHandle(), + .pName = shader->GetName().data(), + }; + } + + // Create a graphics pipeline + if (type == PipelineType::Graphics) { + const vk::VertexInputBindingDescription binding_desc = { + .binding = 0, + .stride = info.vertex_layout.stride + }; + + // Populate vertex attribute structures + u32 attribute_count = 0; + std::array attribute_desc; + for (u32 i = 0; i < MAX_VERTEX_ATTRIBUTES; i++) { + auto& attr = info.vertex_layout.attributes[i]; + if (attr.components == 0) { + attribute_count = i; + break; + } + + attribute_desc[i] = vk::VertexInputAttributeDescription{ + .location = i, + .binding = 0, + .format = ToVkAttributeFormat(attr), + .offset = (i > 0 ? attribute_desc[i - 1].offset + + AttribBytes(info.vertex_layout.attributes[i - 1]) : 0) + }; + } + + const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &binding_desc, + .vertexAttributeDescriptionCount = attribute_count, + .pVertexAttributeDescriptions = attribute_desc.data() + }; + + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { + .topology = PicaToVK::PrimitiveTopology(info.rasterization.topology), + .primitiveRestartEnable = false + }; + + const vk::PipelineRasterizationStateCreateInfo raster_state = { + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .cullMode = PicaToVK::CullMode(info.rasterization.cull_mode), + .frontFace = vk::FrontFace::eClockwise, + .depthBiasEnable = false, + .lineWidth = 1.0f + }; + + const vk::PipelineMultisampleStateCreateInfo multisampling = { + .rasterizationSamples = vk::SampleCountFlagBits::e1, + .sampleShadingEnable = false + }; + + const vk::PipelineColorBlendAttachmentState colorblend_attachment = { + .blendEnable = true, + .srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor), + .dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor), + .colorBlendOp = 
PicaToVK::BlendEquation(info.blending.color_blend_eq), + .srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor), + .dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor), + .alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq), + .colorWriteMask = static_cast(info.blending.color_write_mask) + }; + + const vk::PipelineColorBlendStateCreateInfo color_blending = { + .logicOpEnable = true, + .logicOp = vk::LogicOp::eCopy, // TODO + .attachmentCount = 1, + .pAttachments = &colorblend_attachment, + }; + + const bool extended_dynamic_states = instance.IsExtendedDynamicStateSupported(); + const std::array dynamic_states = { + vk::DynamicState::eViewport, + vk::DynamicState::eScissor, + vk::DynamicState::eLineWidth, + vk::DynamicState::eStencilCompareMask, + vk::DynamicState::eStencilWriteMask, + vk::DynamicState::eStencilReference, + // VK_EXT_extended_dynamic_state + vk::DynamicState::eCullModeEXT, + vk::DynamicState::eDepthCompareOpEXT, + vk::DynamicState::eDepthTestEnableEXT, + vk::DynamicState::eDepthWriteEnableEXT, + vk::DynamicState::eFrontFaceEXT, + vk::DynamicState::ePrimitiveTopologyEXT, + vk::DynamicState::eStencilOpEXT, + vk::DynamicState::eStencilTestEnableEXT, + }; + + const vk::PipelineDynamicStateCreateInfo dynamic_info = { + .dynamicStateCount = extended_dynamic_states ? 
14u : 6u, + .pDynamicStates = dynamic_states.data() + }; + + const vk::StencilOpState stencil_op_state = { + .failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op), + .passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op), + .depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op), + .compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op), + .compareMask = static_cast(info.depth_stencil.stencil_compare_mask.Value()), + .writeMask = static_cast(info.depth_stencil.stencil_write_mask.Value()), + .reference = static_cast(info.depth_stencil.stencil_reference.Value()) + }; + + const vk::PipelineDepthStencilStateCreateInfo depth_info = { + .depthTestEnable = static_cast(info.depth_stencil.depth_test_enable.Value()), + .depthWriteEnable = static_cast(info.depth_stencil.depth_write_enable.Value()), + .depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op), + .depthBoundsTestEnable = false, + .stencilTestEnable = static_cast(info.depth_stencil.stencil_test_enable.Value()), + .front = stencil_op_state, + .back = stencil_op_state + }; + + const vk::GraphicsPipelineCreateInfo pipeline_info = { + .stageCount = shader_count, + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pRasterizationState = &raster_state, + .pMultisampleState = &multisampling, + .pDepthStencilState = &depth_info, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_info, + .layout = owner.GetLayout(), + .renderPass = {} + }; + + if (auto result = device.createGraphicsPipeline(cache, pipeline_info); result.result == vk::Result::eSuccess) { + pipeline = result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "Graphics pipeline creation failed!"); + UNREACHABLE(); + } + } else { // Compute pipeline + ASSERT(shader_count == 1); + const vk::ComputePipelineCreateInfo pipeline_info = { + .stage = shader_stages[0], + .layout = owner.GetLayout() + }; + 
+ if (auto result = device.createComputePipeline(cache, pipeline_info); result.result == vk::Result::eSuccess) { + pipeline = result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "Compute pipeline creation failed!"); + UNREACHABLE(); + } + + } +} + +Pipeline::~Pipeline() { + vk::Device device = instance.GetDevice(); + device.destroyPipeline(pipeline); +} + + +void Pipeline::BindTexture(u32 group, u32 slot, TextureHandle handle) { + Texture* texture = static_cast(handle.Get()); + + const DescriptorData data = { + .image_info = vk::DescriptorImageInfo{ + .imageView = texture->GetView(), + .imageLayout = texture->GetLayout() + } + }; + + owner.SetBinding(group, slot, data); +} + +void Pipeline::BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view) { + Buffer* buffer = static_cast(handle.Get()); + + // Texel buffers are bound with their views + if (buffer->GetUsage() == BufferUsage::Texel) { + const DescriptorData data = { + .buffer_view = buffer->GetView(view) + }; + + owner.SetBinding(group, slot, data); + } else { + const DescriptorData data = { + .buffer_info = vk::DescriptorBufferInfo{ + .buffer = buffer->GetHandle(), + .offset = 0, + .range = buffer->GetCapacity() + } + }; + + owner.SetBinding(group, slot, data); + } +} + +void Pipeline::BindSampler(u32 group, u32 slot, SamplerHandle handle) { + Sampler* sampler = static_cast(handle.Get()); + + const DescriptorData data = { + .image_info = vk::DescriptorImageInfo{ + .sampler = sampler->GetHandle() + } + }; + + owner.SetBinding(group, slot, data); +} + +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h new file mode 100644 index 000000000..2489d38ec --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline.h @@ -0,0 +1,96 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "video_core/common/pipeline.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace VideoCore::Vulkan { + +class Instance; +class CommandScheduler; + +union DescriptorData { + vk::DescriptorImageInfo image_info{}; + vk::DescriptorBufferInfo buffer_info; + vk::BufferView buffer_view; +}; + +/** + * Stores the pipeline layout as well as the descriptor set layouts + * and update templates associated with those layouts. + * Functions as the "parent" to a group of pipelines that share the same layout + */ +class PipelineLayout { +public: + PipelineLayout(Instance& instance, PipelineLayoutInfo info); + ~PipelineLayout(); + + // Disable copy constructor + PipelineLayout(const PipelineLayout&) = delete; + PipelineLayout& operator=(const PipelineLayout&) = delete; + + // Assigns data to a particular binding + void SetBinding(u32 set, u32 binding, DescriptorData data) { + update_data[set][binding] = data; + } + + // Returns the most current descriptor update data + std::span GetData(u32 set) { + return std::span{update_data.at(set).data(), set_layout_count}; + } + + // Returns the underlying vulkan pipeline layout handle + vk::PipelineLayout GetLayout() const { + return pipeline_layout; + } + + // Returns the descriptor set update template handle associated with the provided set index + vk::DescriptorUpdateTemplate GetUpdateTemplate(u32 set) const { + return update_templates.at(set); + } + +private: + Instance& instance; + vk::PipelineLayout pipeline_layout = VK_NULL_HANDLE; + u32 set_layout_count = 0; + std::array set_layouts; + std::array update_templates; + + // Update data for the descriptor sets + using SetData = std::array; + std::array update_data; +}; + +class Pipeline : public VideoCore::PipelineBase { +public: + Pipeline(Instance& instance, PipelineLayout& owner, + PipelineType type, PipelineInfo info, vk::PipelineCache cache); + ~Pipeline() override; + + void BindTexture(u32 group, u32 slot, TextureHandle handle) 
override; + + void BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view = 0) override; + + void BindSampler(u32 group, u32 slot, SamplerHandle handle) override; + + /// Returns the layout tracker that owns this pipeline + PipelineLayout& GetOwner() const { + return owner; + } + + /// Returns the underlying vulkan pipeline handle + vk::Pipeline GetHandle() const { + return pipeline; + } + +private: + Instance& instance; + PipelineLayout& owner; + vk::Pipeline pipeline; +}; + +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_builder.cpp b/src/video_core/renderer_vulkan/vk_pipeline_builder.cpp deleted file mode 100644 index 10c083c35..000000000 --- a/src/video_core/renderer_vulkan/vk_pipeline_builder.cpp +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/renderer_vulkan/vk_pipeline_builder.h" -#include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_shader_state.h" -#include -#include -#include - -namespace Vulkan { - -PipelineLayoutBuilder::PipelineLayoutBuilder() { - Clear(); -} - -void PipelineLayoutBuilder::Clear() { - pipeline_layout_info = vk::PipelineLayoutCreateInfo{}; -} - -vk::PipelineLayout PipelineLayoutBuilder::Build() { - auto device = g_vk_instace->GetDevice(); - - auto result = device.createPipelineLayout(pipeline_layout_info); - if (!result) { - LOG_ERROR(Render_Vulkan, "Failed to create pipeline layout"); - return VK_NULL_HANDLE; - } - - return result; -} - -void PipelineLayoutBuilder::AddDescriptorSet(vk::DescriptorSetLayout layout) { - assert(pipeline_layout_info.setLayoutCount < MAX_SETS); - - sets[pipeline_layout_info.setLayoutCount++] = layout; - pipeline_layout_info.pSetLayouts = sets.data(); -} - -void PipelineLayoutBuilder::AddPushConstants(vk::ShaderStageFlags stages, u32 offset, u32 size) { - 
assert(pipeline_layout_info.pushConstantRangeCount < MAX_PUSH_CONSTANTS); - - push_constants[pipeline_layout_info.pushConstantRangeCount++] = {stages, offset, size}; - pipeline_layout_info.pPushConstantRanges = push_constants.data(); -} - -PipelineBuilder::PipelineBuilder() { - Clear(); -} - -void PipelineBuilder::Clear() { - pipeline_info = vk::GraphicsPipelineCreateInfo{}; - shader_stages.clear(); - - vertex_input_state = vk::PipelineVertexInputStateCreateInfo{}; - input_assembly = vk::PipelineInputAssemblyStateCreateInfo{}; - rasterization_state = vk::PipelineRasterizationStateCreateInfo{}; - depth_state = vk::PipelineDepthStencilStateCreateInfo{}; - - blend_state = vk::PipelineColorBlendStateCreateInfo{}; - blend_attachment = vk::PipelineColorBlendAttachmentState{}; - dynamic_info = vk::PipelineDynamicStateCreateInfo{}; - dynamic_states.fill({}); - - viewport_state = vk::PipelineViewportStateCreateInfo{}; - multisample_info = vk::PipelineMultisampleStateCreateInfo{}; - - // Set defaults - SetNoCullRasterizationState(); - SetNoDepthTestState(); - SetNoBlendingState(); - SetPrimitiveTopology(vk::PrimitiveTopology::eTriangleList); - - // Have to be specified even if dynamic - SetViewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f); - SetScissorRect(0, 0, 1, 1); - SetBlendConstants(1.0f, 1.0f, 1.0f, 1.0f); - SetMultisamples(vk::SampleCountFlagBits::e1, false); -} - -vk::Pipeline PipelineBuilder::Build() { - auto device = g_vk_instace->GetDevice(); - - auto result = device.createGraphicsPipeline({}, pipeline_info); - if (result.result != vk::Result::eSuccess) { - LOG_CRITICAL(Render_Vulkan, "Failed to build vulkan pipeline!"); - UNREACHABLE(); - } - - return result.value; -} - -void PipelineBuilder::SetPipelineLayout(vk::PipelineLayout layout) { - pipeline_info.layout = layout; -} - -void PipelineBuilder::SetShaderStage(vk::ShaderStageFlagBits stage, vk::ShaderModule module) { - auto result = std::ranges::find_if(shader_stages.begin(), shader_stages.end(), [stage](const 
auto& info) { - return info.stage == stage; - }); - - /* If the stage already exists, just replace the module */ - if (result != shader_stages.end()) { - result->module = module; - } - else { - shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags(), stage, module, "main"); - pipeline_info.stageCount++; - } - - pipeline_info.pStages = shader_stages.data(); -} - -void PipelineBuilder::AddVertexBuffer(u32 binding, u32 stride, vk::VertexInputRate input_rate, - std::span attributes) { - // Copy attributes to private array - auto loc = vertex_attributes.begin() + vertex_input_state.vertexAttributeDescriptionCount; - std::copy(attributes.begin(), attributes.end(), loc); - - vertex_buffers[vertex_input_state.vertexBindingDescriptionCount++] = {binding, stride, input_rate}; - vertex_input_state.vertexAttributeDescriptionCount += attributes.size(); - - vertex_input_state.pVertexBindingDescriptions = vertex_buffers.data(); - vertex_input_state.pVertexAttributeDescriptions = vertex_attributes.data(); - - pipeline_info.pVertexInputState = &vertex_input_state; -} - -void PipelineBuilder::SetPrimitiveTopology(vk::PrimitiveTopology topology, bool enable_primitive_restart) { - input_assembly.topology = topology; - input_assembly.primitiveRestartEnable = enable_primitive_restart; - pipeline_info.pInputAssemblyState = &input_assembly; -} - -void PipelineBuilder::SetRasterizationState(vk::PolygonMode polygon_mode, vk::CullModeFlags cull_mode, - vk::FrontFace front_face) { - rasterization_state.polygonMode = polygon_mode; - rasterization_state.cullMode = cull_mode; - rasterization_state.frontFace = front_face; - pipeline_info.pRasterizationState = &rasterization_state; -} - -void PipelineBuilder::SetLineWidth(float width) { - rasterization_state.lineWidth = width; - pipeline_info.pRasterizationState = &rasterization_state; -} - -void PipelineBuilder::SetMultisamples(vk::SampleCountFlagBits samples, bool per_sample_shading) { - multisample_info.rasterizationSamples = samples; - 
multisample_info.sampleShadingEnable = per_sample_shading; - multisample_info.minSampleShading = (static_cast(samples) > 1) ? 1.0f : 0.0f; - pipeline_info.pMultisampleState = &multisample_info; -} - -void PipelineBuilder::SetNoCullRasterizationState() { - SetRasterizationState(vk::PolygonMode::eFill, vk::CullModeFlagBits::eNone, vk::FrontFace::eClockwise); -} - -void PipelineBuilder::SetDepthState(bool depth_test, bool depth_write, vk::CompareOp compare_op) { - depth_state.depthTestEnable = depth_test; - depth_state.depthWriteEnable = depth_write; - depth_state.depthCompareOp = compare_op; - pipeline_info.pDepthStencilState = &depth_state; -} - -void PipelineBuilder::SetStencilState(bool stencil_test, vk::StencilOpState front, vk::StencilOpState back) { - depth_state.stencilTestEnable = stencil_test; - depth_state.front = front; - depth_state.back = back; - pipeline_info.pDepthStencilState = &depth_state; -} - -void PipelineBuilder::SetNoStencilState() { - depth_state.stencilTestEnable = VK_FALSE; - depth_state.front = vk::StencilOpState{}; - depth_state.back = vk::StencilOpState{}; -} - -void PipelineBuilder::SetNoDepthTestState() { - SetDepthState(false, false, vk::CompareOp::eAlways); -} - -void PipelineBuilder::SetBlendConstants(float r, float g, float b, float a) { - blend_state.blendConstants = std::array{r, g, b, a}; - pipeline_info.pColorBlendState = &blend_state; -} - -void PipelineBuilder::SetBlendLogicOp(vk::LogicOp logic_op) { - blend_state.logicOp = logic_op; - blend_state.logicOpEnable = false; -} - -void PipelineBuilder::SetBlendAttachment(bool blend_enable, vk::BlendFactor src_factor, vk::BlendFactor dst_factor, - vk::BlendOp op, vk::BlendFactor alpha_src_factor, - vk::BlendFactor alpha_dst_factor, vk::BlendOp alpha_op, - vk::ColorComponentFlags write_mask) { - blend_attachment.blendEnable = blend_enable; - blend_attachment.srcColorBlendFactor = src_factor; - blend_attachment.dstColorBlendFactor = dst_factor; - blend_attachment.colorBlendOp = op; - 
blend_attachment.srcAlphaBlendFactor = alpha_src_factor; - blend_attachment.dstAlphaBlendFactor = alpha_dst_factor; - blend_attachment.alphaBlendOp = alpha_op; - blend_attachment.colorWriteMask = write_mask; - - blend_state.attachmentCount = 1; - blend_state.pAttachments = &blend_attachment; - pipeline_info.pColorBlendState = &blend_state; -} - -void PipelineBuilder::SetNoBlendingState() { - SetBlendAttachment(false, vk::BlendFactor::eOne, vk::BlendFactor::eZero, vk::BlendOp::eAdd, vk::BlendFactor::eOne, - vk::BlendFactor::eZero, vk::BlendOp::eAdd, vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | - vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA); -} - -void PipelineBuilder::SetDynamicStates(const std::span states) { - if (states.size() > MAX_DYNAMIC_STATES) { - LOG_ERROR(Render_Vulkan, "Cannot include more dynamic states!"); - UNREACHABLE(); - } - - // Copy the state data - std::copy(states.begin(), states.end(), dynamic_states.begin()); - dynamic_info.dynamicStateCount = states.size(); - dynamic_info.pDynamicStates = dynamic_states.data(); - pipeline_info.pDynamicState = &dynamic_info; - return; -} - -void PipelineBuilder::SetRenderingFormats(vk::Format color, vk::Format depth_stencil) { - color_format = color; - depth_stencil_format = depth_stencil; - - auto IsStencil = [](vk::Format format) -> bool { - switch (format) { - case vk::Format::eD16UnormS8Uint: - case vk::Format::eD24UnormS8Uint: - case vk::Format::eD32SfloatS8Uint: - return true; - default: - return false; - }; - }; - - const u32 color_attachment_count = color == vk::Format::eUndefined ? 0 : 1; - rendering_info = vk::PipelineRenderingCreateInfo{0, color_attachment_count, &color_format, depth_stencil_format, - IsStencil(depth_stencil) ? 
depth_stencil : vk::Format::eUndefined}; - pipeline_info.pNext = &rendering_info; -} - -void PipelineBuilder::SetViewport(float x, float y, float width, float height, float min_depth, float max_depth) { - viewport = vk::Viewport{x, y, width, height, min_depth, max_depth}; - viewport_state.pViewports = &viewport; - viewport_state.viewportCount = 1; - pipeline_info.pViewportState = &viewport_state; -} - -void PipelineBuilder::SetScissorRect(s32 x, s32 y, u32 width, u32 height) { - scissor = vk::Rect2D{{x, y}, {width, height}}; - viewport_state.scissorCount = 1u; - viewport_state.pScissors = &scissor; - pipeline_info.pViewportState = &viewport_state; -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_builder.h b/src/video_core/renderer_vulkan/vk_pipeline_builder.h deleted file mode 100644 index 1fd9a8660..000000000 --- a/src/video_core/renderer_vulkan/vk_pipeline_builder.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include "video_core/renderer_vulkan/vk_texture.h" - -namespace Vulkan { - -class PipelineLayoutBuilder { -public: - PipelineLayoutBuilder(); - ~PipelineLayoutBuilder() = default; - - void Clear(); - vk::PipelineLayout Build(); - - void AddDescriptorSet(vk::DescriptorSetLayout layout); - void AddPushConstants(vk::ShaderStageFlags stages, u32 offset, u32 size); - -private: - static constexpr u32 MAX_SETS = 8; - static constexpr u32 MAX_PUSH_CONSTANTS = 5; - - vk::PipelineLayoutCreateInfo pipeline_layout_info; - std::array sets; - std::array push_constants; -}; - -class PipelineBuilder { -public: - PipelineBuilder(); - ~PipelineBuilder() = default; - - void Clear(); - vk::Pipeline Build(); - - void SetPipelineLayout(vk::PipelineLayout layout); - void AddVertexBuffer(u32 binding, u32 stride, vk::VertexInputRate input_rate, - const std::span attributes); - void SetShaderStage(vk::ShaderStageFlagBits stage, vk::ShaderModule module); - - void SetPrimitiveTopology(vk::PrimitiveTopology topology, bool enable_primitive_restart = false); - void SetLineWidth(float width); - void SetMultisamples(vk::SampleCountFlagBits samples, bool per_sample_shading); - void SetRasterizationState(vk::PolygonMode polygon_mode, vk::CullModeFlags cull_mode, - vk::FrontFace front_face); - - void SetNoCullRasterizationState(); - void SetDepthState(bool depth_test, bool depth_write, vk::CompareOp compare_op); - void SetStencilState(bool stencil_test, vk::StencilOpState front, vk::StencilOpState back); - void SetNoDepthTestState(); - void SetNoStencilState(); - - void SetBlendConstants(float r, float g, float b, float a); - void SetNoBlendingState(); - void SetBlendLogicOp(vk::LogicOp logic_op); - void SetBlendAttachment(bool blend_enable, vk::BlendFactor src_factor, vk::BlendFactor dst_factor, - vk::BlendOp op, vk::BlendFactor alpha_src_factor, vk::BlendFactor alpha_dst_factor, - vk::BlendOp 
alpha_op,vk::ColorComponentFlags write_mask); - - void SetViewport(float x, float y, float width, float height, float min_depth, float max_depth); - void SetScissorRect(s32 x, s32 y, u32 width, u32 height); - void SetDynamicStates(const std::span states); - void SetRenderingFormats(vk::Format color, vk::Format depth_stencil = vk::Format::eUndefined); - -private: - static constexpr u32 MAX_DYNAMIC_STATES = 20; - static constexpr u32 MAX_SHADER_STAGES = 3; - static constexpr u32 MAX_VERTEX_BUFFERS = 8; - static constexpr u32 MAX_VERTEX_ATTRIBUTES = 16; - - vk::GraphicsPipelineCreateInfo pipeline_info; - std::vector shader_stages; - - vk::PipelineVertexInputStateCreateInfo vertex_input_state; - std::array vertex_buffers; - std::array vertex_attributes; - - vk::PipelineInputAssemblyStateCreateInfo input_assembly; - vk::PipelineRasterizationStateCreateInfo rasterization_state; - vk::PipelineDepthStencilStateCreateInfo depth_state; - - // Blending - vk::PipelineColorBlendStateCreateInfo blend_state; - vk::PipelineColorBlendAttachmentState blend_attachment; - vk::PipelineDynamicStateCreateInfo dynamic_info; - std::array dynamic_states; - - vk::PipelineViewportStateCreateInfo viewport_state; - vk::Viewport viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; - vk::Rect2D scissor; - - // Multisampling - vk::PipelineMultisampleStateCreateInfo multisample_info; - vk::PipelineRenderingCreateInfo rendering_info; - vk::Format color_format, depth_stencil_format; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.h b/src/video_core/renderer_vulkan/vk_platform.h new file mode 100644 index 000000000..0e4f4c43d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_platform.h @@ -0,0 +1,130 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +// Include the vulkan platform specific header +#if defined(ANDROID) || defined (__ANDROID__) + #define VK_USE_PLATFORM_ANDROID_KHR 1 +#elif defined(_WIN32) + #define VK_USE_PLATFORM_WIN32_KHR 1 +#elif defined(__APPLE__) + #define VK_USE_PLATFORM_MACOS_MVK 1 + #define VK_USE_PLATFORM_METAL_EXT 1 +#else + #ifdef WAYLAND_DISPLAY + #define VK_USE_PLATFORM_WAYLAND_KHR 1 + #else // wayland + #define VK_USE_PLATFORM_XLIB_KHR 1 + #endif +#endif + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include +#include "common/logging/log.h" +#include "core/frontend/emu_window.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace VideoCore::Vulkan { + +inline vk::SurfaceKHR CreateSurface(const vk::Instance& instance, const Frontend::EmuWindow& emu_window) { + const auto& window_info = emu_window.GetWindowInfo(); + vk::SurfaceKHR surface; + +#if VK_USE_PLATFORM_WIN32_KHR + if (window_info.type == Frontend::WindowSystemType::Windows) { + const vk::Win32SurfaceCreateInfoKHR win32_ci = { + .hinstance = nullptr, + .hwnd = static_cast(window_info.render_surface) + }; + + if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) { + LOG_CRITICAL(Render_Vulkan, "Failed to initialize Win32 surface"); + } + } +#elif VK_USE_PLATFORM_XLIB_KHR + if (window_info.type == Frontend::WindowSystemType::X11) { + const vk::XlibSurfaceCreateInfoKHR xlib_ci{{}, + static_cast(window_info.display_connection), + reinterpret_cast(window_info.render_surface)}; + if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); + UNREACHABLE(); + } + } + +#elif VK_USE_PLATFORM_WAYLAND_KHR + if (window_info.type == Frontend::WindowSystemType::Wayland) { + const vk::WaylandSurfaceCreateInfoKHR wayland_ci{{}, + static_cast(window_info.display_connection), + static_cast(window_info.render_surface)}; + if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != 
vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); + UNREACHABLE(); + } + } +#endif + + if (!surface) { + LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform"); + } + + return surface; +} + +inline auto GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) { + const auto properties = vk::enumerateInstanceExtensionProperties(); + if (properties.empty()) { + LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); + return std::vector{}; + } + + // Add the windowing system specific extension + std::vector extensions; + extensions.reserve(6); + + switch (window_type) { + case Frontend::WindowSystemType::Headless: + break; +#if VK_USE_PLATFORM_WIN32_KHR + case Frontend::WindowSystemType::Windows: + extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); + break; +#elif VK_USE_PLATFORM_XLIB_KHR + case Frontend::WindowSystemType::X11: + extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); + break; +#elif VK_USE_PLATFORM_WAYLAND_KHR + case Frontend::WindowSystemType::Wayland: + extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); + break; +#endif + default: + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + break; + } + + if (window_type != Frontend::WindowSystemType::Headless) { + extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); + } + + if (enable_debug_utils) { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + + for (const char* extension : extensions) { + const auto iter = std::ranges::find_if(properties, [extension](const auto& prop) { + return std::strcmp(extension, prop.extensionName) == 0; + }); + + if (iter == properties.end()) { + LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); + return std::vector{}; + } + } + + return extensions; +} + +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp 
b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 910c98ee6..76be3845e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -5,15 +5,10 @@ #include #include #include -#include #include -#include #include "common/alignment.h" -#include "common/assert.h" -#include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" -#include "common/scope_exit.h" #include "common/vector_math.h" #include "core/hw/gpu.h" #include "video_core/pica_state.h" @@ -21,27 +16,45 @@ #include "video_core/regs_rasterizer.h" #include "video_core/regs_texturing.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_vulkan/vk_surface_params.h" #include "video_core/renderer_vulkan/pica_to_vulkan.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_task_scheduler.h" -#include "video_core/video_core.h" namespace Vulkan { +MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_GS, "Vulkan", "Geometry Shader Setup", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192)); +MICROPROFILE_DEFINE(Vulkan_Blits, "Vulkan", "Blits", MP_RGB(100, 100, 255)); +MICROPROFILE_DEFINE(Vulkan_CacheManagement, "Vulkan", "Cache Management", MP_RGB(100, 255, 100)); + using PixelFormat = SurfaceParams::PixelFormat; using SurfaceType = SurfaceParams::SurfaceType; -MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0)); -MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128)); -MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); 
-MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); -MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); +// They shall be big enough for about one frame. +constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024; +constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024; +constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; +constexpr u32 TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024; + +constexpr std::array LUT_LF_VIEWS = { + vk::Format::eR32G32Sfloat +}; + +constexpr std::array LUT_VIEWS = { + vk::Format::eR32G32Sfloat, + vk::Format::eR32G32B32A32Sfloat +}; + +RasterizerVulkan::RasterizerVulkan(CommandScheduler& scheduler, Frontend::EmuWindow& emu_window) : + scheduler(scheduler), vertex_buffer(scheduler, VERTEX_BUFFER_SIZE, BufferUsage::Vertex), + index_buffer(scheduler, INDEX_BUFFER_SIZE, BufferUsage::Index), + uniform_buffer(scheduler, UNIFORM_BUFFER_SIZE, BufferUsage::Uniform), + texture_buffer_lut_lf(scheduler, TEXTURE_BUFFER_SIZE, BufferUsage::UniformTexel, LUT_LF_VIEWS), + texture_buffer_lut(scheduler, TEXTURE_BUFFER_SIZE, BufferUsage::UniformTexel, LUT_VIEWS) { -RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) { // Implement shadow allow_shadow = false; @@ -65,29 +78,6 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) { uniform_buffer_alignment); uniform_size_aligned_fs = Common::AlignUp(sizeof(UniformData), uniform_buffer_alignment); - // Allocate texture buffer LUTs - Buffer::Info texel_buffer_info = { - .size = TEXTURE_BUFFER_SIZE, - .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, - .usage = vk::BufferUsageFlagBits::eUniformTexelBuffer | - vk::BufferUsageFlagBits::eTransferDst, - }; - - texel_buffer_info.view_formats[0] = vk::Format::eR32G32Sfloat; - texture_buffer_lut_lf.Create(texel_buffer_info); - - texel_buffer_info.view_formats[1] = vk::Format::eR32G32B32A32Sfloat; - texture_buffer_lut.Create(texel_buffer_info); - - // Create and bind uniform buffers - 
Buffer::Info uniform_info = { - .size = UNIFORM_BUFFER_SIZE, - .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, - .usage = vk::BufferUsageFlagBits::eUniformBuffer | - vk::BufferUsageFlagBits::eTransferDst - }; - - uniform_buffer.Create(uniform_info); auto& state = VulkanState::Get(); state.SetUniformBuffer(0, 0, uniform_size_aligned_vs, uniform_buffer); state.SetUniformBuffer(1, uniform_size_aligned_vs, uniform_size_aligned_fs, uniform_buffer); @@ -97,26 +87,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) { state.SetTexelBuffer(1, 0, TEXTURE_BUFFER_SIZE, texture_buffer_lut, 0); state.SetTexelBuffer(2, 0, TEXTURE_BUFFER_SIZE, texture_buffer_lut, 1); - // Create vertex and index buffers - Buffer::Info vertex_info = { - .size = VERTEX_BUFFER_SIZE, - .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, - .usage = vk::BufferUsageFlagBits::eVertexBuffer | - vk::BufferUsageFlagBits::eTransferDst - }; - - Buffer::Info index_info = { - .size = INDEX_BUFFER_SIZE, - .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, - .usage = vk::BufferUsageFlagBits::eIndexBuffer | - vk::BufferUsageFlagBits::eTransferDst - }; - - vertex_buffer.Create(vertex_info); - index_buffer.Create(index_info); - // Set clear texture color - state.SetPlaceholderColor(255, 0, 0, 255); + state.SetPlaceholderColor(255, 255, 255, 255); SyncEntireState(); } @@ -238,7 +210,7 @@ void RasterizerVulkan::DrawTriangles() { } bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { - MICROPROFILE_SCOPE(OpenGL_Drawing); + MICROPROFILE_SCOPE(Vulkan_Drawing); const auto& regs = Pica::g_state.regs; auto& state = VulkanState::Get(); @@ -252,6 +224,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { const bool using_color_fb = regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0; + const bool using_depth_fb = !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && (write_depth_fb || 
regs.framebuffer.output_merger.depth_test_enable != 0); @@ -304,15 +277,15 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { // Scissor checks are window-, not viewport-relative, which means that if the cached texture // sub-rect changes, the scissor bounds also need to be updated. - GLint scissor_x1 = - static_cast(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale); - GLint scissor_y1 = - static_cast(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale); + int scissor_x1 = + static_cast(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale); + int scissor_y1 = + static_cast(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale); // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when // scaling or doing multisampling. - GLint scissor_x2 = - static_cast(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale); - GLint scissor_y2 = static_cast(surfaces_rect.bottom + + int scissor_x2 = + static_cast(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale); + int scissor_y2 = static_cast(surfaces_rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale); if (uniform_block_data.data.scissor_x1 != scissor_x1 || @@ -420,8 +393,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { depth_surface->texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); } - g_vk_task_scheduler->Submit(); - return true; } @@ -924,22 +895,22 @@ void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) { } void RasterizerVulkan::FlushAll() { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); + MICROPROFILE_SCOPE(Vulkan_CacheManagement); res_cache.FlushAll(); } void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); + MICROPROFILE_SCOPE(Vulkan_CacheManagement); res_cache.FlushRegion(addr, size); } void RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) { - 
MICROPROFILE_SCOPE(OpenGL_CacheManagement); + MICROPROFILE_SCOPE(Vulkan_CacheManagement); res_cache.InvalidateRegion(addr, size, nullptr); } void RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); + MICROPROFILE_SCOPE(Vulkan_CacheManagement); res_cache.FlushRegion(addr, size); res_cache.InvalidateRegion(addr, size, nullptr); } @@ -949,7 +920,7 @@ void RasterizerVulkan::ClearAll(bool flush) { } bool RasterizerVulkan::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { - MICROPROFILE_SCOPE(OpenGL_Blits); + MICROPROFILE_SCOPE(Vulkan_Blits); SurfaceParams src_params; src_params.addr = config.GetPhysicalInputAddress(); @@ -1099,7 +1070,7 @@ bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con if (framebuffer_addr == 0) { return false; } - MICROPROFILE_SCOPE(OpenGL_CacheManagement); + MICROPROFILE_SCOPE(Vulkan_CacheManagement); SurfaceParams src_params; src_params.addr = framebuffer_addr; @@ -1421,7 +1392,7 @@ void RasterizerVulkan::SyncLightSpotDirection(int light_index) { } void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) { - GLfloat dist_atten_bias = + float dist_atten_bias = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias) .ToFloat32(); @@ -1432,7 +1403,7 @@ void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) { } void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) { - GLfloat dist_atten_scale = + float dist_atten_scale = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale) .ToFloat32(); @@ -1444,8 +1415,8 @@ void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) { void RasterizerVulkan::SyncShadowBias() { const auto& shadow = Pica::g_state.regs.framebuffer.shadow; - GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32(); - GLfloat linear = 
Pica::float16::FromRaw(shadow.linear).ToFloat32(); + float constant = Pica::float16::FromRaw(shadow.constant).ToFloat32(); + float linear = Pica::float16::FromRaw(shadow.linear).ToFloat32(); if (constant != uniform_block_data.data.shadow_bias_constant || linear != uniform_block_data.data.shadow_bias_linear) { @@ -1456,7 +1427,7 @@ void RasterizerVulkan::SyncShadowBias() { } void RasterizerVulkan::SyncShadowTextureBias() { - GLint bias = Pica::g_state.regs.texturing.shadow.bias << 1; + int bias = Pica::g_state.regs.texturing.shadow.bias << 1; if (bias != uniform_block_data.data.shadow_texture_bias) { uniform_block_data.data.shadow_texture_bias = bias; uniform_block_data.dirty = true; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b8ca49cfc..e0ecc0cf9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -5,21 +5,11 @@ #pragma once #include -#include -#include -#include #include #include -#include "common/bit_field.h" #include "common/common_types.h" -#include "common/vector_math.h" -#include "core/hw/gpu.h" -#include "video_core/pica_state.h" -#include "video_core/pica_types.h" #include "video_core/rasterizer_interface.h" -#include "video_core/regs_framebuffer.h" #include "video_core/regs_lighting.h" -#include "video_core/regs_rasterizer.h" #include "video_core/regs_texturing.h" #include "video_core/shader/shader.h" #include "video_core/renderer_vulkan/vk_state.h" @@ -31,7 +21,11 @@ class EmuWindow; namespace Vulkan { -enum class UniformBindings : u32 { Common, VS, GS }; +enum class UniformBindings : u32 { + Common = 0, + VertexShader = 1, + GeometryShader = 2 +}; struct LightSrc { alignas(16) glm::vec3 specular_0; @@ -79,14 +73,13 @@ struct UniformData { alignas(16) glm::vec4 clip_coef; }; -static_assert( - sizeof(UniformData) == 0x4F0, - "The size of the UniformData structure has changed, update the structure in the shader"); 
+static_assert(sizeof(UniformData) == 0x4F0, + "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); /// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. -// NOTE: the same rule from UniformData also applies here. +/// NOTE: the same rule from UniformData also applies here. struct PicaUniformsData { void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup); @@ -102,17 +95,18 @@ struct PicaUniformsData { struct VSUniformData { PicaUniformsData uniforms; }; -static_assert( - sizeof(VSUniformData) == 1856, - "The size of the VSUniformData structure has changed, update the structure in the shader"); + +static_assert(sizeof(VSUniformData) == 1856, + "The size of the VSUniformData structure has changed, update the structure in the shader"); static_assert(sizeof(VSUniformData) < 16384, "VSUniformData structure must be less than 16kb as per the OpenGL spec"); struct ScreenInfo; +class CommandScheduler; class RasterizerVulkan : public VideoCore::RasterizerInterface { public: - explicit RasterizerVulkan(Frontend::EmuWindow& emu_window); + explicit RasterizerVulkan(CommandScheduler& scheduler, Frontend::EmuWindow& emu_window); ~RasterizerVulkan() override; void LoadDiskResources(const std::atomic_bool& stop_loading, @@ -252,6 +246,7 @@ private: }; private: + CommandScheduler& scheduler; RasterizerCacheVulkan res_cache; std::vector vertex_batch; bool shader_dirty = true; @@ -269,13 +264,7 @@ private: bool dirty; } uniform_block_data = {}; - // They shall be big enough for about one frame. 
- static constexpr std::size_t VERTEX_BUFFER_SIZE = 64 * 1024 * 1024; - static constexpr std::size_t INDEX_BUFFER_SIZE = 16 * 1024 * 1024; - static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; - static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024; - - Buffer vertex_buffer, index_buffer; + StreamBuffer vertex_buffer, index_buffer; StreamBuffer uniform_buffer, texture_buffer_lut_lf, texture_buffer_lut; u32 uniform_buffer_alignment; @@ -293,4 +282,4 @@ private: bool allow_shadow{}; }; -} // namespace OpenGL +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp index 66b948c53..160496023 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp @@ -16,19 +16,11 @@ #include #include "common/alignment.h" #include "common/bit_field.h" -#include "common/color.h" #include "common/logging/log.h" #include "common/microprofile.h" -#include "common/scope_exit.h" -#include "common/texture.h" #include "common/vector_math.h" -#include "core/core.h" -#include "core/frontend/emu_window.h" -#include "core/hle/kernel/process.h" #include "core/memory.h" -#include "core/settings.h" #include "video_core/pica_state.h" -#include "video_core/renderer_base.h" #include "video_core/renderer_vulkan/vk_task_scheduler.h" #include "video_core/renderer_vulkan/vk_rasterizer_cache.h" #include "video_core/renderer_vulkan/vk_format_reinterpreter.h" @@ -375,7 +367,7 @@ static vk::Rect2D FromRect(Common::Rectangle rect) { // Allocate an uninitialized texture of appropriate size and format for the surface void RasterizerCacheVulkan::AllocateTexture(Texture& target, SurfaceType type, vk::Format format, - u32 width, u32 height) { + u32 width, u32 height, bool framebuffer) { // First check if the texture can be recycled auto recycled_tex = host_texture_recycler.find({format, width, height}); if (recycled_tex != 
host_texture_recycler.end()) { @@ -384,30 +376,31 @@ void RasterizerCacheVulkan::AllocateTexture(Texture& target, SurfaceType type, v return; } - auto GetUsage = [](SurfaceType type) { + auto GetUsage = [framebuffer](SurfaceType type) { auto usage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eTransferSrc; - switch (type) { - case SurfaceType::Color: - case SurfaceType::Fill: - case SurfaceType::Texture: - usage |= vk::ImageUsageFlagBits::eColorAttachment; - break; - case SurfaceType::Depth: - case SurfaceType::DepthStencil: - usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment; - break; - default: - break; + if (framebuffer) { + switch (type) { + case SurfaceType::Color: + case SurfaceType::Fill: + case SurfaceType::Texture: + usage |= vk::ImageUsageFlagBits::eColorAttachment; + break; + case SurfaceType::Depth: + case SurfaceType::DepthStencil: + usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment; + break; + default: + break; + } } - return usage; }; // Otherwise create a brand new texture - u32 levels = std::log2(std::max(width, height)) + 1; + u32 levels = static_cast(std::log2(std::max(width, height))) + 1; Texture::Info texture_info{ .width = width, .height = height, @@ -516,8 +509,9 @@ void CachedSurface::LoadGPUBuffer(PAddr load_start, PAddr load_end) { const bool need_swap = (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8); const u8* const texture_src_data = VideoCore::g_memory->GetPhysicalPointer(addr); - if (texture_src_data == nullptr) + if (texture_src_data == nullptr) { return; + } if (vk_buffer.empty()) { vk_buffer.resize(width * height * GetBytesPerPixel(pixel_format)); @@ -660,9 +654,9 @@ void CachedSurface::UploadGPUTexture(Common::Rectangle rect) { // Load data from memory to the surface auto buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format); auto update_size = rect.GetWidth() * rect.GetHeight() * 
GetBytesPerPixel(pixel_format); - std::span memory(vk_buffer.data() + buffer_offset, update_size); + std::span memory{vk_buffer.data() + buffer_offset, update_size}; - texture.Upload(0, 0, stride, FromRect(rect), memory); + texture.Upload(0, 0, stride, memory); InvalidateAllWatcher(); } @@ -867,7 +861,8 @@ Surface RasterizerCacheVulkan::GetSurface(const SurfaceParams& params, ScaleMatc SurfaceRect_Tuple RasterizerCacheVulkan::GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create) { + bool load_if_create, + bool framebuffer) { if (params.addr == 0 || params.height * params.width == 0) { return std::make_tuple(nullptr, Common::Rectangle{}); } @@ -887,7 +882,7 @@ SurfaceRect_Tuple RasterizerCacheVulkan::GetSurfaceSubRect(const SurfaceParams& SurfaceParams new_params = *surface; new_params.res_scale = params.res_scale; - surface = CreateSurface(new_params); + surface = CreateSurface(new_params, framebuffer); RegisterSurface(surface); } } @@ -1077,8 +1072,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheVulkan::GetFramebufferSurfaces( // Make sure that framebuffers don't overlap if both color and depth are being used if (using_color_fb && using_depth_fb && boost::icl::length(color_vp_interval & depth_vp_interval)) { - LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " - "overlapping framebuffers not supported!"); + LOG_CRITICAL(Render_Vulkan, "Color and depth framebuffer memory regions overlap!"); using_depth_fb = false; } @@ -1086,13 +1080,13 @@ SurfaceSurfaceRect_Tuple RasterizerCacheVulkan::GetFramebufferSurfaces( Surface color_surface = nullptr; if (using_color_fb) std::tie(color_surface, color_rect) = - GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false, true); Common::Rectangle depth_rect{}; Surface depth_surface = nullptr; if (using_depth_fb) std::tie(depth_surface, depth_rect) = - GetSurfaceSubRect(depth_params, 
ScaleMatch::Exact, false); + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false, true); Common::Rectangle fb_rect{}; if (color_surface != nullptr && depth_surface != nullptr) { @@ -1450,13 +1444,13 @@ void RasterizerCacheVulkan::InvalidateRegion(PAddr addr, u32 size, const Surface remove_surfaces.clear(); } -Surface RasterizerCacheVulkan::CreateSurface(const SurfaceParams& params) { +Surface RasterizerCacheVulkan::CreateSurface(const SurfaceParams& params, bool framebuffer) { Surface surface = std::make_shared(*this); static_cast(*surface) = params; surface->invalid_regions.insert(surface->GetInterval()); AllocateTexture(surface->texture, params.type, GetFormatTuple(surface->pixel_format), - surface->GetScaledWidth(), surface->GetScaledHeight()); + surface->GetScaledWidth(), surface->GetScaledHeight(), framebuffer); return surface; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.h b/src/video_core/renderer_vulkan/vk_rasterizer_cache.h index fb9cecf3c..86c01d3e5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer_cache.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer_cache.h @@ -14,6 +14,7 @@ #include #include #include +#include #include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -22,6 +23,10 @@ #include "video_core/renderer_vulkan/vk_texture.h" #include "video_core/texture/texture_decode.h" +// Can be changed later here +template +using HashMap = robin_hood::unordered_flat_map; + namespace Vulkan { class RasterizerCacheVulkan; @@ -31,32 +36,25 @@ class FormatReinterpreterVulkan; vk::Format GetFormatTuple(SurfaceParams::PixelFormat pixel_format); struct HostTextureTag { - vk::Format format; - u32 width; - u32 height; - bool operator==(const HostTextureTag& rhs) const noexcept { - return std::tie(format, width, height) == std::tie(rhs.format, rhs.width, rhs.height); - }; + vk::Format format = vk::Format::eUndefined; + u32 width = 0, height = 0; + + // Enable comparisons + auto 
operator<=>(const HostTextureTag& other) const = default; }; struct TextureCubeConfig { - PAddr px; - PAddr nx; - PAddr py; - PAddr ny; - PAddr pz; - PAddr nz; - u32 width; + PAddr px = 0; + PAddr nx = 0; + PAddr py = 0; + PAddr ny = 0; + PAddr pz = 0; + PAddr nz = 0; + u32 width = 0; Pica::TexturingRegs::TextureFormat format; - bool operator==(const TextureCubeConfig& rhs) const { - return std::tie(px, nx, py, ny, pz, nz, width, format) == - std::tie(rhs.px, rhs.nx, rhs.py, rhs.ny, rhs.pz, rhs.nz, rhs.width, rhs.format); - } - - bool operator!=(const TextureCubeConfig& rhs) const { - return !(*this == rhs); - } + // Enable comparisons + auto operator<=>(const TextureCubeConfig& other) const = default; }; } // namespace Vulkan @@ -98,6 +96,7 @@ using SurfaceRegions = boost::icl::interval_set; + using SurfaceCache = boost::icl::interval_map; @@ -109,8 +108,6 @@ static_assert(std::is_same>; using SurfaceSurfaceRect_Tuple = std::tuple>; -using PageMap = boost::icl::interval_map; - enum class ScaleMatch { Exact, // only accept same res scale Upscale, // only allow higher scale than params @@ -265,7 +262,7 @@ public: /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from /// 3DS memory to OpenGL and caches it (if not already cached) SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create); + bool load_if_create, bool framebuffer = false); /// Get a surface based on the texture configuration Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); @@ -306,9 +303,9 @@ private: void ValidateSurface(const Surface& surface, PAddr addr, u32 size); // Returns false if there is a surface in the cache at the interval with the same bit-width, - bool NoUnimplementedReinterpretations(const Vulkan::Surface& surface, - Vulkan::SurfaceParams& params, - const Vulkan::SurfaceInterval& interval); + bool NoUnimplementedReinterpretations(const Surface& surface, + 
SurfaceParams& params, + const SurfaceInterval& interval); // Return true if a surface with an invalid pixel format exists at the interval bool IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval); @@ -318,7 +315,7 @@ private: const SurfaceInterval& interval); /// Create a new surface - Surface CreateSurface(const SurfaceParams& params); + Surface CreateSurface(const SurfaceParams& params, bool framebuffer = false); /// Register surface into the cache void RegisterSurface(const Surface& surface); @@ -330,20 +327,20 @@ private: void UpdatePagesCachedCount(PAddr addr, u32 size, int delta); SurfaceCache surface_cache; - PageMap cached_pages; + boost::icl::interval_map cached_pages; SurfaceMap dirty_regions; SurfaceSet remove_surfaces; u16 resolution_scale_factor; + // Texture cube cache std::unordered_map texture_cube_cache; std::recursive_mutex mutex; public: void AllocateTexture(Texture& target, SurfaceParams::SurfaceType type, vk::Format format, - u32 width, u32 height); - std::unique_ptr format_reinterpreter; + u32 width, u32 height, bool framebuffer); }; -} // namespace OpenGL +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader.cpp b/src/video_core/renderer_vulkan/vk_shader.cpp new file mode 100644 index 000000000..00d875002 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader.cpp @@ -0,0 +1,234 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_shader.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include +#include +#include + +constexpr TBuiltInResource DefaultTBuiltInResource = { + .maxLights = 32, + .maxClipPlanes = 6, + .maxTextureUnits = 32, + .maxTextureCoords = 32, + .maxVertexAttribs = 64, + .maxVertexUniformComponents = 4096, + .maxVaryingFloats = 64, + .maxVertexTextureImageUnits = 32, + .maxCombinedTextureImageUnits = 80, + .maxTextureImageUnits = 32, + .maxFragmentUniformComponents = 4096, + .maxDrawBuffers = 32, + .maxVertexUniformVectors = 128, + .maxVaryingVectors = 8, + .maxFragmentUniformVectors = 16, + .maxVertexOutputVectors = 16, + .maxFragmentInputVectors = 15, + .minProgramTexelOffset = -8, + .maxProgramTexelOffset = 7, + .maxClipDistances = 8, + .maxComputeWorkGroupCountX = 65535, + .maxComputeWorkGroupCountY = 65535, + .maxComputeWorkGroupCountZ = 65535, + .maxComputeWorkGroupSizeX = 1024, + .maxComputeWorkGroupSizeY = 1024, + .maxComputeWorkGroupSizeZ = 64, + .maxComputeUniformComponents = 1024, + .maxComputeTextureImageUnits = 16, + .maxComputeImageUniforms = 8, + .maxComputeAtomicCounters = 8, + .maxComputeAtomicCounterBuffers = 1, + .maxVaryingComponents = 60, + .maxVertexOutputComponents = 64, + .maxGeometryInputComponents = 64, + .maxGeometryOutputComponents = 128, + .maxFragmentInputComponents = 128, + .maxImageUnits = 8, + .maxCombinedImageUnitsAndFragmentOutputs = 8, + .maxCombinedShaderOutputResources = 8, + .maxImageSamples = 0, + .maxVertexImageUniforms = 0, + .maxTessControlImageUniforms = 0, + .maxTessEvaluationImageUniforms = 0, + .maxGeometryImageUniforms = 0, + .maxFragmentImageUniforms = 8, + .maxCombinedImageUniforms = 8, + .maxGeometryTextureImageUnits = 16, + .maxGeometryOutputVertices = 256, + .maxGeometryTotalOutputComponents = 1024, + .maxGeometryUniformComponents = 1024, + .maxGeometryVaryingComponents = 64, 
+ .maxTessControlInputComponents = 128, + .maxTessControlOutputComponents = 128, + .maxTessControlTextureImageUnits = 16, + .maxTessControlUniformComponents = 1024, + .maxTessControlTotalOutputComponents = 4096, + .maxTessEvaluationInputComponents = 128, + .maxTessEvaluationOutputComponents = 128, + .maxTessEvaluationTextureImageUnits = 16, + .maxTessEvaluationUniformComponents = 1024, + .maxTessPatchComponents = 120, + .maxPatchVertices = 32, + .maxTessGenLevel = 64, + .maxViewports = 16, + .maxVertexAtomicCounters = 0, + .maxTessControlAtomicCounters = 0, + .maxTessEvaluationAtomicCounters = 0, + .maxGeometryAtomicCounters = 0, + .maxFragmentAtomicCounters = 8, + .maxCombinedAtomicCounters = 8, + .maxAtomicCounterBindings = 1, + .maxVertexAtomicCounterBuffers = 0, + .maxTessControlAtomicCounterBuffers = 0, + .maxTessEvaluationAtomicCounterBuffers = 0, + .maxGeometryAtomicCounterBuffers = 0, + .maxFragmentAtomicCounterBuffers = 1, + .maxCombinedAtomicCounterBuffers = 1, + .maxAtomicCounterBufferSize = 16384, + .maxTransformFeedbackBuffers = 4, + .maxTransformFeedbackInterleavedComponents = 64, + .maxCullDistances = 8, + .maxCombinedClipAndCullDistances = 8, + .maxSamples = 4, + .maxMeshOutputVerticesNV = 256, + .maxMeshOutputPrimitivesNV = 512, + .maxMeshWorkGroupSizeX_NV = 32, + .maxMeshWorkGroupSizeY_NV = 1, + .maxMeshWorkGroupSizeZ_NV = 1, + .maxTaskWorkGroupSizeX_NV = 32, + .maxTaskWorkGroupSizeY_NV = 1, + .maxTaskWorkGroupSizeZ_NV = 1, + .maxMeshViewCountNV = 4, + .maxDualSourceDrawBuffersEXT = 1, + .limits = TLimits{ + .nonInductiveForLoops = 1, + .whileLoops = 1, + .doWhileLoops = 1, + .generalUniformIndexing = 1, + .generalAttributeMatrixVectorIndexing = 1, + .generalVaryingIndexing = 1, + .generalSamplerIndexing = 1, + .generalVariableIndexing = 1, + .generalConstantMatrixVectorIndexing = 1, + }}; + + +namespace VideoCore::Vulkan { + +EShLanguage ToEshShaderStage(ShaderStage stage) { + switch (stage) { + case ShaderStage::Vertex: + return 
EShLanguage::EShLangVertex; + case ShaderStage::Geometry: + return EShLanguage::EShLangGeometry; + case ShaderStage::Fragment: + return EShLanguage::EShLangFragment; + case ShaderStage::Compute: + return EShLanguage::EShLangCompute; + default: + LOG_CRITICAL(Render_Vulkan, "Unknown shader stage"); + UNREACHABLE(); + } +} + +bool InitializeCompiler() { + static bool glslang_initialized = false; + + if (glslang_initialized) { + return true; + } + + if (!glslang::InitializeProcess()) { + LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler"); + return false; + } + + std::atexit([]() { glslang::FinalizeProcess(); }); + + glslang_initialized = true; + return true; +} + +Shader::Shader(Instance& instance, ShaderStage stage, std::string_view name, + std::string&& source) : + ShaderBase(stage, name, std::move(source)), instance(instance) { +} + +Shader::~Shader() { + vk::Device device = instance.GetDevice(); + device.destroyShaderModule(module); +} + +bool Shader::Compile(ShaderOptimization level) { + if (!InitializeCompiler()) { + return false; + } + + EProfile profile = ECoreProfile; + EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules); + EShLanguage lang = ToEshShaderStage(stage); + + int default_version = 450; + const char* pass_source_code = source.c_str(); + int pass_source_code_length = static_cast<int>(source.size()); + + auto shader = std::make_unique<glslang::TShader>(lang); + shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3); + shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1); + + glslang::TShader::ForbidIncluder includer; + if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) { + LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog()); + return false; + } + + // Even though there's only a single shader, we still need to link it to generate SPV + auto program = 
std::make_unique(); + program->addShader(shader.get()); + if (!program->link(messages)) { + LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog()); + return false; + } + + glslang::TIntermediate* intermediate = program->getIntermediate(lang); + std::vector out_code; + spv::SpvBuildLogger logger; + glslang::SpvOptions options; + + // Compile the SPIR-V module without optimizations for easier debugging in RenderDoc. + if (level == ShaderOptimization::Debug) { + intermediate->addSourceText(pass_source_code, pass_source_code_length); + options.generateDebugInfo = true; + options.disableOptimizer = true; + options.optimizeSize = false; + options.disassemble = false; + options.validate = true; + } else { + options.disableOptimizer = false; + options.stripDebugInfo = true; + } + + glslang::GlslangToSpv(*intermediate, out_code, &logger, &options); + + const std::string spv_messages = logger.getAllMessages(); + if (!spv_messages.empty()) { + LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages); + } + + const vk::ShaderModuleCreateInfo shader_info = { + .codeSize = out_code.size() * sizeof(u32), + .pCode = out_code.data() + }; + + vk::Device device = instance.GetDevice(); + module = device.createShaderModule(shader_info); + + return true; +} + +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader.h b/src/video_core/renderer_vulkan/vk_shader.h new file mode 100644 index 000000000..d503ee55b --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader.h @@ -0,0 +1,32 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "video_core/common/shader.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace VideoCore::Vulkan { + +class Instance; + +class Shader : public VideoCore::ShaderBase { +public: + Shader(Instance& instance, ShaderStage stage, std::string_view name, + std::string&& source); + ~Shader() override; + + bool Compile(ShaderOptimization level) override; + + /// Returns the underlying vulkan shader module handle + vk::ShaderModule GetHandle() const { + return module; + } + +private: + Instance& instance; + vk::ShaderModule module; +}; + +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index 832632fec..9f915dee8 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -2,28 +2,12 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include -#include #include #include "common/assert.h" #include "common/bit_field.h" -#include "common/bit_set.h" #include "common/logging/log.h" #include "core/core.h" -#include "video_core/regs_framebuffer.h" -#include "video_core/regs_lighting.h" -#include "video_core/regs_rasterizer.h" -#include "video_core/regs_texturing.h" -#include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_shader_gen.h" -#include "video_core/renderer_opengl/gl_shader_util.h" -#include "video_core/video_core.h" - -#include -#include -#include using Pica::FramebufferRegs; using Pica::LightingRegs; @@ -32,56 +16,7 @@ using Pica::TexturingRegs; using TevStageConfig = TexturingRegs::TevStageConfig; using VSOutputAttributes = RasterizerRegs::VSOutputAttributes; -namespace Vulkan { - -static const char present_vertex_shader_source[] = R"( -#version 450 core -#extension 
GL_ARB_separate_shader_objects : enable -layout (location = 0) in vec2 vert_position; -layout (location = 1) in vec3 vert_tex_coord; -layout (location = 0) out vec3 frag_tex_coord; - -layout (push_constant) uniform DrawInfo { - mat4 modelview_matrix; - vec4 i_resolution; - vec4 o_resolution; - int layer; -}; - -void main() { - vec4 position = vec4(vert_position, 0.0, 1.0) * modelview_matrix; - gl_Position = vec4(position.x, -position.y, 0.0, 1.0); - frag_tex_coord = vert_tex_coord; -} -)"; - -static const char present_fragment_shader_source[] = R"( -#version 450 core -#extension GL_ARB_separate_shader_objects : enable -layout (location = 0) in vec3 frag_tex_coord; -layout (location = 0) out vec4 color; - -layout (push_constant) uniform DrawInfo { - mat3x2 modelview_matrix; - vec4 i_resolution; - vec4 o_resolution; - int layer; -}; - -layout (set = 0, binding = 0) uniform sampler2D screen_textures[3]; - -void main() { - color = texture(screen_textures[int(frag_tex_coord.z)], frag_tex_coord.xy); -} -)"; - -std::string GetPresentVertexShader() { - return present_vertex_shader_source; -} - -std::string GetPresentFragmentShader() { - return present_fragment_shader_source; -} +namespace VideoCore::Vulkan { constexpr std::string_view UniformBlockDef = R"( #define NUM_TEV_STAGES 6 @@ -162,184 +97,6 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_ return out; } -PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) { - PicaFSConfig res{}; - - auto& state = res.state; - - state.scissor_test_mode = regs.rasterizer.scissor_test.mode; - - state.depthmap_enable = regs.rasterizer.depthmap_enable; - - state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable - ? 
regs.framebuffer.output_merger.alpha_test.func.Value() - : FramebufferRegs::CompareFunc::Always; - - state.texture0_type = regs.texturing.texture0.type; - - state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0; - - // We don't need these otherwise, reset them to avoid unnecessary shader generation - state.alphablend_enable = {}; - state.logic_op = {}; - - // Copy relevant tev stages fields. - // We don't sync const_color here because of the high variance, it is a - // shader uniform instead. - const auto& tev_stages = regs.texturing.GetTevStages(); - DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); - for (std::size_t i = 0; i < tev_stages.size(); i++) { - const auto& tev_stage = tev_stages[i]; - state.tev_stages[i].sources_raw = tev_stage.sources_raw; - state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; - state.tev_stages[i].ops_raw = tev_stage.ops_raw; - state.tev_stages[i].scales_raw = tev_stage.scales_raw; - } - - state.fog_mode = regs.texturing.fog_mode; - state.fog_flip = regs.texturing.fog_flip != 0; - - state.combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() | - regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() - << 4; - - // Fragment lighting - - state.lighting.enable = !regs.lighting.disable; - state.lighting.src_num = regs.lighting.max_light_index + 1; - - for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { - unsigned num = regs.lighting.light_enable.GetNum(light_index); - const auto& light = regs.lighting.light[num]; - state.lighting.light[light_index].num = num; - state.lighting.light[light_index].directional = light.config.directional != 0; - state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0; - state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0; - state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0; - 
state.lighting.light[light_index].dist_atten_enable = - !regs.lighting.IsDistAttenDisabled(num); - state.lighting.light[light_index].spot_atten_enable = - !regs.lighting.IsSpotAttenDisabled(num); - state.lighting.light[light_index].shadow_enable = !regs.lighting.IsShadowDisabled(num); - } - - state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; - state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; - state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); - state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); - - state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0; - state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; - state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); - state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); - - // this is a dummy field due to lack of the corresponding register - state.lighting.lut_sp.enable = true; - state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0; - state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value(); - state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp); - - state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0; - state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; - state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); - state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); - - state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0; - state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; - state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); - state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); - - state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0; 
- state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; - state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); - state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); - - state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0; - state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; - state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); - state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); - - state.lighting.config = regs.lighting.config0.config; - state.lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha; - state.lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha; - state.lighting.bump_mode = regs.lighting.config0.bump_mode; - state.lighting.bump_selector = regs.lighting.config0.bump_selector; - state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0; - state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0; - - state.lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0; - state.lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0; - state.lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0; - state.lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0; - state.lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0; - state.lighting.shadow_selector = regs.lighting.config0.shadow_selector; - - state.proctex.enable = regs.texturing.main_config.texture3_enable; - if (state.proctex.enable) { - state.proctex.coord = regs.texturing.main_config.texture3_coordinates; - state.proctex.u_clamp = regs.texturing.proctex.u_clamp; - state.proctex.v_clamp = regs.texturing.proctex.v_clamp; - state.proctex.color_combiner = regs.texturing.proctex.color_combiner; - state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner; - 
state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha; - state.proctex.noise_enable = regs.texturing.proctex.noise_enable; - state.proctex.u_shift = regs.texturing.proctex.u_shift; - state.proctex.v_shift = regs.texturing.proctex.v_shift; - state.proctex.lut_width = regs.texturing.proctex_lut.width; - state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0; - state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1; - state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2; - state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3; - state.proctex.lod_min = regs.texturing.proctex_lut.lod_min; - state.proctex.lod_max = regs.texturing.proctex_lut.lod_max; - state.proctex.lut_filter = regs.texturing.proctex_lut.filter; - } - - state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == - FramebufferRegs::FragmentOperationMode::Shadow; - - state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0; - - return res; -} - -void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { - program_hash = setup.GetProgramCodeHash(); - swizzle_hash = setup.GetSwizzleDataHash(); - main_offset = regs.main_offset; - sanitize_mul = VideoCore::g_hw_shader_accurate_mul; - - num_outputs = 0; - output_map.fill(16); - - for (int reg : Common::BitSet(regs.output_mask)) { - output_map[reg] = num_outputs++; - } -} - -void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) { - vs_output_attributes = Common::BitSet(regs.vs.output_mask).Count(); - gs_output_attributes = vs_output_attributes; - - semantic_maps.fill({16, 0}); - for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) { - const std::array semantics{ - regs.rasterizer.vs_output_attributes[attrib].map_x.Value(), - regs.rasterizer.vs_output_attributes[attrib].map_y.Value(), - regs.rasterizer.vs_output_attributes[attrib].map_z.Value(), - 
regs.rasterizer.vs_output_attributes[attrib].map_w.Value(), - }; - for (u32 comp = 0; comp < 4; ++comp) { - const auto semantic = semantics[comp]; - if (static_cast(semantic) < 24) { - semantic_maps[static_cast(semantic)] = {attrib, comp}; - } else if (semantic != VSOutputAttributes::INVALID) { - LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic); - } - } - } -} - /// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) static bool IsPassThroughTevStage(const TevStageConfig& stage) { return (stage.color_op == TevStageConfig::Operation::Replace && @@ -352,7 +109,7 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) { } static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) { - const auto& state = config.state; + const auto& state = config; switch (texture_unit) { case 0: // Only unit 0 respects the texturing type @@ -628,23 +385,22 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper } /// Writes the if-statement condition used to evaluate alpha testing -static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareFunc func) { - using CompareFunc = FramebufferRegs::CompareFunc; +static void AppendAlphaTestCondition(std::string& out, Pica::CompareFunc func) { switch (func) { - case CompareFunc::Never: + case Pica::CompareFunc::Never: out += "true"; break; - case CompareFunc::Always: + case Pica::CompareFunc::Always: out += "false"; break; - case CompareFunc::Equal: - case CompareFunc::NotEqual: - case CompareFunc::LessThan: - case CompareFunc::LessThanOrEqual: - case CompareFunc::GreaterThan: - case CompareFunc::GreaterThanOrEqual: { + case Pica::CompareFunc::Equal: + case Pica::CompareFunc::NotEqual: + case Pica::CompareFunc::LessThan: + case Pica::CompareFunc::LessThanOrEqual: + case Pica::CompareFunc::GreaterThan: + case Pica::CompareFunc::GreaterThanOrEqual: { static constexpr std::array op{"!=", "==", ">=", ">", 
"<=", "<"}; - const auto index = static_cast(func) - static_cast(CompareFunc::Equal); + const auto index = static_cast(func) - static_cast(Pica::CompareFunc::Equal); out += fmt::format("int(last_tex_env_out.a * 255.0) {} alphatest_ref", op[index]); break; } @@ -659,7 +415,7 @@ static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareF /// Writes the code to emulate the specified TEV stage static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned index) { const auto stage = - static_cast(config.state.tev_stages[index]); + static_cast(config.tev_stages[index]); if (!IsPassThroughTevStage(stage)) { const std::string index_name = std::to_string(index); @@ -716,7 +472,7 @@ static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned /// Writes the code to emulate fragment lighting static void WriteLighting(std::string& out, const PicaFSConfig& config) { - const auto& lighting = config.state.lighting; + const auto& lighting = config.lighting; // Define lighting globals out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" @@ -1119,7 +875,7 @@ float ProcTexLookupLUT(int offset, float coord) { )"; // Noise utility - if (config.state.proctex.noise_enable) { + if (config.proctex.noise_enable) { // See swrasterizer/proctex.cpp for more information about these functions out += R"( int ProcTexNoiseRand1D(int v) { @@ -1159,16 +915,16 @@ float ProcTexNoiseCoef(vec2 x) { } out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n"; - out += fmt::format("int lut_width = {} >> level;\n", config.state.proctex.lut_width); + out += fmt::format("int lut_width = {} >> level;\n", config.proctex.lut_width); // Offsets for level 4-7 seem to be hardcoded out += fmt::format("int lut_offsets[8] = int[]({}, {}, {}, {}, 0xF0, 0xF8, 0xFC, 0xFE);\n", - config.state.proctex.lut_offset0, config.state.proctex.lut_offset1, - config.state.proctex.lut_offset2, config.state.proctex.lut_offset3); + config.proctex.lut_offset0, 
config.proctex.lut_offset1, + config.proctex.lut_offset2, config.proctex.lut_offset3); out += "int lut_offset = lut_offsets[level];\n"; // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] out += "lut_coord *= float(lut_width - 1);\n"; - switch (config.state.proctex.lut_filter) { + switch (config.proctex.lut_filter) { case ProcTexFilter::Linear: case ProcTexFilter::LinearMipmapLinear: case ProcTexFilter::LinearMipmapNearest: @@ -1191,8 +947,8 @@ float ProcTexNoiseCoef(vec2 x) { out += "}\n"; out += "vec4 ProcTex() {\n"; - if (config.state.proctex.coord < 3) { - out += fmt::format("vec2 uv = abs(texcoord{});\n", config.state.proctex.coord); + if (config.proctex.coord < 3) { + out += fmt::format("vec2 uv = abs(texcoord{});\n", config.proctex.coord); } else { LOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3"); out += "vec2 uv = abs(texcoord0);\n"; @@ -1205,23 +961,23 @@ float ProcTexNoiseCoef(vec2 x) { out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n"; // unlike normal texture, the bias is inside the log2 out += fmt::format("float lod = log2(abs(float({}) * proctex_bias) * (duv.x + duv.y));\n", - config.state.proctex.lut_width); + config.proctex.lut_width); out += "if (proctex_bias == 0.0) lod = 0.0;\n"; out += fmt::format("lod = clamp(lod, {:#}, {:#});\n", - std::max(0.0f, static_cast(config.state.proctex.lod_min)), - std::min(7.0f, static_cast(config.state.proctex.lod_max))); + std::max(0.0f, static_cast(config.proctex.lod_min)), + std::min(7.0f, static_cast(config.proctex.lod_max))); // Get shift offset before noise generation out += "float u_shift = "; - AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift, - config.state.proctex.u_clamp); + AppendProcTexShiftOffset(out, "uv.y", config.proctex.u_shift, + config.proctex.u_clamp); out += ";\n"; out += "float v_shift = "; - AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift, - config.state.proctex.v_clamp); + AppendProcTexShiftOffset(out, 
"uv.x", config.proctex.v_shift, + config.proctex.v_clamp); out += ";\n"; // Generate noise - if (config.state.proctex.noise_enable) { + if (config.proctex.noise_enable) { out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n" "uv = abs(uv);\n"; } @@ -1231,16 +987,16 @@ float ProcTexNoiseCoef(vec2 x) { "float v = uv.y + v_shift;\n"; // Clamp - AppendProcTexClamp(out, "u", config.state.proctex.u_clamp); - AppendProcTexClamp(out, "v", config.state.proctex.v_clamp); + AppendProcTexClamp(out, "u", config.proctex.u_clamp); + AppendProcTexClamp(out, "v", config.proctex.v_clamp); // Combine and map out += "float lut_coord = "; - AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, + AppendProcTexCombineAndMap(out, config.proctex.color_combiner, "proctex_color_map_offset"); out += ";\n"; - switch (config.state.proctex.lut_filter) { + switch (config.proctex.lut_filter) { case ProcTexFilter::Linear: case ProcTexFilter::Nearest: out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n"; @@ -1258,11 +1014,11 @@ float ProcTexNoiseCoef(vec2 x) { break; } - if (config.state.proctex.separate_alpha) { + if (config.proctex.separate_alpha) { // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It // uses the output of CombineAndMap directly instead. 
out += "float final_alpha = "; - AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, + AppendProcTexCombineAndMap(out, config.proctex.alpha_combiner, "proctex_alpha_map_offset"); out += ";\n"; out += "return vec4(final_color.xyz, final_alpha);\n}\n"; @@ -1271,8 +1027,8 @@ float ProcTexNoiseCoef(vec2 x) { } } -std::string GenerateFragmentShader(const PicaFSConfig& config) { - const auto& state = config.state; +std::string ShaderGenerator::GenerateFragmentShader(const PicaFSConfig& config, bool seperable_shader) { + const auto& state = config; std::string out; out += R"( @@ -1387,7 +1143,7 @@ std::string GenerateFragmentShader(const PicaFSConfig& config) { vec4 shadowTexture(vec2 uv, float w) { )"; - if (!config.state.shadow_texture_orthographic) { + if (!config.shadow_texture_orthographic) { out += "uv /= w;"; } @@ -1501,7 +1257,7 @@ vec4 shadowTextureCube(vec2 uv, float w) { #endif )"; - if (config.state.proctex.enable) + if (config.proctex.enable) AppendProcTexSampler(out, config); // We round the interpolated primary color to the nearest 1/255th @@ -1514,7 +1270,7 @@ vec4 secondary_fragment_color = vec4(0.0); )"; // Do not do any sort of processing if it's obvious we're not going to pass the alpha test - if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) { + if (state.alpha_test_func == Pica::CompareFunc::Never) { out += "discard; }"; return out; } @@ -1552,7 +1308,7 @@ vec4 secondary_fragment_color = vec4(0.0); WriteTevStage(out, config, static_cast(index)); } - if (state.alpha_test_func != FramebufferRegs::CompareFunc::Always) { + if (state.alpha_test_func != Pica::CompareFunc::Always) { out += "if ("; AppendAlphaTestCondition(out, state.alpha_test_func); out += ") discard;\n"; @@ -1623,21 +1379,20 @@ do { return out; } -std::string GenerateTrivialVertexShader(bool separable_shader) { +std::string ShaderGenerator::GenerateTrivialVertexShader(bool separable_shader) { std::string out; out += "#version 450\n"; out += "#extension 
GL_ARB_separate_shader_objects : enable\n"; - out += - fmt::format("layout(location = {}) in vec4 vert_position;\n" - "layout(location = {}) in vec4 vert_color;\n" - "layout(location = {}) in vec2 vert_texcoord0;\n" - "layout(location = {}) in vec2 vert_texcoord1;\n" - "layout(location = {}) in vec2 vert_texcoord2;\n" - "layout(location = {}) in float vert_texcoord0_w;\n" - "layout(location = {}) in vec4 vert_normquat;\n" - "layout(location = {}) in vec3 vert_view;\n", - ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, - ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW); + out += fmt::format("layout(location = {}) in vec4 vert_position;\n" + "layout(location = {}) in vec4 vert_color;\n" + "layout(location = {}) in vec2 vert_texcoord0;\n" + "layout(location = {}) in vec2 vert_texcoord1;\n" + "layout(location = {}) in vec2 vert_texcoord2;\n" + "layout(location = {}) in float vert_texcoord0_w;\n" + "layout(location = {}) in vec4 vert_normquat;\n" + "layout(location = {}) in vec3 vert_view;\n", + ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, + ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW); out += GetVertexInterfaceDeclaration(true, separable_shader); @@ -1656,7 +1411,6 @@ void main() { gl_Position = vert_position; gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0; - //gl_Position.y = -gl_Position.y; //gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 //gl_ClipDistance[1] = dot(clip_coef, vert_position); } @@ -1665,205 +1419,15 @@ void main() { return out; } -bool InitializeCompiler() { - static bool glslang_initialized = false; - - if (glslang_initialized) { - return true; - } - - if (!glslang::InitializeProcess()) { - LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler"); - return false; - } - - std::atexit([]() { glslang::FinalizeProcess(); }); - - glslang_initialized = true; - return true; 
+std::string ShaderGenerator::GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, + bool separable_shader) { + LOG_CRITICAL(Render_Vulkan, "Unimplemented!"); + UNREACHABLE(); } -const TBuiltInResource DefaultTBuiltInResource = { - .maxLights = 32, - .maxClipPlanes = 6, - .maxTextureUnits = 32, - .maxTextureCoords = 32, - .maxVertexAttribs = 64, - .maxVertexUniformComponents = 4096, - .maxVaryingFloats = 64, - .maxVertexTextureImageUnits = 32, - .maxCombinedTextureImageUnits = 80, - .maxTextureImageUnits = 32, - .maxFragmentUniformComponents = 4096, - .maxDrawBuffers = 32, - .maxVertexUniformVectors = 128, - .maxVaryingVectors = 8, - .maxFragmentUniformVectors = 16, - .maxVertexOutputVectors = 16, - .maxFragmentInputVectors = 15, - .minProgramTexelOffset = -8, - .maxProgramTexelOffset = 7, - .maxClipDistances = 8, - .maxComputeWorkGroupCountX = 65535, - .maxComputeWorkGroupCountY = 65535, - .maxComputeWorkGroupCountZ = 65535, - .maxComputeWorkGroupSizeX = 1024, - .maxComputeWorkGroupSizeY = 1024, - .maxComputeWorkGroupSizeZ = 64, - .maxComputeUniformComponents = 1024, - .maxComputeTextureImageUnits = 16, - .maxComputeImageUniforms = 8, - .maxComputeAtomicCounters = 8, - .maxComputeAtomicCounterBuffers = 1, - .maxVaryingComponents = 60, - .maxVertexOutputComponents = 64, - .maxGeometryInputComponents = 64, - .maxGeometryOutputComponents = 128, - .maxFragmentInputComponents = 128, - .maxImageUnits = 8, - .maxCombinedImageUnitsAndFragmentOutputs = 8, - .maxCombinedShaderOutputResources = 8, - .maxImageSamples = 0, - .maxVertexImageUniforms = 0, - .maxTessControlImageUniforms = 0, - .maxTessEvaluationImageUniforms = 0, - .maxGeometryImageUniforms = 0, - .maxFragmentImageUniforms = 8, - .maxCombinedImageUniforms = 8, - .maxGeometryTextureImageUnits = 16, - .maxGeometryOutputVertices = 256, - .maxGeometryTotalOutputComponents = 1024, - .maxGeometryUniformComponents = 1024, - .maxGeometryVaryingComponents = 64, - 
.maxTessControlInputComponents = 128, - .maxTessControlOutputComponents = 128, - .maxTessControlTextureImageUnits = 16, - .maxTessControlUniformComponents = 1024, - .maxTessControlTotalOutputComponents = 4096, - .maxTessEvaluationInputComponents = 128, - .maxTessEvaluationOutputComponents = 128, - .maxTessEvaluationTextureImageUnits = 16, - .maxTessEvaluationUniformComponents = 1024, - .maxTessPatchComponents = 120, - .maxPatchVertices = 32, - .maxTessGenLevel = 64, - .maxViewports = 16, - .maxVertexAtomicCounters = 0, - .maxTessControlAtomicCounters = 0, - .maxTessEvaluationAtomicCounters = 0, - .maxGeometryAtomicCounters = 0, - .maxFragmentAtomicCounters = 8, - .maxCombinedAtomicCounters = 8, - .maxAtomicCounterBindings = 1, - .maxVertexAtomicCounterBuffers = 0, - .maxTessControlAtomicCounterBuffers = 0, - .maxTessEvaluationAtomicCounterBuffers = 0, - .maxGeometryAtomicCounterBuffers = 0, - .maxFragmentAtomicCounterBuffers = 1, - .maxCombinedAtomicCounterBuffers = 1, - .maxAtomicCounterBufferSize = 16384, - .maxTransformFeedbackBuffers = 4, - .maxTransformFeedbackInterleavedComponents = 64, - .maxCullDistances = 8, - .maxCombinedClipAndCullDistances = 8, - .maxSamples = 4, - .maxMeshOutputVerticesNV = 256, - .maxMeshOutputPrimitivesNV = 512, - .maxMeshWorkGroupSizeX_NV = 32, - .maxMeshWorkGroupSizeY_NV = 1, - .maxMeshWorkGroupSizeZ_NV = 1, - .maxTaskWorkGroupSizeX_NV = 32, - .maxTaskWorkGroupSizeY_NV = 1, - .maxTaskWorkGroupSizeZ_NV = 1, - .maxMeshViewCountNV = 4, - .maxDualSourceDrawBuffersEXT = 1, - .limits = TLimits{ - .nonInductiveForLoops = 1, - .whileLoops = 1, - .doWhileLoops = 1, - .generalUniformIndexing = 1, - .generalAttributeMatrixVectorIndexing = 1, - .generalVaryingIndexing = 1, - .generalSamplerIndexing = 1, - .generalVariableIndexing = 1, - .generalConstantMatrixVectorIndexing = 1, - }}; - -vk::ShaderModule CompileShader(const std::string& source, vk::ShaderStageFlagBits vk_stage) { - if (!InitializeCompiler()) { - return VK_NULL_HANDLE; - } - - 
EShLanguage stage; - switch (vk_stage) { - case vk::ShaderStageFlagBits::eVertex: - stage = EShLangVertex; - break; - case vk::ShaderStageFlagBits::eFragment: - stage = EShLangFragment; - break; - default: - LOG_CRITICAL(Render_Vulkan, "Unknown shader stage"); - UNREACHABLE(); - } - - std::unique_ptr shader = std::make_unique(stage); - std::unique_ptr program; - glslang::TShader::ForbidIncluder includer; - EProfile profile = ECoreProfile; - EShMessages messages = static_cast(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules); - - int default_version = 450; - const char* pass_source_code = source.data(); - int pass_source_code_length = static_cast(source.size()); - - shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3); - shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1); - - if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) { - LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog()); - return VK_NULL_HANDLE; - } - - // Even though there's only a single shader, we still need to link it to generate SPV - program = std::make_unique(); - program->addShader(shader.get()); - if (!program->link(messages)) { - LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog()); - return VK_NULL_HANDLE; - } - - glslang::TIntermediate* intermediate = program->getIntermediate(stage); - std::vector out_code; - spv::SpvBuildLogger logger; - glslang::SpvOptions options; - - // Compile the SPIR-V module without optimizations for easier debugging in RenderDoc. 
- if (true) { - intermediate->addSourceText(pass_source_code, pass_source_code_length); - options.generateDebugInfo = true; - options.disableOptimizer = true; - options.optimizeSize = false; - options.disassemble = false; - options.validate = true; - } - else { - options.disableOptimizer = false; - options.stripDebugInfo = true; - } - - glslang::GlslangToSpv(*intermediate, out_code, &logger, &options); - - const std::string spv_messages = logger.getAllMessages(); - if (!spv_messages.empty()) { - LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages); - } - - vk::ShaderModuleCreateInfo shader_info{{}, out_code.size() * sizeof(u32), out_code.data()}; - const vk::Device device = g_vk_instace->GetDevice(); - vk::ShaderModule shader_module = device.createShaderModule(shader_info); - return shader_module; - +std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) { + LOG_CRITICAL(Render_Vulkan, "Unimplemented!"); + UNREACHABLE(); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h index 89aad4475..7e9775411 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -4,46 +4,23 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include "common/hash.h" -#include "video_core/regs.h" -#include "video_core/shader/shader.h" -#include "video_core/renderer_vulkan/vk_shader_state.h" +#include "video_core/common/shader_gen.h" -namespace Vulkan { +namespace VideoCore::Vulkan { -/** - * Returns the vertex and fragment shader sources used for presentation - * @returns String of shader source code - */ -std::string GetPresentVertexShader(); -std::string GetPresentFragmentShader(); +class ShaderGenerator : public VideoCore::ShaderGeneratorBase { +public: + ShaderGenerator() = default; + ~ShaderGenerator() override = default; -/** - * Generates the GLSL vertex shader 
program source code that accepts vertices from software shader - * and directly passes them to the fragment shader. - * @param separable_shader generates shader that can be used for separate shader object - * @returns String of the shader source code - */ -std::string GenerateTrivialVertexShader(bool separable_shader); + std::string GenerateTrivialVertexShader(bool separable_shader) override; -/** - * Generates the GLSL fragment shader program source code for the current Pica state - * @param config ShaderCacheKey object generated for the current Pica state, used for the shader - * configuration (NOTE: Use state in this struct only, not the Pica registers!) - * @param separable_shader generates shader that can be used for separate shader object - * @returns String of the shader source code - */ -std::string GenerateFragmentShader(const PicaFSConfig& config); + std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, + bool separable_shader) override; -/** - * Generates a SPRI-V shader module from the provided GLSL source code - */ -vk::ShaderModule CompileShader(const std::string& source, vk::ShaderStageFlagBits stage); + std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) override; -} // namespace Vulkan + std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) override; +}; + +} // namespace VideoCore diff --git a/src/video_core/renderer_vulkan/vk_shader_state.h b/src/video_core/renderer_vulkan/vk_shader_state.h index f90e12b16..45cbdb8bc 100644 --- a/src/video_core/renderer_vulkan/vk_shader_state.h +++ b/src/video_core/renderer_vulkan/vk_shader_state.h @@ -14,10 +14,10 @@ namespace Vulkan { -/* Render vertex attributes */ -struct VertexBase { - VertexBase() = default; - VertexBase(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { +/// Structure that the hardware rendered vertices are composed of +struct HardwareVertex { + 
HardwareVertex() = default; + HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { position[0] = v.pos.x.ToFloat32(); position[1] = v.pos.y.ToFloat32(); position[2] = v.pos.z.ToFloat32(); @@ -56,31 +56,12 @@ struct VertexBase { glm::vec3 view; }; -/// Structure that the hardware rendered vertices are composed of -struct HardwareVertex : public VertexBase { - HardwareVertex() = default; - HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) : VertexBase(v, flip_quaternion) {}; - static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexBase)); - static constexpr std::array attribute_desc = - { - vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, position)), - vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, color)), - vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord0)), - vk::VertexInputAttributeDescription(3, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord1)), - vk::VertexInputAttributeDescription(4, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord2)), - vk::VertexInputAttributeDescription(5, 0, vk::Format::eR32Sfloat, offsetof(VertexBase, tex_coord0_w)), - vk::VertexInputAttributeDescription(6, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, normquat)), - vk::VertexInputAttributeDescription(7, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexBase, view)), - }; -}; - /** * Vertex structure that the drawn screen rectangles are composed of. 
*/ - -struct ScreenRectVertexBase { - ScreenRectVertexBase() = default; - ScreenRectVertexBase(float x, float y, float u, float v, float s) { +struct ScreenRectVertex { + ScreenRectVertex() = default; + ScreenRectVertex(float x, float y, float u, float v, float s) { position.x = x; position.y = y; tex_coord.x = u; @@ -92,241 +73,4 @@ struct ScreenRectVertexBase { glm::vec3 tex_coord; }; -struct ScreenRectVertex : public ScreenRectVertexBase { - ScreenRectVertex() = default; - ScreenRectVertex(float x, float y, float u, float v, float s) : ScreenRectVertexBase(x, y, u, v, s) {}; - static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(ScreenRectVertexBase)); - static constexpr std::array attribute_desc = - { - vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32Sfloat, offsetof(ScreenRectVertexBase, position)), - vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32Sfloat, offsetof(ScreenRectVertexBase, tex_coord)), - }; -}; - -enum class ProgramType : u32 { VS, GS, FS }; - -enum Attributes { - ATTRIBUTE_POSITION, - ATTRIBUTE_COLOR, - ATTRIBUTE_TEXCOORD0, - ATTRIBUTE_TEXCOORD1, - ATTRIBUTE_TEXCOORD2, - ATTRIBUTE_TEXCOORD0_W, - ATTRIBUTE_NORMQUAT, - ATTRIBUTE_VIEW, -}; - -// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs() -struct TevStageConfigRaw { - u32 sources_raw; - u32 modifiers_raw; - u32 ops_raw; - u32 scales_raw; - explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept { - Pica::TexturingRegs::TevStageConfig stage; - stage.sources_raw = sources_raw; - stage.modifiers_raw = modifiers_raw; - stage.ops_raw = ops_raw; - stage.const_color = 0; - stage.scales_raw = scales_raw; - return stage; - } -}; - -struct PicaFSConfigState { - Pica::FramebufferRegs::CompareFunc alpha_test_func; - Pica::RasterizerRegs::ScissorMode scissor_test_mode; - Pica::TexturingRegs::TextureConfig::TextureType texture0_type; - bool texture2_use_coord1; - std::array tev_stages; - u8 
combiner_buffer_input; - - Pica::RasterizerRegs::DepthBuffering depthmap_enable; - Pica::TexturingRegs::FogMode fog_mode; - bool fog_flip; - bool alphablend_enable; - Pica::FramebufferRegs::LogicOp logic_op; - - struct { - struct { - unsigned num; - bool directional; - bool two_sided_diffuse; - bool dist_atten_enable; - bool spot_atten_enable; - bool geometric_factor_0; - bool geometric_factor_1; - bool shadow_enable; - } light[8]; - - bool enable; - unsigned src_num; - Pica::LightingRegs::LightingBumpMode bump_mode; - unsigned bump_selector; - bool bump_renorm; - bool clamp_highlights; - - Pica::LightingRegs::LightingConfig config; - bool enable_primary_alpha; - bool enable_secondary_alpha; - - bool enable_shadow; - bool shadow_primary; - bool shadow_secondary; - bool shadow_invert; - bool shadow_alpha; - unsigned shadow_selector; - - struct { - bool enable; - bool abs_input; - Pica::LightingRegs::LightingLutInput type; - float scale; - } lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb; - } lighting; - - struct { - bool enable; - u32 coord; - Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp; - Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner; - bool separate_alpha; - bool noise_enable; - Pica::TexturingRegs::ProcTexShift u_shift, v_shift; - u32 lut_width; - u32 lut_offset0; - u32 lut_offset1; - u32 lut_offset2; - u32 lut_offset3; - u32 lod_min; - u32 lod_max; - Pica::TexturingRegs::ProcTexFilter lut_filter; - } proctex; - - bool shadow_rendering; - bool shadow_texture_orthographic; -}; - -/** - * This struct contains all state used to generate the GLSL fragment shader that emulates the - * current Pica register configuration. This struct is used as a cache key for generated GLSL shader - * programs. The functions in gl_shader_gen.cpp should retrieve state from this struct only, not by - * directly accessing Pica registers. 
This should reduce the risk of bugs in shader generation where - * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) - * two separate shaders sharing the same key. - */ -struct PicaFSConfig : Common::HashableStruct { - - /// Construct a PicaFSConfig with the given Pica register configuration. - static PicaFSConfig BuildFromRegs(const Pica::Regs& regs); - - bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { - return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); - } - - bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { - return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); - } -}; - -/** - * This struct contains common information to identify a GL vertex/geometry shader generated from - * PICA vertex/geometry shader. - */ -struct PicaShaderConfigCommon { - void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup); - - u64 program_hash; - u64 swizzle_hash; - u32 main_offset; - bool sanitize_mul; - - u32 num_outputs; - - // output_map[output register index] -> output attribute index - std::array output_map; -}; - -/** - * This struct contains information to identify a GL vertex shader generated from PICA vertex - * shader. 
- */ -struct PicaVSConfig : Common::HashableStruct { - explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { - state.Init(regs, setup); - } - explicit PicaVSConfig(const PicaShaderConfigCommon& conf) { - state = conf; - } -}; - -struct PicaGSConfigCommonRaw { - void Init(const Pica::Regs& regs); - - u32 vs_output_attributes; - u32 gs_output_attributes; - - struct SemanticMap { - u32 attribute_index; - u32 component_index; - }; - - // semantic_maps[semantic name] -> GS output attribute index + component index - std::array semantic_maps; -}; - -/** - * This struct contains information to identify a GL geometry shader generated from PICA no-geometry - * shader pipeline - */ -struct PicaFixedGSConfig : Common::HashableStruct { - explicit PicaFixedGSConfig(const Pica::Regs& regs) { - state.Init(regs); - } -}; - -struct PipelineCacheKey { - vk::Format color, depth_stencil; - vk::PipelineColorBlendAttachmentState blend_config; - vk::LogicOp blend_logic_op; - PicaFSConfig fragment_config; - - auto operator <=>(const PipelineCacheKey& other) const = default; - - u64 Hash() const { - const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof(PipelineCacheKey)); - return static_cast(hash); - } -}; - } // namespace Vulkan - -namespace std { -template <> -struct hash { - std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept { - return k.Hash(); - } -}; - -template <> -struct hash { - std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept { - return k.Hash(); - } -}; - -template <> -struct hash { - std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept { - return k.Hash(); - } -}; - -template <> -struct hash { - size_t operator()(const Vulkan::PipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; -} // namespace std diff --git a/src/video_core/renderer_vulkan/vk_state.h b/src/video_core/renderer_vulkan/vk_state.h index d65f749e5..7f82cc614 100644 --- 
a/src/video_core/renderer_vulkan/vk_state.h +++ b/src/video_core/renderer_vulkan/vk_state.h @@ -7,9 +7,9 @@ #include #include #include "video_core/regs.h" +#include "video_core/renderer_vulkan/vk_buffer.h" #include "video_core/renderer_vulkan/vk_shader_state.h" #include "video_core/renderer_vulkan/vk_pipeline_builder.h" -#include "video_core/renderer_vulkan/vk_texture.h" namespace Vulkan { @@ -69,7 +69,7 @@ public: bool StencilTestEnabled() const { return stencil_enabled && stencil_writes; } /// Configure drawing state - void SetVertexBuffer(const Buffer& buffer, vk::DeviceSize offset); + void SetVertexBuffer(const StreamBuffer& buffer, vk::DeviceSize offset); void SetViewport(vk::Viewport viewport); void SetScissor(vk::Rect2D scissor); void SetCullMode(vk::CullModeFlags flags); @@ -100,9 +100,9 @@ public: void EndRendering(); /// Configure shader resources - void SetUniformBuffer(u32 binding, u32 offset, u32 size, const Buffer& buffer); + void SetUniformBuffer(u32 binding, u32 offset, u32 size, const StreamBuffer& buffer); void SetTexture(u32 binding, const Texture& texture); - void SetTexelBuffer(u32 binding, u32 offset, u32 size, const Buffer& buffer, u32 view_index); + void SetTexelBuffer(u32 binding, u32 offset, u32 size, const StreamBuffer& buffer, u32 view_index); void SetPresentTextures(vk::ImageView view0, vk::ImageView view1, vk::ImageView view2); void SetPresentData(DrawInfo data); void SetPlaceholderColor(u8 red, u8 green, u8 blue, u8 alpha); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index a4f920bef..4c31c61cb 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -2,60 +2,69 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#define VULKAN_HPP_NO_CONSTRUCTORS #include #include "common/logging/log.h" #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/vk_instance.h" -namespace Vulkan { +namespace VideoCore::Vulkan { -Swapchain::Swapchain(vk::SurfaceKHR surface_) : surface(surface_) { +Swapchain::Swapchain(Instance& instance, vk::SurfaceKHR surface) : + instance(instance), surface(surface) { } Swapchain::~Swapchain() { - auto device = g_vk_instace->GetDevice(); - auto instance = g_vk_instace->GetInstance(); - device.waitIdle(); - + // Destroy swapchain resources + vk::Device device = instance.GetDevice(); device.destroySemaphore(render_finished); device.destroySemaphore(image_available); device.destroySwapchainKHR(swapchain); - instance.destroySurfaceKHR(surface); } -bool Swapchain::Create(u32 width, u32 height, bool vsync_enabled) { +void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) { is_outdated = false; is_suboptimal = false; // Fetch information about the provided surface - PopulateSwapchainDetails(surface, width, height); + Configure(width, height); - const std::array indices { - g_vk_instace->GetGraphicsQueueFamilyIndex(), - g_vk_instace->GetPresentQueueFamilyIndex(), + const std::array queue_family_indices = { + instance.GetGraphicsQueueFamilyIndex(), + instance.GetPresentQueueFamilyIndex(), }; + const bool exclusive = queue_family_indices[0] == queue_family_indices[1]; + const u32 queue_family_indices_count = exclusive ? 2u : 1u; + const vk::SharingMode sharing_mode = exclusive ? 
vk::SharingMode::eExclusive : + vk::SharingMode::eConcurrent; + // Now we can actually create the swapchain - vk::SwapchainCreateInfoKHR swapchain_info{{}, surface, details.image_count, details.format.format, - details.format.colorSpace, details.extent, 1, vk::ImageUsageFlagBits::eColorAttachment, - vk::SharingMode::eExclusive, 1, indices.data(), details.transform, - vk::CompositeAlphaFlagBitsKHR::eOpaque, details.present_mode, true, swapchain}; + const vk::SwapchainCreateInfoKHR swapchain_info = { + .surface = surface, + .minImageCount = image_count, + .imageFormat = surface_format.format, + .imageColorSpace = surface_format.colorSpace, + .imageExtent = extent, + .imageArrayLayers = 1, + .imageUsage = vk::ImageUsageFlagBits::eColorAttachment, + .imageSharingMode = sharing_mode, + .queueFamilyIndexCount = queue_family_indices_count, + .pQueueFamilyIndices = queue_family_indices.data(), + .preTransform = transform, + .presentMode = present_mode, + .clipped = true, + .oldSwapchain = swapchain + }; - // For dedicated present queues, select concurrent sharing mode - if (indices[0] != indices[1]) { - swapchain_info.imageSharingMode = vk::SharingMode::eConcurrent; - swapchain_info.queueFamilyIndexCount = 2; - } - - auto device = g_vk_instace->GetDevice(); - auto new_swapchain = device.createSwapchainKHR(swapchain_info); + vk::Device device = instance.GetDevice(); + vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info); // If an old swapchain exists, destroy it and move the new one to its place. 
- if (swapchain) { - device.destroy(swapchain); + if (vk::SwapchainKHR old_swapchain = std::exchange(swapchain, new_swapchain); old_swapchain) { + device.destroySwapchainKHR(old_swapchain); } - swapchain = new_swapchain; // Create sync objects if not already created if (!image_available) { @@ -67,19 +76,17 @@ bool Swapchain::Create(u32 width, u32 height, bool vsync_enabled) { } // Create framebuffer and image views - swapchain_images.clear(); - SetupImages(); - - return true; + images = device.getSwapchainImagesKHR(swapchain); } // Wait for maximum of 1 second constexpr u64 ACQUIRE_TIMEOUT = 1000000000; void Swapchain::AcquireNextImage() { - auto result = g_vk_instace->GetDevice().acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, - image_available, VK_NULL_HANDLE, - &image_index); + vk::Device device = instance.GetDevice(); + vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, + image_available, VK_NULL_HANDLE, + ¤t_image); switch (result) { case vk::Result::eSuccess: break; @@ -90,15 +97,21 @@ void Swapchain::AcquireNextImage() { is_outdated = true; break; default: - LOG_ERROR(Render_Vulkan, "acquireNextImageKHR returned unknown result"); + LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result"); break; } } void Swapchain::Present() { - const auto present_queue = g_vk_instace->GetPresentQueue(); + const vk::PresentInfoKHR present_info = { + .waitSemaphoreCount = 1, + .pWaitSemaphores = &render_finished, + .swapchainCount = 1, + .pSwapchains = &swapchain, + .pImageIndices = ¤t_image + }; - vk::PresentInfoKHR present_info(render_finished, swapchain, image_index); + vk::Queue present_queue = instance.GetPresentQueue(); vk::Result result = present_queue.presentKHR(present_info); switch (result) { @@ -115,91 +128,68 @@ void Swapchain::Present() { break; } - frame_index = (frame_index + 1) % swapchain_images.size(); + current_frame = (current_frame + 1) % images.size(); } -void Swapchain::PopulateSwapchainDetails(vk::SurfaceKHR 
surface, u32 width, u32 height) { - auto gpu = g_vk_instace->GetPhysicalDevice(); +void Swapchain::Configure(u32 width, u32 height) { + vk::PhysicalDevice physical = instance.GetPhysicalDevice(); // Choose surface format - auto formats = gpu.getSurfaceFormatsKHR(surface); - details.format = formats[0]; + auto formats = physical.getSurfaceFormatsKHR(surface); + surface_format = formats[0]; if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) { - details.format = { vk::Format::eB8G8R8A8Unorm }; - } - else { - for (const auto& format : formats) { - if (format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear && - format.format == vk::Format::eB8G8R8A8Unorm) { - details.format = format; - break; - } + surface_format = vk::SurfaceFormatKHR{ + .format = vk::Format::eB8G8R8A8Unorm + }; + } else { + auto iter = std::find_if(formats.begin(), formats.end(), [](vk::SurfaceFormatKHR format) -> bool { + return format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear && + format.format == vk::Format::eB8G8R8A8Unorm; + }); + + if (iter == formats.end()) { + LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!"); } } // Checks if a particular mode is supported, if it is, returns that mode. 
- auto modes = gpu.getSurfacePresentModesKHR(surface); - auto ModePresent = [&modes](vk::PresentModeKHR check_mode) { - auto it = std::find_if(modes.begin(), modes.end(), [check_mode](const auto& mode) { - return check_mode == mode; - }); - - return it != modes.end(); - }; + auto modes = physical.getSurfacePresentModesKHR(surface); // FIFO is guaranteed by the Vulkan standard to be available - details.present_mode = vk::PresentModeKHR::eFifo; + present_mode = vk::PresentModeKHR::eFifo; + + auto iter = std::find_if(modes.begin(), modes.end(), [](vk::PresentModeKHR mode) { + return vk::PresentModeKHR::eMailbox == mode; + }); // Prefer Mailbox if present for lowest latency - if (ModePresent(vk::PresentModeKHR::eMailbox)) { - details.present_mode = vk::PresentModeKHR::eMailbox; + if (iter != modes.end()) { + present_mode = vk::PresentModeKHR::eMailbox; } // Query surface extent - auto capabilities = gpu.getSurfaceCapabilitiesKHR(surface); - details.extent = capabilities.currentExtent; + auto capabilities = physical.getSurfaceCapabilitiesKHR(surface); + extent = capabilities.currentExtent; if (capabilities.currentExtent.width == std::numeric_limits::max()) { - details.extent.width = std::clamp(width, capabilities.minImageExtent.width, + extent.width = std::clamp(width, capabilities.minImageExtent.width, capabilities.maxImageExtent.width); - details.extent.height = std::clamp(height, capabilities.minImageExtent.height, + extent.height = std::clamp(height, capabilities.minImageExtent.height, capabilities.maxImageExtent.height); } // Select number of images in swap chain, we prefer one buffer in the background to work on - details.image_count = capabilities.minImageCount + 1; + image_count = capabilities.minImageCount + 1; if (capabilities.maxImageCount > 0) { - details.image_count = std::min(details.image_count, capabilities.maxImageCount); + image_count = std::min(image_count, capabilities.maxImageCount); } // Prefer identity transform if possible - details.transform = 
vk::SurfaceTransformFlagBitsKHR::eIdentity; - if (!(capabilities.supportedTransforms & details.transform)) { - details.transform = capabilities.currentTransform; + transform = vk::SurfaceTransformFlagBitsKHR::eIdentity; + if (!(capabilities.supportedTransforms & transform)) { + transform = capabilities.currentTransform; } } -void Swapchain::SetupImages() { - // Get the swap chain images - auto device = g_vk_instace->GetDevice(); - auto images = device.getSwapchainImagesKHR(swapchain); - - Texture::Info image_info{ - .width = details.extent.width, - .height = details.extent.height, - .format = details.format.format, - .type = vk::ImageType::e2D, - .view_type = vk::ImageViewType::e2D, - .usage = vk::ImageUsageFlagBits::eColorAttachment - }; - - // Create the swapchain buffers containing the image and imageview - swapchain_images.resize(images.size()); - for (int i = 0; i < swapchain_images.size(); i++) { - // Wrap swapchain images with Texture - swapchain_images[i].Adopt(image_info, images[i]); - } -} - -} // namespace Vulkan +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 6055a72b8..29b84d79b 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -4,62 +4,90 @@ #pragma once -#include #include -#include "core/frontend/emu_window.h" -#include "video_core/renderer_vulkan/vk_texture.h" +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" -namespace Vulkan { +namespace VideoCore::Vulkan { -struct SwapChainDetails { - vk::SurfaceFormatKHR format; +class Instance; + +class Swapchain { +public: + Swapchain(Instance& instance, vk::SurfaceKHR surface); + ~Swapchain(); + + /// Creates (or recreates) the swapchain with a given size. + void Create(u32 width, u32 height, bool vsync_enabled); + + /// Acquire the next image in the swapchain. 
+ void AcquireNextImage(); + + /// Present the current image and move to the next one + void Present(); + + /// Return current swapchain state + inline vk::Extent2D GetExtent() const { + return extent; + } + + /// Return the swapchain surface + inline vk::SurfaceKHR GetSurface() const { + return surface; + } + + /// Return the swapchain format + inline vk::SurfaceFormatKHR GetSurfaceFormat() const { + return surface_format; + } + + /// Return the Vulkan swapchain handle + inline vk::SwapchainKHR GetHandle() const { + return swapchain; + } + + /// Return the semaphore that will be signaled when vkAcquireNextImageKHR completes + inline vk::Semaphore GetAvailableSemaphore() const { + return image_available; + } + + /// Return the semaphore that will signal when the current image will be presented + inline vk::Semaphore GetPresentSemaphore() const { + return render_finished; + } + + /// Return the current swapchain image + inline vk::Image GetCurrentImage() { + return images[current_image]; + } + + /// Returns true when the swapchain should be recreated + inline bool NeedsRecreation() const { + return is_suboptimal || is_outdated; + } + +private: + void Configure(u32 width, u32 height); + +private: + Instance& instance; + vk::SwapchainKHR swapchain = VK_NULL_HANDLE; + vk::SurfaceKHR surface = VK_NULL_HANDLE; + + // Swapchain properties + vk::SurfaceFormatKHR surface_format; vk::PresentModeKHR present_mode; vk::Extent2D extent; vk::SurfaceTransformFlagBitsKHR transform; u32 image_count; -}; -class Swapchain { -public: - Swapchain(vk::SurfaceKHR surface); - ~Swapchain(); - - /// Creates (or recreates) the swapchain with a given size. - bool Create(u32 width, u32 height, bool vsync_enabled); - - /// Acquire the next image in the swapchain. - void AcquireNextImage(); - void Present(); - - /// Returns true when the swapchain needs to be recreated. 
- bool NeedsRecreation() const { return IsSubOptimal() || IsOutDated(); } - bool IsOutDated() const { return is_outdated; } - bool IsSubOptimal() const { return is_suboptimal; } - bool IsVSyncEnabled() const { return vsync_enabled; } - u32 GetCurrentImageIndex() const { return image_index; } - - /// Get current swapchain state - vk::Extent2D GetSize() const { return details.extent; } - vk::SurfaceKHR GetSurface() const { return surface; } - vk::SurfaceFormatKHR GetSurfaceFormat() const { return details.format; } - vk::SwapchainKHR GetSwapChain() const { return swapchain; } - const vk::Semaphore& GetAvailableSemaphore() const { return image_available; } - const vk::Semaphore& GetRenderSemaphore() const { return render_finished; } - Texture& GetCurrentImage() { return swapchain_images[image_index]; } - -private: - void PopulateSwapchainDetails(vk::SurfaceKHR surface, u32 width, u32 height); - void SetupImages(); - -private: - SwapChainDetails details{}; - vk::SurfaceKHR surface; + // Swapchain state + std::vector images; vk::Semaphore image_available, render_finished; - bool vsync_enabled{false}, is_outdated{true}, is_suboptimal{true}; - - vk::SwapchainKHR swapchain{VK_NULL_HANDLE}; - std::vector swapchain_images; - u32 image_index{0}, frame_index{0}; + u32 current_image = 0, current_frame = 0; + bool vsync_enabled = false; + bool is_outdated = true; + bool is_suboptimal = true; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp index 8604365b1..5f7dd0a02 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp @@ -2,232 +2,185 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/logging/log.h" #include "video_core/renderer_vulkan/vk_task_scheduler.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_state.h" -#include "video_core/renderer_vulkan/vk_swapchain.h" -#include "common/assert.h" -#include "common/thread.h" +#include "video_core/renderer_vulkan/vk_buffer.h" -namespace Vulkan { +namespace VideoCore::Vulkan { -TaskScheduler::~TaskScheduler() { +// 16MB should be enough for a single frame +constexpr BufferInfo STAGING_INFO = { + .capacity = 16 * 1024 * 1024, + .usage = BufferUsage::Staging +}; + +CommandScheduler::CommandScheduler(Instance& instance) : instance(instance) { + +} + +CommandScheduler::~CommandScheduler() { // Destroy Vulkan resources - auto device = g_vk_instace->GetDevice(); - device.waitIdle(); + vk::Device device = instance.GetDevice(); + VmaAllocator allocator = instance.GetAllocator(); - for (auto& task : tasks) { - task.staging.Destroy(); - device.destroyDescriptorPool(task.pool); + for (auto& command : commands) { + device.destroyFence(command.fence); + + // Clean up any scheduled resources + for (auto& func : command.cleanups) { + func(device, allocator); + } } - SyncToGPU(); device.destroyCommandPool(command_pool); - device.destroySemaphore(timeline); } -std::tuple TaskScheduler::RequestStaging(u32 size) { - auto& task = tasks[current_task]; - if (size > STAGING_BUFFER_SIZE - task.current_offset) { - // If we run out of space, allocate a new buffer. 
- // The old one will be safely destroyed when the task finishes - task.staging.Recreate(); - task.current_offset = 0; - - return std::make_tuple(task.staging.GetHostPointer(), 0); - } - - u8* ptr = task.staging.GetHostPointer() + task.current_offset; - std::memset(ptr, 0, size); - - task.current_offset += size; - return std::make_tuple(ptr, task.current_offset - size); -} - -Buffer& TaskScheduler::GetStaging() { - return tasks[current_task].staging; -} - -bool TaskScheduler::Create() { - auto device = g_vk_instace->GetDevice(); - - // Create command pool - vk::CommandPoolCreateInfo pool_info(vk::CommandPoolCreateFlagBits::eResetCommandBuffer, - g_vk_instace->GetGraphicsQueueFamilyIndex()); - command_pool = device.createCommandPool(pool_info); - - // Create timeline semaphore for syncronization - vk::SemaphoreTypeCreateInfo timeline_info{vk::SemaphoreType::eTimeline, 0}; - vk::SemaphoreCreateInfo semaphore_info{{}, &timeline_info}; - - timeline = device.createSemaphore(semaphore_info); - - Buffer::Info staging_info{ - .size = STAGING_BUFFER_SIZE, - .properties = vk::MemoryPropertyFlagBits::eHostVisible | - vk::MemoryPropertyFlagBits::eHostCoherent, - .usage = vk::BufferUsageFlagBits::eTransferSrc | - vk::BufferUsageFlagBits::eTransferDst +bool CommandScheduler::Create() { + vk::Device device = instance.GetDevice(); + const vk::CommandPoolCreateInfo pool_info = { + .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer, + .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex() }; - // Should be enough for a single frame - const vk::DescriptorPoolSize pool_size{vk::DescriptorType::eCombinedImageSampler, 64}; - vk::DescriptorPoolCreateInfo pool_create_info{{}, 1024, pool_size}; + // Create command pool + command_pool = device.createCommandPool(pool_info); - for (auto& task : tasks) { - // Create command buffers - vk::CommandBufferAllocateInfo buffer_info{command_pool, vk::CommandBufferLevel::ePrimary, 2}; - auto buffers = 
device.allocateCommandBuffers(buffer_info); - std::ranges::copy_n(buffers.begin(), 2, task.command_buffers.begin()); + vk::CommandBufferAllocateInfo buffer_info = { + .commandPool = command_pool, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = 2 * SCHEDULER_COMMAND_COUNT + }; - // Create staging buffer - task.staging.Create(staging_info); + // Allocate all command buffers + const auto command_buffers = device.allocateCommandBuffers(buffer_info); - // Create descriptor pool - task.pool = device.createDescriptorPool(pool_create_info); + // Initialize command slots + for (std::size_t i = 0; i < commands.size(); i++) { + commands[i] = CommandSlot{ + .render_command_buffer = command_buffers[2 * i], + .upload_command_buffer = command_buffers[2 * i + 1], + .fence = device.createFence({}), + .upload_buffer = std::make_unique(instance, *this, STAGING_INFO) + }; } return true; } -vk::CommandBuffer TaskScheduler::GetRenderCommandBuffer() const { - const auto& task = tasks[current_task]; - return task.command_buffers[1]; -} - -vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() { - auto& task = tasks[current_task]; - if (!task.use_upload_buffer) { - auto& cmdbuffer = task.command_buffers[0]; - cmdbuffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}); - task.use_upload_buffer = true; - } - - return task.command_buffers[0]; -} - -vk::DescriptorPool TaskScheduler::GetDescriptorPool() const { - const auto& task = tasks[current_task]; - return task.pool; -} - -void TaskScheduler::SyncToGPU(u64 task_index) { - // No need to sync if the GPU already has finished the task - auto tick = GetGPUTick(); - if (tasks[task_index].task_id <= tick) { +void CommandScheduler::Synchronize() { + // Don't synchronize the same command twice + CommandSlot& command = commands[current_command]; + if (command.fence_counter <= completed_fence_counter) { return; } - // Wait for the task to complete - vk::SemaphoreWaitInfo wait_info{{}, timeline, 
tasks[task_index].task_id}; - auto result = g_vk_instace->GetDevice().waitSemaphores(wait_info, UINT64_MAX); - - if (result != vk::Result::eSuccess) { - LOG_CRITICAL(Render_Vulkan, "Failed waiting for timeline semaphore!"); + // Wait for this command buffer to be completed. + vk::Device device = instance.GetDevice(); + if (device.waitForFences(command.fence, true, UINT64_MAX) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Waiting for fences failed!"); } + + // Cleanup resources for command buffers that have completed along with the current one + const u64 now_fence_counter = command.fence_counter; + VmaAllocator allocator = instance.GetAllocator(); + for (CommandSlot& command : commands) { + if (command.fence_counter < now_fence_counter && + command.fence_counter > completed_fence_counter) { + for (auto& func: command.cleanups) { + func(device, allocator); + } + + command.cleanups.clear(); + } + } + + completed_fence_counter = now_fence_counter; } -void TaskScheduler::SyncToGPU() { - SyncToGPU(current_task); -} - -u64 TaskScheduler::GetCPUTick() const { - return current_task_id; -} - -u64 TaskScheduler::GetGPUTick() const { - auto device = g_vk_instace->GetDevice(); - return device.getSemaphoreCounterValue(timeline); -} - -void TaskScheduler::Submit(bool wait_completion, bool present, Swapchain* swapchain) { - // End the current task recording. - auto& task = tasks[current_task]; +void CommandScheduler::Submit(bool wait_completion, + vk::Semaphore wait_semaphore, + vk::Semaphore signal_semaphore) { + const CommandSlot& command = commands[current_command]; // End command buffers - task.command_buffers[1].end(); - if (task.use_upload_buffer) { - task.command_buffers[0].end(); + command.render_command_buffer.end(); + if (command.use_upload_buffer) { + command.upload_command_buffer.end(); } - const u32 num_signal_semaphores = present ? 
2U : 1U; - const std::array signal_values{task.task_id, u64(0)}; - std::array signal_semaphores{timeline, vk::Semaphore{}}; - - const u32 num_wait_semaphores = present ? 2U : 1U; - const std::array wait_values{task.task_id - 1, u64(1)}; - std::array wait_semaphores{timeline, vk::Semaphore{}}; - - // When the task completes the timeline will increment to the task id - const vk::TimelineSemaphoreSubmitInfoKHR timeline_si{num_wait_semaphores, wait_values.data(), - num_signal_semaphores, signal_values.data()}; - - static constexpr std::array wait_stage_masks{ + constexpr std::array wait_stage_masks{ vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eColorAttachmentOutput, }; - const u32 cmdbuffer_count = task.use_upload_buffer ? 2u : 1u; - const vk::SubmitInfo submit_info{num_wait_semaphores, wait_semaphores.data(), wait_stage_masks.data(), cmdbuffer_count, - &task.command_buffers[2 - cmdbuffer_count], num_signal_semaphores, signal_semaphores.data(), - &timeline_si}; + const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u; + const u32 wait_semaphore_count = wait_semaphore ? 1u : 0u; + const u32 command_buffer_count = command.use_upload_buffer ? 
2u : 1u; + const std::array command_buffers = { command.render_command_buffer, + command.upload_command_buffer }; - // Wait for new swapchain image - if (present) { - signal_semaphores[1] = swapchain->GetRenderSemaphore(); - wait_semaphores[1] = swapchain->GetAvailableSemaphore(); - } + // Prepare submit info + const vk::SubmitInfo submit_info = { + .waitSemaphoreCount = wait_semaphore_count, + .pWaitSemaphores = &wait_semaphore, + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = command_buffer_count, + .pCommandBuffers = command_buffers.data(), + .signalSemaphoreCount = signal_semaphore_count, + .pSignalSemaphores = &signal_semaphore, + }; // Submit the command buffer - auto queue = g_vk_instace->GetGraphicsQueue(); - queue.submit(submit_info); - - // Present the image when rendering has finished - if (present) { - swapchain->Present(); - } + vk::Queue queue = instance.GetGraphicsQueue(); + queue.submit(submit_info, command.fence); // Block host until the GPU catches up if (wait_completion) { - SyncToGPU(); + Synchronize(); } // Switch to next cmdbuffer. - BeginTask(); + SwitchSlot(); } -void TaskScheduler::Schedule(std::function func) { - auto& task = tasks[current_task]; - task.cleanups.push_back(func); +void CommandScheduler::Schedule(Deleter&& func) { + auto& command = commands[current_command]; + command.cleanups.push_back(func); } -void TaskScheduler::BeginTask() { - u32 next_task_index = (current_task + 1) % TASK_COUNT; - auto& task = tasks[next_task_index]; - auto device = g_vk_instace->GetDevice(); +vk::CommandBuffer CommandScheduler::GetUploadCommandBuffer() { + CommandSlot& command = commands[current_command]; + if (!command.use_upload_buffer) { + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit + }; - // Wait for the GPU to finish with all resources for this task. 
- SyncToGPU(next_task_index); - - // Delete all resources that can be freed now - for (auto& func : task.cleanups) { - func(); + command.upload_command_buffer.begin(begin_info); + command.use_upload_buffer = true; } - device.resetDescriptorPool(task.pool); - task.command_buffers[1].begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}); - - // Move to the next command buffer. - current_task = next_task_index; - task.task_id = ++current_task_id; - task.current_offset = 0; - task.use_upload_buffer = false; - task.cleanups.clear(); - - auto& state = VulkanState::Get(); - state.InitDescriptorSets(); + return command.upload_command_buffer; } -std::unique_ptr g_vk_task_scheduler; +void CommandScheduler::SwitchSlot() { + current_command = (current_command + 1) % SCHEDULER_COMMAND_COUNT; + CommandSlot& command = commands[current_command]; + + // Wait for the GPU to finish with all resources for this command. + Synchronize(); + + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit + }; + + // Move to the next command buffer. 
+ vk::Device device = instance.GetDevice(); + device.resetFences(command.fence); + command.render_command_buffer.begin(begin_info); + command.fence_counter = next_fence_counter++; + command.use_upload_buffer = false; +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.h b/src/video_core/renderer_vulkan/vk_task_scheduler.h index 77634b8f0..a3a8631fa 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.h @@ -4,68 +4,81 @@ #pragma once +#include #include -#include "video_core/renderer_vulkan/vk_buffer.h" +#include +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" -namespace Vulkan { +namespace VideoCore::Vulkan { -constexpr u32 TASK_COUNT = 5; -constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024; +constexpr u32 SCHEDULER_COMMAND_COUNT = 4; -class Swapchain; +using Deleter = std::function; -/// Wrapper class around command buffer execution. Handles an arbitrary -/// number of tasks that can be submitted concurrently. This allows the host -/// to start recording the next frame while the GPU is working on the -/// current one. 
Larger values can be used with caution, as they can cause -/// frame latency if the CPU is too far ahead of the GPU -class TaskScheduler { +class Buffer; +class Instance; + +class CommandScheduler { public: - TaskScheduler() = default; - ~TaskScheduler(); + CommandScheduler(Instance& instance); + ~CommandScheduler(); /// Create and initialize the work scheduler bool Create(); - /// Retrieve either of the current frame's command buffers - vk::CommandBuffer GetRenderCommandBuffer() const; + /// Block host until the current command completes execution + void Synchronize(); + + /// Defer operation until the current command completes execution + void Schedule(Deleter&& func); + + /// Submits the current command to the graphics queue + void Submit(bool wait_completion = false, vk::Semaphore wait = VK_NULL_HANDLE, + vk::Semaphore signal = VK_NULL_HANDLE); + + /// Returns the command buffer used for early upload operations. + /// This is useful for vertex/uniform buffer uploads that happen once per frame vk::CommandBuffer GetUploadCommandBuffer(); - vk::DescriptorPool GetDescriptorPool() const; - /// Access the staging buffer of the current task - std::tuple RequestStaging(u32 size); - Buffer& GetStaging(); + /// Returns the command buffer used for rendering + inline vk::CommandBuffer GetRenderCommandBuffer() const { + const CommandSlot& command = commands[current_command]; + return command.render_command_buffer; + } - /// Query and/or synchronization CPU and GPU - u64 GetCPUTick() const; - u64 GetGPUTick() const; - void SyncToGPU(); - void SyncToGPU(u64 task_index); + /// Returns the upload buffer of the active command slot + inline Buffer& GetCommandUploadBuffer() { + CommandSlot& command = commands[current_command]; + return *command.upload_buffer; + } - void Schedule(std::function func); - void Submit(bool wait_completion = false, bool present = false, Swapchain* swapchain = nullptr); - - void BeginTask(); + /// Returns the index of the current command slot + inline 
u32 GetCurrentSlotIndex() const { + return current_command; + } private: - struct Task { + /// Activates the next command slot and optionally waits for its completion + void SwitchSlot(); + +private: + Instance& instance; + u64 next_fence_counter = 1; + u64 completed_fence_counter = 0; + + struct CommandSlot { bool use_upload_buffer = false; - u64 current_offset = 0, task_id = 0; - std::array command_buffers; - std::vector> cleanups; - vk::DescriptorPool pool; - Buffer staging; + u64 fence_counter = 0; + vk::CommandBuffer render_command_buffer, upload_command_buffer; + vk::Fence fence = VK_NULL_HANDLE; + std::unique_ptr upload_buffer; + std::vector cleanups; }; - vk::Semaphore timeline; - vk::CommandPool command_pool; - u64 current_task_id = 0; - - // Each task contains unique resources - std::array tasks; - u64 current_task = -1; + vk::CommandPool command_pool = VK_NULL_HANDLE; + std::array commands; + u32 current_command = 0; }; -extern std::unique_ptr g_vk_task_scheduler; - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture.cpp b/src/video_core/renderer_vulkan/vk_texture.cpp index 1c671501b..19cf64b59 100644 --- a/src/video_core/renderer_vulkan/vk_texture.cpp +++ b/src/video_core/renderer_vulkan/vk_texture.cpp @@ -2,288 +2,229 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include -#include +#define VULKAN_HPP_NO_CONSTRUCTORS #include "common/assert.h" #include "common/logging/log.h" +#include "video_core/renderer_vulkan/pica_to_vulkan.h" +#include "video_core/renderer_vulkan/vk_buffer.h" #include "video_core/renderer_vulkan/vk_texture.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" -#include "video_core/renderer_vulkan/vk_state.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" -namespace Vulkan { +namespace VideoCore::Vulkan { -static int BytesPerPixel(vk::Format format) { +inline vk::Format ToVkFormat(TextureFormat format) { switch (format) { - case vk::Format::eD32SfloatS8Uint: - return 5; - case vk::Format::eD32Sfloat: - case vk::Format::eB8G8R8A8Unorm: - case vk::Format::eR8G8B8A8Uint: - case vk::Format::eR8G8B8A8Unorm: - case vk::Format::eD24UnormS8Uint: - return 4; - case vk::Format::eR8G8B8Unorm: - case vk::Format::eR8G8B8Srgb: - return 3; - case vk::Format::eR5G6B5UnormPack16: - case vk::Format::eR5G5B5A1UnormPack16: - case vk::Format::eR4G4B4A4UnormPack16: - case vk::Format::eD16Unorm: - return 2; + case TextureFormat::RGBA8: + return vk::Format::eR8G8B8A8Unorm; + case TextureFormat::RGB8: + return vk::Format::eR8G8B8Unorm; + case TextureFormat::RGB5A1: + return vk::Format::eR5G5B5A1UnormPack16; + case TextureFormat::RGB565: + return vk::Format::eR5G6B5UnormPack16; + case TextureFormat::RGBA4: + return vk::Format::eR4G4B4A4UnormPack16; + case TextureFormat::D16: + return vk::Format::eD16Unorm; + case TextureFormat::D24: + return vk::Format::eX8D24UnormPack32; + case TextureFormat::D24S8: + return vk::Format::eD24UnormS8Uint; default: - UNREACHABLE(); + LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format); + return vk::Format::eUndefined; } } -vk::ImageAspectFlags GetImageAspect(vk::Format format) { - vk::ImageAspectFlags flags; - switch (format) { - case vk::Format::eD16UnormS8Uint: - case vk::Format::eD24UnormS8Uint: - case 
vk::Format::eD32SfloatS8Uint: - flags = vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth; - break; - case vk::Format::eD16Unorm: - case vk::Format::eD32Sfloat: - flags = vk::ImageAspectFlagBits::eDepth; - break; +inline vk::ImageType ToVkImageType(TextureType type) { + switch (type) { + case TextureType::Texture1D: + return vk::ImageType::e1D; + case TextureType::Texture2D: + return vk::ImageType::e2D; + case TextureType::Texture3D: + return vk::ImageType::e3D; default: - flags = vk::ImageAspectFlagBits::eColor; + LOG_ERROR(Render_Vulkan, "Unknown texture type {}!", type); + return vk::ImageType::e2D; } +} - return flags; +inline vk::ImageViewType ToVkImageViewType(TextureViewType view_type) { + switch (view_type) { + case TextureViewType::View1D: + return vk::ImageViewType::e1D; + case TextureViewType::View2D: + return vk::ImageViewType::e2D; + case TextureViewType::View3D: + return vk::ImageViewType::e3D; + case TextureViewType::ViewCube: + return vk::ImageViewType::eCube; + case TextureViewType::View1DArray: + return vk::ImageViewType::e1DArray; + case TextureViewType::View2DArray: + return vk::ImageViewType::e2DArray; + case TextureViewType::ViewCubeArray: + return vk::ImageViewType::eCubeArray; + default: + LOG_ERROR(Render_Vulkan, "Unknown texture view type {}!", view_type); + return vk::ImageViewType::e2D; + } +} + +Texture::Texture(Instance& instance, CommandScheduler& scheduler) : + instance(instance), scheduler(scheduler) {} + +Texture::Texture(Instance& instance, CommandScheduler& scheduler, + const TextureInfo& info) : TextureBase(info), + instance(instance), scheduler(scheduler) { + + // Convert the input format to another that supports attachments + advertised_format = ToVkFormat(info.format); + internal_format = instance.GetFormatAlternative(advertised_format); + aspect = GetImageAspect(advertised_format); + + vk::Device device = instance.GetDevice(); + const vk::ImageCreateInfo image_info = { + .flags = info.view_type == 
TextureViewType::ViewCube ? + vk::ImageCreateFlagBits::eCubeCompatible : + vk::ImageCreateFlags{}, + .imageType = ToVkImageType(info.type), + .format = internal_format, + .extent = {info.width, info.height, 1}, + .mipLevels = info.levels, + .arrayLayers = info.view_type == TextureViewType::ViewCube ? 6u : 1u, + .samples = vk::SampleCountFlagBits::e1, + .usage = GetImageUsage(aspect), + }; + + const VmaAllocationCreateInfo alloc_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE + }; + + VkImage unsafe_image = VK_NULL_HANDLE; + VkImageCreateInfo unsafe_image_info = static_cast(image_info); + VmaAllocator allocator = instance.GetAllocator(); + + // Allocate texture memory + vmaCreateImage(allocator, &unsafe_image_info, &alloc_info, &unsafe_image, &allocation, nullptr); + image = vk::Image{unsafe_image}; + + const vk::ImageViewCreateInfo view_info = { + .image = image, + .viewType = ToVkImageViewType(info.view_type), + .format = internal_format, + .subresourceRange = {aspect, 0, info.levels, 0, 1} + }; + + // Create image view + image_view = device.createImageView(view_info); +} + +Texture::Texture(Instance& instance, CommandScheduler& scheduler, + vk::Image image, const TextureInfo& info) : TextureBase(info), + instance(instance), scheduler(scheduler), image(image), + is_texture_owned(false) { + + const vk::ImageViewCreateInfo view_info = { + .image = image, + .viewType = ToVkImageViewType(info.view_type), + .format = internal_format, + .subresourceRange = {aspect, 0, info.levels, 0, 1} + }; + + // Create image view + vk::Device device = instance.GetDevice(); + image_view = device.createImageView(view_info); } Texture::~Texture() { - Destroy(); -} - -Texture::Texture(Texture&& other) noexcept { - info = std::exchange(other.info, Info{}); - texture = std::exchange(other.texture, VK_NULL_HANDLE); - aspect = std::exchange(other.aspect, vk::ImageAspectFlagBits::eNone); - view = std::exchange(other.view, VK_NULL_HANDLE); - memory = std::exchange(other.memory, 
VK_NULL_HANDLE); - image_size = std::exchange(other.image_size, 0); - adopted = std::exchange(other.adopted, false); - is_rgb = std::exchange(other.is_rgb, false); - is_d24s8 = std::exchange(other.is_d24s8, false); -} - -Texture& Texture::operator=(Texture&& other) noexcept { - Destroy(); - info = std::exchange(other.info, Info{}); - texture = std::exchange(other.texture, VK_NULL_HANDLE); - aspect = std::exchange(other.aspect, vk::ImageAspectFlagBits::eNone); - view = std::exchange(other.view, VK_NULL_HANDLE); - memory = std::exchange(other.memory, VK_NULL_HANDLE); - image_size = std::exchange(other.image_size, 0); - adopted = std::exchange(other.adopted, false); - is_rgb = std::exchange(other.is_rgb, false); - is_d24s8 = std::exchange(other.is_d24s8, false); - return *this; -} - -void Texture::Create(const Info& create_info) { - auto device = g_vk_instace->GetDevice(); - info = create_info; - - // Emulate RGB8 format with RGBA8 - is_rgb = false; - if (info.format == vk::Format::eR8G8B8Unorm) { - is_rgb = true; - info.format = vk::Format::eR8G8B8A8Unorm; - } - - is_d24s8 = false; - if (info.format == vk::Format::eD24UnormS8Uint) { - is_d24s8 = true; - info.format = vk::Format::eD32SfloatS8Uint; - } - - // Create the texture - image_size = info.width * info.height * BytesPerPixel(info.format); - aspect = GetImageAspect(info.format); - - vk::ImageCreateFlags flags{}; - if (info.view_type == vk::ImageViewType::eCube) { - flags = vk::ImageCreateFlagBits::eCubeCompatible; - } - - vk::ImageCreateInfo image_info { - flags, info.type, info.format, - { info.width, info.height, 1 }, info.levels, info.layers, - static_cast(info.multisamples), - vk::ImageTiling::eOptimal, info.usage - }; - - texture = device.createImage(image_info); - - // Create texture memory - auto requirements = device.getImageMemoryRequirements(texture); - auto memory_index = Buffer::FindMemoryType(requirements.memoryTypeBits, - vk::MemoryPropertyFlagBits::eDeviceLocal); - vk::MemoryAllocateInfo 
alloc_info(requirements.size, memory_index); - - memory = device.allocateMemory(alloc_info); - device.bindImageMemory(texture, memory, 0); - - // Create texture view - vk::ImageViewCreateInfo view_info { - {}, texture, info.view_type, info.format, {}, - {aspect, 0, info.levels, 0, info.layers} - }; - - view = device.createImageView(view_info); -} - -void Texture::Create(Texture& other) { - auto info = other.info; - Create(info); - - // Copy the buffer contents - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - Transition(cmdbuffer, vk::ImageLayout::eTransferDstOptimal); - - auto old_layout = other.GetLayout(); - other.Transition(cmdbuffer, vk::ImageLayout::eTransferSrcOptimal); - - u32 copy_count = 0; - std::array copy_regions; - - for (u32 i = 0; i < info.levels; i++) { - copy_regions[copy_count++] = vk::ImageCopy{ - vk::ImageSubresourceLayers{aspect, i, 0, 1}, {0}, - vk::ImageSubresourceLayers{aspect, i, 0, 1}, {0}, - {info.width, info.height, 0} - }; - } - - cmdbuffer.copyImage(other.GetHandle(), vk::ImageLayout::eTransferSrcOptimal, - texture, vk::ImageLayout::eTransferDstOptimal, copy_count, - copy_regions.data()); - - Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); - other.Transition(cmdbuffer, old_layout); -} - -void Texture::Adopt(const Info& create_info, vk::Image image) { - info = create_info; - image_size = info.width * info.height * BytesPerPixel(info.format); - aspect = GetImageAspect(info.format); - texture = image; - - // Create texture view - vk::ImageViewCreateInfo view_info { - {}, texture, info.view_type, info.format, {}, - {aspect, 0, info.levels, 0, info.layers} - }; - - auto device = g_vk_instace->GetDevice(); - view = device.createImageView(view_info); - adopted = true; -} - -void Texture::Destroy() { - if (texture && !adopted) { - // Make sure to unbind the texture before destroying it - auto& state = VulkanState::Get(); - state.UnbindTexture(*this); - - auto deleter = [texture = texture, - view = view, - 
memory = memory]() { - auto device = g_vk_instace->GetDevice(); - if (texture) { - device.destroyImage(texture); - device.destroyImageView(view); - device.freeMemory(memory); - } - }; - - // Schedule deletion of the texture after it's no longer used - // by the GPU - g_vk_task_scheduler->Schedule(deleter); - } - - // If the image was adopted (probably from the swapchain) then only - // destroy the view - if (adopted) { - g_vk_task_scheduler->Schedule([view = view](){ - auto device = g_vk_instace->GetDevice(); + if (image && is_texture_owned) { + auto deleter = [image = image, allocation = allocation, + view = image_view](vk::Device device, VmaAllocator allocator) { device.destroyImageView(view); - }); + vmaDestroyImage(allocator, static_cast(image), allocation); + }; + + // Schedule deletion of the texture after it's no longer used by the GPU + scheduler.Schedule(deleter); + } else if (!is_texture_owned) { + // If the texture is not owning, destroy the view immediately as + // synchronization is the caller's responsibility + vk::Device device = instance.GetDevice(); + device.destroyImageView(image_view); } } -void Texture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout) { - Transition(cmdbuffer, new_layout, 0, info.levels, 0, info.layers); -} +void Texture::Transition(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout, + u32 level, u32 level_count) { + ASSERT(level + level_count < TEXTURE_MAX_LEVELS); -void Texture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout, - u32 start_level, u32 level_count, u32 start_layer, u32 layer_count) { - if (new_layout == layout) { + // Ensure all miplevels in the range have the same layout + vk::ImageLayout old_layout = layouts[level]; + if (old_layout != vk::ImageLayout::eUndefined) { + for (u32 i = 0; i < level_count; i++) { + ASSERT(layouts[level + i] == old_layout); + } + } + + // Don't do anything if the image is already in the wanted layout + if (new_layout == old_layout) { 
return; } struct LayoutInfo { - vk::ImageLayout layout; vk::AccessFlags access; vk::PipelineStageFlags stage; }; // Get optimal transition settings for every image layout. Settings taken from Dolphin - auto layout_info = [](vk::ImageLayout layout) -> LayoutInfo { - LayoutInfo info{ .layout = layout }; + auto GetLayoutInfo = [](vk::ImageLayout layout) -> LayoutInfo { + LayoutInfo info; switch (layout) { case vk::ImageLayout::eUndefined: // Layout undefined therefore contents undefined, and we don't care what happens to it. info.access = vk::AccessFlagBits::eNone; info.stage = vk::PipelineStageFlagBits::eTopOfPipe; break; - case vk::ImageLayout::ePreinitialized: // Image has been pre-initialized by the host, so ensure all writes have completed. info.access = vk::AccessFlagBits::eHostWrite; info.stage = vk::PipelineStageFlagBits::eHost; break; - case vk::ImageLayout::eColorAttachmentOptimal: // Image was being used as a color attachment, so ensure all writes have completed. - info.access = vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite; + info.access = vk::AccessFlagBits::eColorAttachmentRead | + vk::AccessFlagBits::eColorAttachmentWrite; info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput; break; - case vk::ImageLayout::eDepthStencilAttachmentOptimal: // Image was being used as a depthstencil attachment, so ensure all writes have completed. 
- info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead | vk::AccessFlagBits::eDepthStencilAttachmentWrite; - info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests | vk::PipelineStageFlagBits::eLateFragmentTests; + info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite; + info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests; break; - case vk::ImageLayout::ePresentSrcKHR: info.access = vk::AccessFlagBits::eNone; info.stage = vk::PipelineStageFlagBits::eBottomOfPipe; break; - case vk::ImageLayout::eShaderReadOnlyOptimal: // Image was being used as a shader resource, make sure all reads have finished. info.access = vk::AccessFlagBits::eShaderRead; info.stage = vk::PipelineStageFlagBits::eFragmentShader; break; - case vk::ImageLayout::eTransferSrcOptimal: // Image was being used as a copy source, ensure all reads have finished. info.access = vk::AccessFlagBits::eTransferRead; info.stage = vk::PipelineStageFlagBits::eTransfer; break; - case vk::ImageLayout::eTransferDstOptimal: // Image was being used as a copy destination, ensure all writes have finished. 
info.access = vk::AccessFlagBits::eTransferWrite; info.stage = vk::PipelineStageFlagBits::eTransfer; break; - default: LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout); UNREACHABLE(); @@ -292,220 +233,286 @@ void Texture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout return info; }; + LayoutInfo source = GetLayoutInfo(old_layout); + LayoutInfo dest = GetLayoutInfo(new_layout); + + const vk::ImageMemoryBarrier barrier = { + .srcAccessMask = source.access, + .dstAccessMask = dest.access, + .oldLayout = old_layout, + .newLayout = new_layout, + .image = image, + .subresourceRange = {aspect, level, level_count, 0, 1} + }; + // Submit pipeline barrier - LayoutInfo source = layout_info(layout), dst = layout_info(new_layout); - vk::ImageMemoryBarrier barrier { - source.access, dst.access, - source.layout, dst.layout, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - texture, - vk::ImageSubresourceRange{aspect, start_level, level_count, start_layer, layer_count} + command_buffer.pipelineBarrier(source.stage, dest.stage, + vk::DependencyFlagBits::eByRegion, + {}, {}, barrier); + + // Update layouts + SetLayout(new_layout, level, level_count); +} + +void Texture::SetLayout(vk::ImageLayout new_layout, u32 level, u32 level_count) { + std::fill_n(layouts.begin() + level, level_count, new_layout); +} + +void Texture::Upload(Rect2D rectangle, u32 stride, std::span data, u32 level) { + const u64 byte_count = data.size(); + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + + // If the advertised format supports blitting then use GPU accelerated + // format conversion. + if (internal_format != advertised_format && + instance.IsFormatSupported(advertised_format, + vk::FormatFeatureFlagBits::eBlitSrc)) { + // Creating a new staging texture for each upload/download is expensive + // but this path is not common. 
TODO: Profile this + StagingTexture staging{instance, scheduler, info}; + + const std::array offsets = { + vk::Offset3D{rectangle.x, rectangle.y, 0}, + vk::Offset3D{static_cast(rectangle.x + rectangle.width), + static_cast(rectangle.y + rectangle.height), 0} + }; + + const vk::ImageBlit image_blit = { + .srcSubresource = {aspect, level, 0, 1}, + .srcOffsets = offsets, + .dstSubresource = {aspect, level, 0, 1}, + .dstOffsets = offsets + }; + + // Copy data to staging texture + std::memcpy(staging.GetMappedPtr(), data.data(), byte_count); + staging.Commit(byte_count); + + Transition(command_buffer, vk::ImageLayout::eTransferDstOptimal, level); + + // Blit + command_buffer.blitImage(staging.GetHandle(), vk::ImageLayout::eGeneral, + image, vk::ImageLayout::eTransferDstOptimal, + image_blit, vk::Filter::eNearest); + + // Otherwise use normal staging buffer path with possible CPU conversion + } else { + Buffer& staging = scheduler.GetCommandUploadBuffer(); + const u64 staging_offset = staging.GetCurrentOffset(); + + // Copy pixels to the staging buffer + auto slice = staging.Map(byte_count); + std::memcpy(slice.data(), data.data(), byte_count); + staging.Commit(byte_count); + + // TODO: Handle depth and stencil uploads + ASSERT(aspect == vk::ImageAspectFlagBits::eColor && + advertised_format == internal_format); + + const vk::BufferImageCopy copy_region = { + .bufferOffset = staging_offset, + .bufferRowLength = stride, + .bufferImageHeight = rectangle.height, + .imageSubresource = { + .aspectMask = aspect, + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .imageOffset = {rectangle.x, rectangle.y, 0}, + .imageExtent = {rectangle.width, rectangle.height, 1} + }; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + Transition(command_buffer, vk::ImageLayout::eTransferDstOptimal, level); + + // Copy staging buffer to the texture + command_buffer.copyBufferToImage(staging.GetHandle(), image, + vk::ImageLayout::eTransferDstOptimal, + 
copy_region); + } + + Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal); +} + +void Texture::Download(Rect2D rectangle, u32 stride, std::span data, u32 level) { + const u64 byte_count = data.size(); + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + + // If the advertised format supports blitting then use GPU accelerated + // format conversion. + if (internal_format != advertised_format && + instance.IsFormatSupported(advertised_format, + vk::FormatFeatureFlagBits::eBlitDst)) { + // Creating a new staging texture for each upload/download is expensive + // but this path is not common. TODO: Profile this + StagingTexture staging{instance, scheduler, info}; + + const std::array offsets = { + vk::Offset3D{rectangle.x, rectangle.y, 0}, + vk::Offset3D{static_cast(rectangle.x + rectangle.width), + static_cast(rectangle.y + rectangle.height), 0} + }; + + const vk::ImageBlit image_blit = { + .srcSubresource = {aspect, level, 0, 1}, + .srcOffsets = offsets, + .dstSubresource = {aspect, level, 0, 1}, + .dstOffsets = offsets + }; + + Transition(command_buffer, vk::ImageLayout::eTransferSrcOptimal, level); + + // Blit + command_buffer.blitImage(image, vk::ImageLayout::eTransferSrcOptimal, + staging.GetHandle(), vk::ImageLayout::eGeneral, + image_blit, vk::Filter::eNearest); + + // TODO: Async downloads + scheduler.Submit(true); + + // Copy data to the destination + staging.Commit(byte_count); + std::memcpy(data.data(), staging.GetMappedPtr(), byte_count); + + // Otherwise use normal staging buffer path with possible CPU conversion + } else { + Buffer& staging = scheduler.GetCommandUploadBuffer(); + const u64 staging_offset = staging.GetCurrentOffset(); + + const vk::BufferImageCopy copy_region = { + .bufferOffset = staging_offset, + .bufferRowLength = stride, + .bufferImageHeight = rectangle.height, + .imageSubresource = { + .aspectMask = aspect, + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .imageOffset = 
{rectangle.x, rectangle.y, 0}, + .imageExtent = {rectangle.width, rectangle.height, 1} + }; + + Transition(command_buffer, vk::ImageLayout::eTransferSrcOptimal, level); + + // Copy pixel data to the staging buffer + command_buffer.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, + staging.GetHandle(), copy_region); + + Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal); + + // TODO: Async downloads + scheduler.Submit(true); + + // Copy data to the destination + auto memory = staging.Map(byte_count); + std::memcpy(data.data(), memory.data(), byte_count); + } +} + +StagingTexture::StagingTexture(Instance& instance, CommandScheduler& scheduler, + const TextureInfo& info) : + TextureBase(info), instance(instance), scheduler(scheduler) { + + format = ToVkFormat(info.format); + const vk::ImageCreateInfo image_info = { + .flags = info.view_type == TextureViewType::ViewCube ? + vk::ImageCreateFlagBits::eCubeCompatible : + vk::ImageCreateFlags{}, + .imageType = ToVkImageType(info.type), + .format = format, + .extent = {info.width, info.height, 1}, + .mipLevels = info.levels, + .arrayLayers = info.view_type == TextureViewType::ViewCube ? 6u : 1u, + .samples = vk::SampleCountFlagBits::e1, + .usage = vk::ImageUsageFlagBits::eTransferSrc | + vk::ImageUsageFlagBits::eTransferDst, }; - cmdbuffer.pipelineBarrier(source.stage, dst.stage, vk::DependencyFlagBits::eByRegion, {}, {}, barrier); - layout = new_layout; -} - -void Texture::OverrideImageLayout(vk::ImageLayout new_layout) { - layout = new_layout; -} - -void Texture::Upload(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span pixels) { - u32 request_size = is_rgb ? (pixels.size() / 3) * 4 : - (is_d24s8 ? 
(pixels.size() / 4) * 5 : pixels.size()); - auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(request_size); - if (!buffer) { - LOG_ERROR(Render_Vulkan, "Cannot upload pixels without staging buffer!"); - } - - // Copy pixels to staging buffer - auto& state = VulkanState::Get(); - state.EndRendering(); - - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - - // Automatically convert RGB to RGBA - if (is_rgb) { - auto data = RGBToRGBA(pixels); - std::memcpy(buffer, data.data(), data.size()); - } - else if (is_d24s8) { - auto data = D24S8ToD32S8(pixels); - std::memcpy(buffer, data.data(), data.size() * sizeof(data[0])); - } - else { - std::memcpy(buffer, pixels.data(), pixels.size()); - } - - std::array copy_regions; - u32 region_count = 1; - - copy_regions[0] = vk::BufferImageCopy{ - offset, row_length, region.extent.height, - {aspect, level, layer, 1}, - {region.offset.x, region.offset.y, 0}, - {region.extent.width, region.extent.height, 1} + const VmaAllocationCreateInfo alloc_create_info = { + .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT, + .usage = VMA_MEMORY_USAGE_AUTO }; - if (aspect & vk::ImageAspectFlagBits::eDepth && - aspect & vk::ImageAspectFlagBits::eStencil) { - // Copying both depth and stencil requires two seperate regions - copy_regions[1] = copy_regions[0]; - copy_regions[0].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; - copy_regions[1].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; + VkImage unsafe_image = VK_NULL_HANDLE; + VkImageCreateInfo unsafe_image_info = static_cast(image_info); + VmaAllocationInfo alloc_info; + VmaAllocator allocator = instance.GetAllocator(); - region_count++; - } + // Allocate texture memory + vmaCreateImage(allocator, &unsafe_image_info, &alloc_create_info, + &unsafe_image, &allocation, &alloc_info); + image = vk::Image{unsafe_image}; - // Transition image to transfer format - Transition(cmdbuffer, 
vk::ImageLayout::eTransferDstOptimal); + // Map memory + mapped_ptr = alloc_info.pMappedData; - cmdbuffer.copyBufferToImage(g_vk_task_scheduler->GetStaging().GetBuffer(), - texture, vk::ImageLayout::eTransferDstOptimal, region_count, - copy_regions.data()); - - // Prepare image for shader reads - Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); -} - -void Texture::Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span memory) { - u32 request_size = is_rgb ? (memory.size() / 3) * 4 : - (is_d24s8 ? (memory.size() / 4) * 8 : memory.size()); - auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(request_size); - if (!buffer) { - LOG_ERROR(Render_Vulkan, "Cannot download texture without staging buffer!"); - } - - auto& state = VulkanState::Get(); - state.EndRendering(); - - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - - std::array copy_regions; - u32 region_count = 1; - - copy_regions[0] = vk::BufferImageCopy{ - offset, row_length, region.extent.height, - {aspect, level, layer, 1}, - {region.offset.x, region.offset.y, 0}, - {region.extent.width, region.extent.height, 1} + // Transition image to VK_IMAGE_LAYOUT_GENERAL. 
This layout is convenient + // for staging textures since it allows for well defined host access and + // works with vkCmdBlitImage, thus eliminating the need for layout transitions + const vk::ImageMemoryBarrier barrier = { + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eNone, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eGeneral, + .image = image, + .subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, info.levels, 0, 1} }; - if (aspect & vk::ImageAspectFlagBits::eDepth && - aspect & vk::ImageAspectFlagBits::eStencil) { - // Copying both depth and stencil requires two seperate regions - copy_regions[1] = copy_regions[0]; - copy_regions[0].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; - copy_regions[1].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; + vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer(); + command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eBottomOfPipe, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, + {}, {}, barrier); +} - region_count++; - } +StagingTexture::~StagingTexture() { + if (image) { + auto deleter = [allocation = allocation, + image = image](vk::Device device, VmaAllocator allocator) { + vmaDestroyImage(allocator, static_cast(image), allocation); + }; - // Transition image to transfer format - auto old_layout = GetLayout(); - Transition(cmdbuffer, vk::ImageLayout::eTransferSrcOptimal); - - cmdbuffer.copyImageToBuffer(texture, vk::ImageLayout::eTransferSrcOptimal, - g_vk_task_scheduler->GetStaging().GetBuffer(), - region_count, copy_regions.data()); - - // Restore layout - Transition(cmdbuffer, old_layout); - - // Wait for the data to be available - // NOTE: This is really slow and should be reworked - g_vk_task_scheduler->Submit(true); - - // Automatically convert RGB to RGBA - if (is_rgb) { - auto data = RGBAToRGB(std::span(buffer, request_size)); - std::memcpy(memory.data(), 
data.data(), memory.size()); - } - else if (is_d24s8) { - auto data = D32S8ToD24S8(std::span(buffer, request_size)); - std::memcpy(memory.data(), data.data(), memory.size()); - } - else { - std::memcpy(memory.data(), buffer, memory.size()); + // Schedule deletion of the texture after it's no longer used by the GPU + scheduler.Schedule(deleter); } } -template -std::span SpanCast(std::span span) { - return std::span(reinterpret_cast(span.data()), span.size_bytes() / sizeof(Out)); +void StagingTexture::Commit(u32 size) { + VmaAllocator allocator = instance.GetAllocator(); + vmaFlushAllocation(allocator, allocation, 0, size); } -std::vector Texture::RGBToRGBA(std::span data) { - ASSERT(data.size() % 3 == 0); +Sampler::Sampler(Instance& instance, SamplerInfo info) : + SamplerBase(info), instance(instance) { - u32 new_size = (data.size() / 3) * 4; - std::vector rgba(new_size); + auto properties = instance.GetPhysicalDevice().getProperties(); + const auto filtering = PicaToVK::TextureFilterMode(info.mag_filter, + info.min_filter, + info.mip_filter); + const vk::SamplerCreateInfo sampler_info = { + .magFilter = filtering.mag_filter, + .minFilter = filtering.min_filter, + .mipmapMode = filtering.mip_mode, + .addressModeU = PicaToVK::WrapMode(info.wrap_s), + .addressModeV = PicaToVK::WrapMode(info.wrap_t), + .anisotropyEnable = true, + .maxAnisotropy = properties.limits.maxSamplerAnisotropy, + .compareEnable = false, + .compareOp = vk::CompareOp::eAlways, + .borderColor = vk::BorderColor::eIntOpaqueBlack, + .unnormalizedCoordinates = false + }; - u32 dst_pos = 0; - for (u32 i = 0; i < data.size(); i += 3) { - std::memcpy(rgba.data() + dst_pos, data.data() + i, 3); - rgba[dst_pos + 3] = 255u; - dst_pos += 4; - } - - return rgba; + vk::Device device = instance.GetDevice(); + sampler = device.createSampler(sampler_info); } -std::vector Texture::D24S8ToD32S8(std::span data) { - ASSERT(data.size() % 4 == 0); - - std::vector d32s8; - std::span d24s8 = SpanCast(data); - - 
d32s8.reserve(data.size() * 2); - std::ranges::transform(d24s8, std::back_inserter(d32s8), [](u32 comp) -> u64 { - // Convert normalized 24bit depth component to floating point - float fdepth = static_cast(comp & 0xFFFFFF) / 0xFFFFFF; - u64 result = static_cast(comp) << 8; - - // Use std::memcpy to avoid the unsafe casting required to preserve the floating - // point bits - std::memcpy(&result, &fdepth, 4); - return result; - }); - - return d32s8; -} - -std::vector Texture::RGBAToRGB(std::span data) { - ASSERT(data.size() % 4 == 0); - - u32 new_size = (data.size() / 4) * 3; - std::vector rgb(new_size); - - u32 dst_pos = 0; - for (u32 i = 0; i < data.size(); i += 4) { - std::memcpy(rgb.data() + dst_pos, data.data() + i, 3); - dst_pos += 3; - } - - return rgb; -} - -std::vector Texture::D32S8ToD24S8(std::span data) { - ASSERT(data.size() % 8 == 0); - - std::vector d24s8; - std::span d32s8 = SpanCast(data); - - d24s8.reserve(data.size() / 2); - std::ranges::transform(d32s8, std::back_inserter(d24s8), [](u64 comp) -> u32 { - // Convert floating point to 24bit normalized depth - float fdepth = 0.f; - u32 depth = comp & 0xFFFFFFFF; - std::memcpy(&fdepth, &depth, 4); - - u32 stencil = (comp >> 32) & 0xFF; - u64 result = static_cast(fdepth * 0xFFFFFF) | (stencil << 24); - return result; - }); - - return d24s8; +Sampler::~Sampler() { + vk::Device device = instance.GetDevice(); + device.destroySampler(sampler); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture.h b/src/video_core/renderer_vulkan/vk_texture.h index 53a1c35b5..4e53d83cd 100644 --- a/src/video_core/renderer_vulkan/vk_texture.h +++ b/src/video_core/renderer_vulkan/vk_texture.h @@ -4,80 +4,147 @@ #pragma once -#include -#include -#include -#include -#include "common/math_util.h" -#include "video_core/renderer_vulkan/vk_buffer.h" -#include "video_core/renderer_vulkan/vk_surface_params.h" +#include "video_core/common/texture.h" +#include "video_core/renderer_vulkan/vk_common.h" 
-namespace Vulkan { +namespace VideoCore::Vulkan { -/// Vulkan texture object -class Texture final : public NonCopyable { +// PICA texture have at most 8 mipmap levels +constexpr u32 TEXTURE_MAX_LEVELS = 8; + +class Instance; +class CommandScheduler; + +/** + * A texture located in GPU memory + */ +class Texture : public VideoCore::TextureBase { public: - /// Information for the creation of the target texture - struct Info { - u32 width, height; - vk::Format format; - vk::ImageType type; - vk::ImageViewType view_type; - vk::ImageUsageFlags usage; - u32 multisamples = 1; - u32 levels = 1, layers = 1; - }; + // Default constructor + Texture(Instance& instance, CommandScheduler& scheduler); + + // Constructor for texture creation + Texture(Instance& instance, CommandScheduler& scheduler, + const TextureInfo& info); + + // Constructor for not owning textures (swapchain) + Texture(Instance& instance, CommandScheduler& scheduler, + vk::Image image, const TextureInfo& info); - Texture() = default; ~Texture(); - /// Enable move operations - Texture(Texture&& other) noexcept; - Texture& operator=(Texture&& other) noexcept; + /// Uploads pixel data to the GPU memory + void Upload(Rect2D rectangle, u32 stride, std::span data, + u32 level = 0) override; - /// Create a new Vulkan texture object - void Create(const Info& info); - void Create(Texture& texture); - void Adopt(const Info& info, vk::Image image); - void Destroy(); + /// Downloads pixel data from GPU memory + void Download(Rect2D rectangle, u32 stride, std::span data, + u32 level = 0) override; - /// Query objects - bool IsValid() const { return texture; } - vk::Image GetHandle() const { return texture; } - vk::ImageView GetView() const { return view; } - vk::Format GetFormat() const { return info.format; } - vk::ImageLayout GetLayout() const { return layout; } - u32 GetSamples() const { return info.multisamples; } - u32 GetSize() const { return image_size; } - vk::Rect2D GetArea() const { return {{0, 0},{info.width, 
info.height}}; } + /// Copies the rectangle area specified to the destination texture + void BlitTo(TextureHandle dest, Rect2D src_rectangle, Rect2D dest_rect, + u32 src_level = 0, u32 dest_level = 0) override; - /// Copies CPU side pixel data to the GPU texture buffer - void Upload(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span pixels); - void Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span dst); + /// Overrides the layout of provided image subresource + void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1); - /// Used to transition the image to an optimal layout during transfers - void OverrideImageLayout(vk::ImageLayout new_layout); - void Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout); - void Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout, u32 start_level, u32 level_count, - u32 start_layer, u32 layer_count); + /// Transitions part of the image to the provided layout + void Transition(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout, + u32 level = 0, u32 level_count = 1); + + /// Returns the underlying vulkan image handle + vk::Image GetHandle() const { + return image; + } + + /// Returns the Vulkan image view + vk::ImageView GetView() const { + return image_view; + } + + /// Returns the internal format backing the texture. + /// It may not match the input pixel format. 
+ vk::Format GetInternalFormat() const { + return internal_format; + } + + /// Returns the current image layout + vk::ImageLayout GetLayout(u32 level = 0) const { + return layouts.at(level); + } + + /// Returns a rectangle that represents the complete area of the texture + vk::Rect2D GetArea() const { + return {{0, 0},{info.width, info.height}}; + } private: - std::vector RGBToRGBA(std::span data); - std::vector D24S8ToD32S8(std::span data); + Instance& instance; + CommandScheduler& scheduler; - std::vector RGBAToRGB(std::span data); - std::vector D32S8ToD24S8(std::span data); + // Vulkan texture handle + vk::Image image = VK_NULL_HANDLE; + vk::ImageView image_view = VK_NULL_HANDLE; + VmaAllocation allocation = nullptr; + bool is_texture_owned = true; + + // Texture properties + vk::Format advertised_format = vk::Format::eUndefined; + vk::Format internal_format = vk::Format::eUndefined; + vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone; + std::array layouts; +}; + +/** + * Staging texture located in CPU memory. 
Used for intermediate format + * conversions + */ +class StagingTexture : public VideoCore::TextureBase { +public: + StagingTexture(Instance& instance, CommandScheduler& scheduler, + const TextureInfo& info); + ~StagingTexture(); + + /// Flushes any writes made to texture memory + void Commit(u32 size); + + /// Returns a span of the mapped texture memory + void* GetMappedPtr() { + return mapped_ptr; + } + + /// Returns the staging image handle + vk::Image GetHandle() const { + return image; + } private: - Texture::Info info{}; - vk::ImageLayout layout{}; - vk::ImageAspectFlags aspect{}; - vk::Image texture; - vk::ImageView view; - vk::DeviceMemory memory; - u32 image_size{}; - bool adopted{false}; - bool is_rgb{false}, is_d24s8{false}; + Instance& instance; + CommandScheduler& scheduler; + + vk::Image image = VK_NULL_HANDLE; + VmaAllocation allocation = VK_NULL_HANDLE; + vk::Format format = vk::Format::eUndefined; + u32 capacity = 0; + void* mapped_ptr = nullptr; +}; + +/** + * Vulkan sampler object + */ +class Sampler : public VideoCore::SamplerBase { +public: + Sampler(Instance& instance, SamplerInfo info); + ~Sampler() override; + + /// Returns the underlying vulkan sampler handle + vk::Sampler GetHandle() const { + return sampler; + } + +private: + Instance& instance; + vk::Sampler sampler; }; } // namespace Vulkan diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 3dde933a1..cbaf26a73 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -56,9 +56,9 @@ struct OutputVertex { Common::Vec2 tc0; Common::Vec2 tc1; float24 tc0_w; - INSERT_PADDING_WORDS(1); + INSERT_PADDING_WORDS_NOINIT(1); Common::Vec3 view; - INSERT_PADDING_WORDS(1); + INSERT_PADDING_WORDS_NOINIT(1); Common::Vec2 tc2; static void ValidateSemantics(const RasterizerRegs& regs); diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 606762788..b010aaa6f 100644 --- 
a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -164,8 +164,10 @@ static void LogCritical(const char* msg) { void JitShader::Compile_Assert(bool condition, const char* msg) { if (!condition) { + ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(ABI_PARAM1, reinterpret_cast(msg)); CallFarFunction(*this, LogCritical); + ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); } } @@ -595,11 +597,11 @@ void JitShader::Compile_END(Instruction instr) { } void JitShader::Compile_BREAKC(Instruction instr) { - Compile_Assert(looping, "BREAKC must be inside a LOOP"); - if (looping) { + Compile_Assert(loop_depth, "BREAKC must be inside a LOOP"); + if (loop_depth) { Compile_EvaluateCondition(instr); - ASSERT(loop_break_label); - jnz(*loop_break_label); + ASSERT(!loop_break_labels.empty()); + jnz(loop_break_labels.back(), T_NEAR); } } @@ -725,9 +727,11 @@ void JitShader::Compile_IF(Instruction instr) { void JitShader::Compile_LOOP(Instruction instr) { Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); - Compile_Assert(!looping, "Nested loops not supported"); - - looping = true; + Compile_Assert(loop_depth < 1, "Nested loops may not be supported"); + if (loop_depth++) { + const auto loop_save_regs = BuildRegSet({LOOPCOUNT_REG, LOOPINC, LOOPCOUNT}); + ABI_PushRegistersAndAdjustStack(*this, loop_save_regs, 0); + } // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. 
// The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by @@ -746,16 +750,20 @@ void JitShader::Compile_LOOP(Instruction instr) { Label l_loop_start; L(l_loop_start); - loop_break_label = Xbyak::Label(); + loop_break_labels.emplace_back(Xbyak::Label()); Compile_Block(instr.flow_control.dest_offset + 1); add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component sub(LOOPCOUNT, 1); // Increment loop count by 1 jnz(l_loop_start); // Loop if not equal - L(*loop_break_label); - loop_break_label.reset(); - looping = false; + L(loop_break_labels.back()); + loop_break_labels.pop_back(); + + if (--loop_depth) { + const auto loop_save_regs = BuildRegSet({LOOPCOUNT_REG, LOOPINC, LOOPCOUNT}); + ABI_PopRegistersAndAdjustStack(*this, loop_save_regs, 0); + } } void JitShader::Compile_JMP(Instruction instr) { @@ -892,7 +900,7 @@ void JitShader::Compile(const std::array* program_ // Reset flow control state program = (CompiledShader*)getCurr(); program_counter = 0; - looping = false; + loop_depth = 0; instruction_labels.fill(Xbyak::Label()); // Find all `CALL` instructions and identify return locations diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index 507cd0ff3..573bdf8d3 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -120,15 +120,15 @@ private: /// Mapping of Pica VS instructions to pointers in the emitted code std::array instruction_labels; - /// Label pointing to the end of the current LOOP block. Used by the BREAKC instruction to break - /// out of the loop. - std::optional loop_break_label; + /// Labels pointing to the end of each nested LOOP block. Used by the BREAKC instruction to + /// break out of a loop. 
+ std::vector loop_break_labels; /// Offsets in code where a return needs to be inserted std::vector return_offsets; unsigned program_counter = 0; ///< Offset of the next instruction to decode - bool looping = false; ///< True if compiling a loop, used to check for nested loops + u8 loop_depth = 0; ///< Depth of the (nested) loops currently compiled using CompiledShader = void(const void* setup, void* state, const u8* start_addr); CompiledShader* program = nullptr; diff --git a/src/video_core/swrasterizer/clipper.h b/src/video_core/swrasterizer/clipper.h index c9e14e3d7..7046cdc50 100644 --- a/src/video_core/swrasterizer/clipper.h +++ b/src/video_core/swrasterizer/clipper.h @@ -13,7 +13,7 @@ namespace Clipper { using Shader::OutputVertex; -void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2); +void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2); } // namespace Clipper } // namespace Pica diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index e210801ff..a36f14b18 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -46,7 +46,6 @@ ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory) OpenGL::GLES = Settings::values.use_gles; - //g_renderer = std::make_unique(emu_window); g_renderer = std::make_unique(emu_window); ResultStatus result = g_renderer->Init(); diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 8534859b8..68eb4f6bb 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -6,8 +6,8 @@ #include #include -#include #include +#include #include "core/frontend/emu_window.h" namespace Frontend {