video_core: Rewrite to backend system

* Still doesn't build; this is just a massive code dump from all the recent progress.
emufan4568
2022-08-08 00:00:52 +03:00
parent ff9b0dfe2f
commit 810df95b81
81 changed files with 6111 additions and 2832 deletions

View File

@ -157,6 +157,7 @@ set(REQUIRED_LIBRARIES
inih
lodepng
glslang
robin-hood-hashing
zstd
)
@ -297,6 +298,7 @@ set(REQUIRED_PACKAGES
zstd
unofficial-enet
lodepng
robin_hood
)
foreach(PACKAGE ${REQUIRED_PACKAGES})

View File

@ -9,7 +9,7 @@ endif()
# Configure vcpkg
set(VCPKG_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/externals/vcpkg")
if (WIN32)
execute_process(COMMAND cmd /C "${VCPKG_DIRECTORY}/bootstrap-vcpkg.bat")
#execute_process(COMMAND cmd /C "${VCPKG_DIRECTORY}/bootstrap-vcpkg.bat")
set(VCPKG_EXECUTABLE "${VCPKG_DIRECTORY}/vcpkg.exe")
else()
execute_process(COMMAND bash "${VCPKG_DIRECTORY}/bootstrap-vcpkg.sh")

View File

@ -450,7 +450,7 @@ void DspLle::SetServiceToInterrupt(std::weak_ptr<Service::DSP::DSP_DSP> dsp) {
return;
if (pipe == 0) {
// pipe 0 is for debug. 3DS automatically drains this pipe and discards the data
impl->ReadPipe(pipe, impl->GetPipeReadableSize(pipe));
impl->ReadPipe(static_cast<u8>(pipe), impl->GetPipeReadableSize(pipe));
} else {
std::lock_guard lock(HLE::g_hle_lock);
if (auto locked = dsp.lock()) {

View File

@ -944,16 +944,14 @@ void Config::SaveMultiplayerValues() {
// Write ban list
qt_config->beginWriteArray(QStringLiteral("username_ban_list"));
for (std::size_t i = 0; i < UISettings::values.ban_list.first.size(); ++i) {
int index = static_cast<int>(i);
qt_config->setArrayIndex(index);
qt_config->setArrayIndex(static_cast<int>(i));
WriteSetting(QStringLiteral("username"),
QString::fromStdString(UISettings::values.ban_list.first[i]));
}
qt_config->endArray();
qt_config->beginWriteArray(QStringLiteral("ip_ban_list"));
for (std::size_t i = 0; i < UISettings::values.ban_list.second.size(); ++i) {
int index = static_cast<int>(i);
qt_config->setArrayIndex(index);
qt_config->setArrayIndex(static_cast<int>(i));
WriteSetting(QStringLiteral("ip"),
QString::fromStdString(UISettings::values.ban_list.second[i]));
}

View File

@ -256,8 +256,7 @@ void ConfigureCamera::SetConfiguration() {
int index = GetSelectedCameraIndex();
for (std::size_t i = 0; i < Implementations.size(); i++) {
if (Implementations[i] == camera_name[index]) {
int current_index = static_cast<int>(i);
ui->image_source->setCurrentIndex(current_index);
ui->image_source->setCurrentIndex(static_cast<int>(i));
}
}
if (camera_name[index] == "image") {

View File

@ -76,7 +76,7 @@ void IPCRecorderWidget::OnEntryUpdated(IPCDebugger::RequestRecord record) {
QTreeWidgetItem entry{
{QString::number(record.id), GetStatusStr(record), service, GetFunctionName(record)}};
const int row_id = record.id - id_offset;
const std::size_t row_id = record.id - id_offset;
if (ui->main->invisibleRootItem()->childCount() > row_id) {
records[row_id] = record;
(*ui->main->invisibleRootItem()->child(row_id)) = entry;

View File

@ -45,7 +45,7 @@ private:
// The offset between record id and row id. Assuming record ids are assigned
// consecutively and only the 'Clear' action can be performed, this is enough.
// The initial value is 1, which means record 1 = row 0.
int id_offset = 1;
std::size_t id_offset = 1;
std::vector<IPCDebugger::RequestRecord> records;
};

View File

@ -60,6 +60,7 @@ add_library(common STATIC
detached_tasks.cpp
detached_tasks.h
bit_field.h
bit_field_array.h
bit_set.h
cityhash.cpp
cityhash.h
@ -72,6 +73,7 @@ add_library(common STATIC
file_util.h
flag.h
hash.h
intrusive_ptr.h
linear_disk_cache.h
logging/backend.cpp
logging/backend.h
@ -87,6 +89,8 @@ add_library(common STATIC
microprofile.h
microprofileui.h
misc.cpp
object_pool.cpp
object_pool.h
param_package.cpp
param_package.h
quaternion.h

View File

@ -36,6 +36,18 @@
#include "common/common_funcs.h"
#include "common/swap.h"
// User-defined types need to specialize this
template <typename T>
struct MakeUnsigned {
using type = std::make_unsigned_t<T>;
};
// Ensure that user-defined types are sane
template <class T>
concept ValidType = requires(T t) {
static_cast<typename MakeUnsigned<T>::type>(t);
};
/*
* Abstract bitfield class
*
@ -110,6 +122,7 @@
*/
#pragma pack(1)
template <std::size_t Position, std::size_t Bits, typename T, typename EndianTag = LETag>
requires ValidType<T>
struct BitField {
private:
// UnderlyingType is T for non-enum types and the underlying type of T if
@ -120,7 +133,7 @@ private:
std::enable_if<true, T>>::type;
// We store the value as the unsigned type to avoid undefined behaviour on value shifting
using StorageType = std::make_unsigned_t<UnderlyingType>;
using StorageType = typename MakeUnsigned<UnderlyingType>::type;
using StorageTypeWithEndian = typename AddEndian<StorageType, EndianTag>::type;
@ -199,3 +212,38 @@ private:
template <std::size_t Position, std::size_t Bits, typename T>
using BitFieldBE = BitField<Position, Bits, T, BETag>;
/**
* Abstract bit flag class. This is basically a specialization of BitField for single-bit fields.
* Instead of being cast to the underlying type, it acts like a boolean.
*/
#pragma pack(1)
template <std::size_t Position, typename T, typename EndianTag = LETag>
struct BitFlag : protected BitField<Position, 1, T, EndianTag> {
private:
BitFlag(T val) = delete;
using ParentType = BitField<Position, 1, T>;
public:
BitFlag() = default;
BitFlag& operator=(const BitFlag&) = delete;
constexpr BitFlag& operator=(bool val) {
Assign(val);
return *this;
}
constexpr void Assign(bool value) {
ParentType::Assign(value);
}
[[nodiscard]] constexpr operator bool() const {
return Value();
}
[[nodiscard]] constexpr bool Value() const {
return ParentType::Value() != 0;
}
};
#pragma pack()
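// A minimal usage sketch (the register layout below is hypothetical): BitFlag
// slots into the same union pattern as BitField but behaves like a bool.
//
// union ControlReg {
//     u32 raw;
//     BitField<0, 4, u32> mode;
//     BitFlag<4, u32> enable;
// };
//
// ControlReg ctrl{};
// ctrl.enable = true;           // sets bit 4
// const bool on = ctrl.enable;  // reads back as a bool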

View File

@ -0,0 +1,287 @@
#pragma once
#include <cstddef>
#include <limits>
#include <type_traits>
#include "common/swap.h"
// Language limitations require the following to make these formattable
// (formatter<BitFieldArray<position, bits, size, T>::Ref> is not legal)
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayConstRef;
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayRef;
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayConstIterator;
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayIterator;
#pragma pack(1)
template <std::size_t position, std::size_t bits, std::size_t size, typename T,
// StorageType is T for non-enum types and the underlying type of T if
// T is an enumeration. Note that T is wrapped within an enable_if in the
// former case to workaround compile errors which arise when using
// std::underlying_type<T>::type directly.
typename StorageType = typename std::conditional_t<
std::is_enum<T>::value, std::underlying_type<T>, std::enable_if<true, T>>::type>
struct BitFieldArray
{
using Ref = BitFieldArrayRef<position, bits, size, T, StorageType>;
using ConstRef = BitFieldArrayConstRef<position, bits, size, T, StorageType>;
using Iterator = BitFieldArrayIterator<position, bits, size, T, StorageType>;
using ConstIterator = BitFieldArrayConstIterator<position, bits, size, T, StorageType>;
private:
// This constructor might be considered ambiguous:
// Would it initialize the storage or just the bitfield?
// Hence, delete it. Use the assignment operator to set bitfield values!
BitFieldArray(T val) = delete;
public:
// Force default constructor to be created
// so that we can use this within unions
constexpr BitFieldArray() = default;
// Initializer list constructor
constexpr BitFieldArray(std::initializer_list<T> items) : storage(StorageType{}) {
u32 index = 0;
for (auto& item : items) {
SetValue(index++, item);
}
}
// We explicitly delete the copy assignment operator here, because the
// default copy assignment would copy the full storage value, rather than
// just the bits relevant to this particular bit field.
// Ideally, we would just implement the copy assignment to copy only the
// relevant bits, but we're prevented from doing that because the savestate
// code expects that this class is trivially copyable.
BitFieldArray& operator=(const BitFieldArray&) = delete;
public:
constexpr bool IsSigned() const { return std::is_signed<T>(); }
constexpr std::size_t StartBit() const { return position; }
constexpr std::size_t NumBits() const { return bits; }
constexpr std::size_t Size() const { return size; }
constexpr std::size_t TotalNumBits() const { return bits * size; }
constexpr T Value(size_t index) const { return Value(std::is_signed<T>(), index); }
constexpr void SetValue(size_t index, T value) {
const size_t pos = position + bits * index;
storage = (storage & ~GetElementMask(index)) |
((static_cast<StorageType>(value) << pos) & GetElementMask(index));
}
Ref operator[](size_t index) { return Ref(this, index); }
constexpr const ConstRef operator[](size_t index) const { return ConstRef(this, index); }
constexpr Iterator begin() { return Iterator(this, 0); }
constexpr Iterator end() { return Iterator(this, size); }
constexpr ConstIterator begin() const { return ConstIterator(this, 0); }
constexpr ConstIterator end() const { return ConstIterator(this, size); }
constexpr ConstIterator cbegin() const { return begin(); }
constexpr ConstIterator cend() const { return end(); }
private:
// Unsigned version of StorageType
using StorageTypeU = std::make_unsigned_t<StorageType>;
constexpr T Value(std::true_type, size_t index) const
{
const size_t pos = position + bits * index;
const size_t shift_amount = 8 * sizeof(StorageType) - bits;
return static_cast<T>((storage << (shift_amount - pos)) >> shift_amount);
}
constexpr T Value(std::false_type, size_t index) const
{
const size_t pos = position + bits * index;
return static_cast<T>((storage & GetElementMask(index)) >> pos);
}
static constexpr StorageType GetElementMask(size_t index)
{
const size_t pos = position + bits * index;
return (std::numeric_limits<StorageTypeU>::max() >> (8 * sizeof(StorageType) - bits)) << pos;
}
StorageType storage;
static_assert(bits * size + position <= 8 * sizeof(StorageType), "Bitfield array out of range");
static_assert(sizeof(T) <= sizeof(StorageType), "T must fit in StorageType");
// And, you know, just in case people specify something stupid like bits=position=0x80000000
static_assert(position < 8 * sizeof(StorageType), "Invalid position");
static_assert(bits <= 8 * sizeof(T), "Invalid number of bits");
static_assert(bits > 0, "Invalid number of bits");
static_assert(size <= 8 * sizeof(StorageType), "Invalid size");
static_assert(size > 0, "Invalid size");
};
#pragma pack()
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayConstRef
{
friend struct BitFieldArray<position, bits, size, T, S>;
friend class BitFieldArrayConstIterator<position, bits, size, T, S>;
public:
constexpr T Value() const { return m_array->Value(m_index); };
constexpr operator T() const { return Value(); }
private:
constexpr BitFieldArrayConstRef(const BitFieldArray<position, bits, size, T, S>* array,
size_t index)
: m_array(array), m_index(index)
{
}
const BitFieldArray<position, bits, size, T, S>* const m_array;
const size_t m_index;
};
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayRef
{
friend struct BitFieldArray<position, bits, size, T, S>;
friend class BitFieldArrayIterator<position, bits, size, T, S>;
public:
constexpr T Value() const { return m_array->Value(m_index); };
constexpr operator T() const { return Value(); }
T operator=(const BitFieldArrayRef<position, bits, size, T, S>& value) const
{
m_array->SetValue(m_index, value);
return value;
}
T operator=(T value) const
{
m_array->SetValue(m_index, value);
return value;
}
private:
constexpr BitFieldArrayRef(BitFieldArray<position, bits, size, T, S>* array, size_t index)
: m_array(array), m_index(index)
{
}
BitFieldArray<position, bits, size, T, S>* const m_array;
const size_t m_index;
};
// Satisfies LegacyOutputIterator / std::output_iterator.
// Does not satisfy LegacyInputIterator / std::input_iterator as std::output_iterator_tag does not
// extend std::input_iterator_tag.
// Does not satisfy LegacyForwardIterator / std::forward_iterator, as that requires use of real
// references instead of proxy objects.
// This iterator allows use of BitFieldArray in range-based for loops, and with fmt::join.
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayIterator
{
friend struct BitFieldArray<position, bits, size, T, S>;
public:
using iterator_category = std::output_iterator_tag;
using value_type = T;
using difference_type = ptrdiff_t;
using pointer = void;
using reference = BitFieldArrayRef<position, bits, size, T, S>;
private:
constexpr BitFieldArrayIterator(BitFieldArray<position, bits, size, T, S>* array, size_t index)
: m_array(array), m_index(index)
{
}
public:
// Required by std::input_or_output_iterator
constexpr BitFieldArrayIterator() = default;
// Required by LegacyIterator
constexpr BitFieldArrayIterator(const BitFieldArrayIterator& other) = default;
// Required by LegacyIterator
BitFieldArrayIterator& operator=(const BitFieldArrayIterator& other) = default;
// Move constructor and assignment operators, explicitly defined for completeness
constexpr BitFieldArrayIterator(BitFieldArrayIterator&& other) = default;
BitFieldArrayIterator& operator=(BitFieldArrayIterator&& other) = default;
public:
BitFieldArrayIterator& operator++()
{
m_index++;
return *this;
}
BitFieldArrayIterator operator++(int)
{
BitFieldArrayIterator other(*this);
++*this;
return other;
}
constexpr reference operator*() const { return reference(m_array, m_index); }
constexpr bool operator==(BitFieldArrayIterator other) const { return m_index == other.m_index; }
constexpr bool operator!=(BitFieldArrayIterator other) const { return m_index != other.m_index; }
private:
BitFieldArray<position, bits, size, T, S>* m_array;
size_t m_index;
};
// Satisfies LegacyInputIterator / std::input_iterator.
// Does not satisfy LegacyForwardIterator / std::forward_iterator, as that requires use of real
// references instead of proxy objects.
// This iterator allows use of BitFieldArray in range-based for loops, and with fmt::join.
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayConstIterator
{
friend struct BitFieldArray<position, bits, size, T, S>;
public:
using iterator_category = std::input_iterator_tag;
using value_type = T;
using difference_type = ptrdiff_t;
using pointer = void;
using reference = BitFieldArrayConstRef<position, bits, size, T, S>;
private:
constexpr BitFieldArrayConstIterator(const BitFieldArray<position, bits, size, T, S>* array,
size_t index)
: m_array(array), m_index(index)
{
}
public:
// Required by std::input_or_output_iterator
constexpr BitFieldArrayConstIterator() = default;
// Required by LegacyIterator
constexpr BitFieldArrayConstIterator(const BitFieldArrayConstIterator& other) = default;
// Required by LegacyIterator
BitFieldArrayConstIterator& operator=(const BitFieldArrayConstIterator& other) = default;
// Move constructor and assignment operators, explicitly defined for completeness
constexpr BitFieldArrayConstIterator(BitFieldArrayConstIterator&& other) = default;
BitFieldArrayConstIterator& operator=(BitFieldArrayConstIterator&& other) = default;
public:
BitFieldArrayConstIterator& operator++()
{
m_index++;
return *this;
}
BitFieldArrayConstIterator operator++(int)
{
BitFieldArrayConstIterator other(*this);
++*this;
return other;
}
constexpr reference operator*() const { return reference(m_array, m_index); }
constexpr bool operator==(BitFieldArrayConstIterator other) const
{
return m_index == other.m_index;
}
constexpr bool operator!=(BitFieldArrayConstIterator other) const
{
return m_index != other.m_index;
}
private:
const BitFieldArray<position, bits, size, T, S>* m_array;
size_t m_index;
};
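// A usage sketch for a hypothetical layout: six 3-bit values packed into one
// u32, accessed through the proxy references and iterable with range-for.
//
// BitFieldArray<0, 3, 6, u32> fields{};
// fields[0] = 5;                // writes through BitFieldArrayRef
// const u32 first = fields[0];  // reads through the proxy's operator T
// for (const u32 value : fields) {
//     // iteration yields each packed element in order
// }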

View File

@ -15,11 +15,19 @@
#define CONCAT2(x, y) DO_CONCAT2(x, y)
#define DO_CONCAT2(x, y) x##y
// helper macro to properly align structure members.
// Calling INSERT_PADDING_BYTES will add a new member variable with a name like "pad121",
// depending on the current source line to make sure variable names are unique.
#define INSERT_PADDING_BYTES(num_bytes) u8 CONCAT2(pad, __LINE__)[(num_bytes)]
#define INSERT_PADDING_WORDS(num_words) u32 CONCAT2(pad, __LINE__)[(num_words)]
/// Helper macros to insert unused bytes or words to properly align structs. These values will be
/// zero-initialized.
#define INSERT_PADDING_BYTES(num_bytes) \
[[maybe_unused]] std::array<u8, num_bytes> CONCAT2(pad, __LINE__) {}
#define INSERT_PADDING_WORDS(num_words) \
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__) {}
/// These are similar to the INSERT_PADDING_* macros but do not zero-initialize the contents.
/// This keeps the structure trivial to construct.
#define INSERT_PADDING_BYTES_NOINIT(num_bytes) \
[[maybe_unused]] std::array<u8, num_bytes> CONCAT2(pad, __LINE__)
#define INSERT_PADDING_WORDS_NOINIT(num_words) \
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
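// For illustration, a hypothetical register block padded to its documented
// size with both macro flavors:
//
// struct ExampleRegs {
//     u32 control;
//     INSERT_PADDING_WORDS(3);         // zero-initialized filler
//     u32 status;
//     INSERT_PADDING_BYTES_NOINIT(8);  // uninitialized, keeps the struct trivial
// };
// static_assert(sizeof(ExampleRegs) == 0x1C, "ExampleRegs must match the documented layout");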
// Inlining
#ifdef _WIN32

View File

@ -11,6 +11,15 @@
namespace Common {
/**
 * Identity hash for keys that are already hashes; lets std::unordered_map
 * skip a redundant hashing pass
*/
struct IdentityHash {
u64 operator()(const u64 hash) const {
return hash;
}
};
/**
* Computes a 64-bit hash over the specified block of data
* @param data Block of data to compute hash over
@ -33,6 +42,14 @@ static inline u64 ComputeStructHash64(const T& data) noexcept {
return ComputeHash64(&data, sizeof(data));
}
/**
 * Combines hash lhs with hash rhs into a single digest (boost::hash_combine
 * style). The result is well-mixed, though not guaranteed to be unique.
*/
static inline std::size_t HashCombine(std::size_t lhs, std::size_t rhs) noexcept {
lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2);
return lhs;
}
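// A sketch of how these pair in a cache (CachedProgram and BuildProgram are
// hypothetical): the key is already a hash, so IdentityHash forwards it
// unchanged, and HashCombine folds two digests into one key.
//
// std::unordered_map<u64, CachedProgram, Common::IdentityHash> cache;
// const u64 key = Common::HashCombine(Common::ComputeHash64(code, code_size),
//                                     Common::ComputeStructHash64(config));
// cache.try_emplace(key, BuildProgram(code, config));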
/// A helper template that ensures the padding in a struct is initialized by memsetting to 0.
template <typename T>
struct HashableStruct {

src/common/intrusive_ptr.h (new file, 261 lines)
View File

@ -0,0 +1,261 @@
/* Copyright (c) 2017-2022 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <cstddef>
#include <utility>
#include <memory>
#include <atomic>
#include <type_traits>
/// Simple reference counter for single threaded environments
class SingleThreadCounter {
public:
inline void AddRef() {
count++;
}
inline bool Release() {
return --count == 0;
}
private:
std::size_t count = 1;
};
/// Thread-safe reference counter with atomics
class MultiThreadCounter {
public:
MultiThreadCounter() {
count.store(1, std::memory_order_relaxed);
}
inline void AddRef() {
count.fetch_add(1, std::memory_order_relaxed);
}
inline bool Release() {
auto result = count.fetch_sub(1, std::memory_order_acq_rel);
return result == 1;
}
private:
std::atomic_size_t count;
};
template <typename T>
class IntrusivePtr;
template <typename T, typename Deleter = std::default_delete<T>,
typename ReferenceOps = SingleThreadCounter>
class IntrusivePtrEnabled {
public:
using IntrusivePtrType = IntrusivePtr<T>;
using EnabledBase = T;
using EnabledDeleter = Deleter;
using EnabledReferenceOp = ReferenceOps;
IntrusivePtrEnabled() = default;
IntrusivePtrEnabled(const IntrusivePtrEnabled &) = delete;
void operator=(const IntrusivePtrEnabled &) = delete;
/// Decrement the reference counter and optionally free the memory
inline void ReleaseRef() {
if (ref_counter.Release()) {
Deleter()(static_cast<T*>(this));
}
}
/// Increment the reference counter
inline void AddRef() {
ref_counter.AddRef();
}
protected:
IntrusivePtr<T> RefFromThis();
private:
ReferenceOps ref_counter;
};
/**
* Lightweight alternative to std::shared_ptr for reference counting
* usecases
*/
template <typename T>
class IntrusivePtr {
using ReferenceBase = IntrusivePtrEnabled<
typename T::EnabledBase,
typename T::EnabledDeleter,
typename T::EnabledReferenceOp>;
template <typename U>
friend class IntrusivePtr;
public:
IntrusivePtr() = default;
explicit IntrusivePtr(T *handle) : data(handle) {}
template <typename U>
IntrusivePtr(const IntrusivePtr<U> &other) {
*this = other;
}
IntrusivePtr(const IntrusivePtr &other) {
*this = other;
}
template <typename U>
IntrusivePtr(IntrusivePtr<U> &&other) noexcept {
*this = std::move(other);
}
IntrusivePtr(IntrusivePtr &&other) noexcept {
*this = std::move(other);
}
~IntrusivePtr() {
Reset();
}
/// Returns a reference to the underlying data
T& operator*() {
return *data;
}
/// Returns an immutable reference to the underlying data
const T& operator*() const {
return *data;
}
/// Returns a pointer to the underlying data
T* operator->() {
return data;
}
/// Returns an immutable pointer to the underlying data
const T* operator->() const {
return data;
}
/// Returns true if the underlying pointer is valid
bool IsValid() const {
return data != nullptr;
}
/// Default comparison operators
auto operator<=>(const IntrusivePtr& other) const = default;
/// Returns the raw pointer to the data
T* Get() {
return data;
}
/// Returns an immutable raw pointer to the data
const T* Get() const {
return data;
}
void Reset() {
// Static up-cast here to avoid potential issues with multiple intrusive inheritance.
// Also makes sure that the pointer type actually inherits from this type.
if (data)
static_cast<ReferenceBase*>(data)->ReleaseRef();
data = nullptr;
}
template <typename U>
IntrusivePtr& operator=(const IntrusivePtr<U>& other) {
static_assert(std::is_base_of_v<T, U>, "Cannot safely assign downcasted intrusive pointers.");
Reset();
data = static_cast<T*>(other.data);
// Static up-cast here to avoid potential issues with multiple intrusive inheritance.
// Also makes sure that the pointer type actually inherits from this type.
if (data) {
static_cast<ReferenceBase*>(data)->AddRef();
}
return *this;
}
IntrusivePtr& operator=(const IntrusivePtr& other) {
if (this != &other) {
Reset();
data = other.data;
if (data)
static_cast<ReferenceBase*>(data)->AddRef();
}
return *this;
}
template <typename U>
IntrusivePtr &operator=(IntrusivePtr<U> &&other) noexcept {
Reset();
data = std::exchange(other.data, nullptr);
return *this;
}
IntrusivePtr &operator=(IntrusivePtr &&other) noexcept {
if (this != &other) {
Reset();
data = std::exchange(other.data, nullptr);
}
return *this;
}
T* Release() & {
return std::exchange(data, nullptr);
}
T* Release() && {
return std::exchange(data, nullptr);
}
private:
T* data = nullptr;
};
template <typename T, typename Deleter, typename ReferenceOps>
IntrusivePtr<T> IntrusivePtrEnabled<T, Deleter, ReferenceOps>::RefFromThis() {
AddRef();
return IntrusivePtr<T>(static_cast<T*>(this));
}
template <typename Derived>
using DerivedIntrusivePtrType = IntrusivePtr<Derived>;
template <typename T, typename... P>
DerivedIntrusivePtrType<T> MakeHandle(P &&... p) {
return DerivedIntrusivePtrType<T>(new T(std::forward<P>(p)...));
}
template <typename Base, typename Derived, typename... P>
typename Base::IntrusivePtrType MakeDerivedHandle(P &&... p) {
return typename Base::IntrusivePtrType(new Derived(std::forward<P>(p)...));
}
template <typename T>
using ThreadSafeIntrusivePtrEnabled = IntrusivePtrEnabled<T, std::default_delete<T>, MultiThreadCounter>;
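// A usage sketch with a hypothetical type: deriving from IntrusivePtrEnabled
// embeds the counter in the object, and MakeHandle starts it at refcount 1.
//
// class Widget : public IntrusivePtrEnabled<Widget> {};
//
// IntrusivePtr<Widget> a = MakeHandle<Widget>();
// IntrusivePtr<Widget> b = a;  // AddRef, count == 2
// a.Reset();                   // Release, count == 1
// // The Widget is freed once b also goes out of scope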

View File

@ -0,0 +1,70 @@
/* Copyright (c) 2017-2022 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "common/object_pool.h"
#include <cstdlib>
#include <cstring>
#ifdef _WIN32
#include <malloc.h>
#endif
void* memalign_alloc(size_t boundary, size_t size) {
#if defined(_WIN32)
return _aligned_malloc(size, boundary);
#elif defined(_ISOC11_SOURCE)
return aligned_alloc(boundary, size);
#elif (_POSIX_C_SOURCE >= 200112L) || (_XOPEN_SOURCE >= 600)
void *ptr = nullptr;
if (posix_memalign(&ptr, boundary, size) != 0) {
return nullptr;
}
return ptr;
#else
// Align stuff ourselves. Kinda ugly, but will work anywhere.
void **place;
uintptr_t addr = 0;
void *ptr = malloc(boundary + size + sizeof(uintptr_t));
if (ptr == nullptr) {
return nullptr;
}
addr = ((uintptr_t)ptr + sizeof(uintptr_t) + boundary) & ~(boundary - 1);
place = (void **) addr;
place[-1] = ptr;
return (void *) addr;
#endif
}
void memalign_free(void *ptr) {
#if defined(_WIN32)
_aligned_free(ptr);
#elif !defined(_ISOC11_SOURCE) && !((_POSIX_C_SOURCE >= 200112L) || (_XOPEN_SOURCE >= 600))
if (ptr != nullptr) {
void **p = (void **) ptr;
free(p[-1]);
}
#else
free(ptr);
#endif
}

src/common/object_pool.h (new file, 148 lines)
View File

@ -0,0 +1,148 @@
/* Copyright (c) 2017-2022 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <memory>
#include <mutex>
#include <vector>
#include <algorithm>
#include <cstdlib>
void *memalign_alloc(size_t boundary, size_t size);
void memalign_free(void *ptr);
template <typename T>
struct AlignedAllocation {
static void* operator new(size_t size) {
void* ret = memalign_alloc(alignof(T), size);
if (!ret) throw std::bad_alloc();
return ret;
}
static void* operator new[](size_t size) {
void* ret = memalign_alloc(alignof(T), size);
if (!ret) throw std::bad_alloc();
return ret;
}
static void operator delete(void *ptr) {
return memalign_free(ptr);
}
static void operator delete[](void *ptr) {
return memalign_free(ptr);
}
};
/**
* Allocates objects of type T in batches of 64 * n where
* n is the number of times the pool has grown. So the first
* time it will allocate 64, then 128 objects etc.
*/
template<typename T>
class ObjectPool {
public:
template<typename... P>
T* Allocate(P&&... p) {
#ifndef OBJECT_POOL_DEBUG
if (vacants.empty()) {
unsigned num_objects = 64u << memory.size();
T *ptr = static_cast<T*>(memalign_alloc(std::max<std::size_t>(64, alignof(T)),
                                        num_objects * sizeof(T)));
if (!ptr) {
return nullptr;
}
for (unsigned i = 0; i < num_objects; i++) {
vacants.push_back(&ptr[i]);
}
memory.emplace_back(ptr);
}
T *ptr = vacants.back();
vacants.pop_back();
new(ptr) T(std::forward<P>(p)...);
return ptr;
#else
return new T(std::forward<P>(p)...);
#endif
}
void Free(T *ptr) {
#ifndef OBJECT_POOL_DEBUG
ptr->~T();
vacants.push_back(ptr);
#else
delete ptr;
#endif
}
void Clear() {
#ifndef OBJECT_POOL_DEBUG
vacants.clear();
memory.clear();
#endif
}
protected:
#ifndef OBJECT_POOL_DEBUG
std::vector<T*> vacants;
struct MallocDeleter {
void operator()(T *ptr) {
memalign_free(ptr);
}
};
std::vector<std::unique_ptr<T, MallocDeleter>> memory;
#endif
};
template<typename T>
class ThreadSafeObjectPool : private ObjectPool<T> {
public:
template<typename... P>
T* Allocate(P &&... p) {
std::lock_guard<std::mutex> holder{lock};
return ObjectPool<T>::Allocate(std::forward<P>(p)...);
}
void Free(T *ptr) {
#ifndef OBJECT_POOL_DEBUG
ptr->~T();
std::lock_guard<std::mutex> holder{lock};
this->vacants.push_back(ptr);
#else
delete ptr;
#endif
}
void Clear() {
std::lock_guard<std::mutex> holder{lock};
ObjectPool<T>::Clear();
}
private:
std::mutex lock;
};
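// A usage sketch with a hypothetical element type: objects are placement-new'd
// into recycled slab slots, so steady-state Allocate/Free skip the heap.
//
// struct Particle {
//     Particle(float x, float y) : x(x), y(y) {}
//     float x, y;
// };
//
// ObjectPool<Particle> pool;
// Particle* p = pool.Allocate(1.0f, 2.0f);  // constructs in-place in a slab slot
// pool.Free(p);                             // runs the destructor, recycles the slot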

View File

@ -158,10 +158,10 @@ struct ABIFrameInfo {
inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, std::size_t rsp_alignment,
std::size_t needed_frame_size) {
int count = (regs & ABI_ALL_GPRS).count();
std::size_t count = (regs & ABI_ALL_GPRS).count();
rsp_alignment -= count * 8;
std::size_t subtraction = 0;
int xmm_count = (regs & ABI_ALL_XMMS).count();
std::size_t xmm_count = (regs & ABI_ALL_XMMS).count();
if (xmm_count) {
// If we have any XMMs to save, we must align the stack here.
subtraction = rsp_alignment & 0xF;

View File

@ -29,11 +29,9 @@ enum class LayoutOption {
SingleScreen,
LargeScreen,
SideScreen,
// Similar to default, but better for mobile devices in portrait mode. The top screen is
// clamped to the top of the frame, and the bottom screen is enlarged to match the top screen.
MobilePortrait,
// Similar to LargeScreen, but better for mobile devices in landscape mode. The screens are
// clamped to the top of the frame, and the bottom screen is a bit bigger.
MobileLandscape,
@ -116,7 +114,6 @@ namespace NativeAnalog {
enum Values {
CirclePad,
CStick,
NumAnalogs,
};

View File

@ -9,12 +9,12 @@ add_library(video_core STATIC
pica.cpp
pica.h
pica_state.h
pica_types.h
primitive_assembly.cpp
primitive_assembly.h
rasterizer_interface.h
regs.cpp
regs.h
pica_regs.inc
pica.cpp
pica.h
regs_framebuffer.h
regs_lighting.h
regs_pipeline.h
@ -23,6 +23,15 @@ add_library(video_core STATIC
regs_texturing.h
renderer_base.cpp
renderer_base.h
common/backend.h
common/buffer.h
common/framebuffer.h
common/pica_types.h
common/shader_gen.cpp
common/shader_gen.h
common/shader.h
common/texture.h
common/pipeline.h
renderer_opengl/frame_dumper_opengl.cpp
renderer_opengl/frame_dumper_opengl.h
renderer_opengl/gl_rasterizer.cpp
@ -73,16 +82,21 @@ add_library(video_core STATIC
renderer_vulkan/pica_to_vulkan.h
renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/vk_backend.cpp
renderer_vulkan/vk_backend.h
renderer_vulkan/vk_buffer.cpp
renderer_vulkan/vk_buffer.h
renderer_vulkan/vk_common.cpp
renderer_vulkan/vk_common.h
renderer_vulkan/vk_format_reinterpreter.cpp
renderer_vulkan/vk_format_reinterpreter.h
renderer_vulkan/vk_format_util.cpp
renderer_vulkan/vk_format_util.h
renderer_vulkan/vk_instance.cpp
renderer_vulkan/vk_instance.h
renderer_vulkan/vk_pipeline_builder.cpp
renderer_vulkan/vk_pipeline_builder.h
renderer_vulkan/vk_pipeline.cpp
renderer_vulkan/vk_pipeline.h
renderer_vulkan/vk_platform.h
renderer_vulkan/vk_rasterizer_cache.cpp
renderer_vulkan/vk_rasterizer_cache.h
renderer_vulkan/vk_rasterizer.cpp
@ -90,6 +104,8 @@ add_library(video_core STATIC
renderer_vulkan/vk_shader_state.h
renderer_vulkan/vk_shader_gen.cpp
renderer_vulkan/vk_shader_gen.h
renderer_vulkan/vk_shader.cpp
renderer_vulkan/vk_shader.h
renderer_vulkan/vk_state.cpp
renderer_vulkan/vk_state.h
renderer_vulkan/vk_surface_params.cpp
@ -180,7 +196,7 @@ target_link_libraries(video_core PRIVATE glad::glad glm::glm nihstro-headers Boo
# Include Vulkan headers
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
target_include_directories(video_core PRIVATE ../../externals/vma/include)
target_link_libraries(video_core PRIVATE glslang SPIRV glslang-default-resource-limits OGLCompiler)
target_link_libraries(video_core PRIVATE glslang SPIRV robin_hood::robin_hood)
if (ARCHITECTURE_x86_64)
target_link_libraries(video_core PUBLIC xbyak::xbyak)

View File

@ -0,0 +1,60 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/object_pool.h"
#include "common/vector_math.h"
#include "video_core/common/pipeline.h"
#include "video_core/common/framebuffer.h"
namespace Frontend {
class EmuWindow;
}
namespace VideoCore {
/// Common interface of a video backend
class BackendBase {
public:
BackendBase(Frontend::EmuWindow& window) : window(window) {}
virtual ~BackendBase() = default;
// Triggers a swapchain buffer swap
virtual void SwapBuffers() = 0;
// Creates a backend specific texture handle
virtual TextureHandle CreateTexture(TextureInfo info) = 0;
// Creates a backend specific buffer handle
virtual BufferHandle CreateBuffer(BufferInfo info) = 0;
// Creates a backend specific framebuffer handle
virtual FramebufferHandle CreateFramebuffer(FramebufferInfo info) = 0;
// Creates a backend specific pipeline handle
virtual PipelineHandle CreatePipeline(PipelineType type, PipelineInfo info) = 0;
// Creates a backend specific sampler object
virtual SamplerHandle CreateSampler(SamplerInfo info) = 0;
// Start a draw operation
virtual void Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer,
u32 base_vertex, u32 num_vertices) = 0;
// Start an indexed draw operation
virtual void DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer, BufferHandle index_buffer,
u32 base_index, u32 num_indices, u32 base_vertex) = 0;
// Executes a compute shader
virtual void DispatchCompute(PipelineHandle pipeline, Common::Vec3<u32> groupsize,
Common::Vec3<u32> groups) = 0;
private:
Frontend::EmuWindow& window;
};
} // namespace VideoCore
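// A usage sketch of the interface above (the backend instance, handles, and
// counts are hypothetical):
//
// using namespace VideoCore;
// BufferHandle vbo = backend.CreateBuffer({.capacity = vertex_bytes,
//                                          .usage = BufferUsage::Vertex});
// PipelineHandle pipeline = backend.CreatePipeline(PipelineType::Graphics, pipeline_info);
// backend.Draw(pipeline, framebuffer, vbo, 0, num_vertices);
// backend.SwapBuffers();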

View File

@ -0,0 +1,102 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include "common/hash.h"
#include "common/intrusive_ptr.h"
namespace VideoCore {
enum class BufferUsage : u8 {
Vertex = 0,
Index = 1,
Uniform = 2,
Texel = 3,
Staging = 4,
Undefined = 255
};
enum class ViewFormat : u8 {
R32Float = 0,
R32G32Float = 1,
R32G32B32Float = 2,
R32G32B32A32Float = 3,
Undefined = 255
};
constexpr u32 MAX_BUFFER_VIEWS = 3;
struct BufferInfo {
u32 capacity = 0;
BufferUsage usage = BufferUsage::Undefined;
std::array<ViewFormat, MAX_BUFFER_VIEWS> views{ViewFormat::Undefined, ViewFormat::Undefined,
                                               ViewFormat::Undefined};
u64 Hash() const {
return Common::ComputeStructHash64(*this);
}
};
static_assert(sizeof(BufferInfo) == 8, "BufferInfo not packed!");
static_assert(std::is_standard_layout_v<BufferInfo>, "BufferInfo is not a standard layout!");
class BufferBase : public IntrusivePtrEnabled<BufferBase> {
public:
BufferBase() = default;
BufferBase(const BufferInfo& info) : info(info) {}
virtual ~BufferBase() = default;
/// Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
/// and the optional alignment requirement.
/// The actual used size must be specified on unmapping the chunk.
virtual std::span<u8> Map(u32 size, u32 alignment = 0) { return {}; }
/// Flushes write to buffer memory
virtual void Commit(u32 size = 0) {}
/// Returns the size of the buffer in bytes
u32 GetCapacity() const {
return info.capacity;
}
/// Returns the usage of the buffer
BufferUsage GetUsage() const {
return info.usage;
}
/// Returns the starting offset of the currently mapped buffer slice
u64 GetCurrentOffset() const {
return buffer_offset;
}
/// Returns whether the buffer was invalidated by the most recent Map call
bool IsInvalid() const {
return invalid;
}
/// Invalidates the buffer
void Invalidate() {
buffer_offset = 0;
invalid = true;
}
protected:
BufferInfo info{};
u32 buffer_offset = 0;
bool invalid = false;
};
using BufferHandle = IntrusivePtr<BufferBase>;
} // namespace VideoCore
namespace std {
template <>
struct hash<VideoCore::BufferInfo> {
std::size_t operator()(const VideoCore::BufferInfo& info) const noexcept {
return info.Hash();
}
};
} // namespace std
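// A sketch of the Map/Commit contract documented above (buffer, src_data and
// upload_size are hypothetical):
//
// const std::span<u8> staging = buffer->Map(upload_size, 4);
// if (buffer->IsInvalid()) {
//     // The backing storage was reset by this Map; any previously
//     // uploaded offsets are stale and must be re-uploaded
// }
// std::memcpy(staging.data(), src_data, upload_size);
// buffer->Commit(upload_size);  // commit only the bytes actually written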

View File

@ -0,0 +1,69 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/common/texture.h"
namespace VideoCore {
enum class MSAASamples : u32 {
x1,
x2,
x4,
x8
};
/**
* Information about a framebuffer
*/
struct FramebufferInfo {
TextureHandle color;
TextureHandle depth_stencil;
MSAASamples samples = MSAASamples::x1;
Rect2D draw_rect{};
/// Hashes the framebuffer object and returns a unique identifier
u64 Hash() const {
    // The only member an IntrusivePtr has is the raw pointer to the
    // handle, so it's fine to hash it directly
return Common::ComputeStructHash64(*this);
}
};
/**
* A framebuffer is a collection of render targets and their configuration
*/
class FramebufferBase : public IntrusivePtrEnabled<FramebufferBase> {
public:
FramebufferBase(const FramebufferInfo& info) : info(info) {}
virtual ~FramebufferBase() = default;
/// Returns an immutable reference to the color attachment
const TextureHandle& GetColorAttachment() const {
return info.color;
}
/// Returns an immutable reference to the depth/stencil attachment
const TextureHandle& GetDepthStencilAttachment() const {
return info.depth_stencil;
}
/// Returns how many samples the framebuffer takes
MSAASamples GetMSAASamples() const {
return info.samples;
}
/// Returns the rendering area
Rect2D GetDrawRectangle() const {
return info.draw_rect;
}
protected:
FramebufferInfo info;
};
using FramebufferHandle = IntrusivePtr<FramebufferBase>;
} // namespace VideoCore

View File

@ -0,0 +1,157 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cmath>
#include <cstring>
#include <boost/serialization/access.hpp>
#include "common/common_types.h"
namespace Pica {
/**
* Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision
* floating point.
*
* When decoding, format is as follows:
* - The first `M` bits are the mantissa
* - The next `E` bits are the exponent
* - The last bit is the sign bit
*
* @todo Verify on HW if this conversion is sufficiently accurate.
*/
template <u32 M, u32 E>
struct Float {
static constexpr u32 width = M + E + 1;
static constexpr u32 bias = 128 - (1 << (E - 1));
static constexpr u32 exponent_mask = (1 << E) - 1;
static constexpr u32 mantissa_mask = (1 << M) - 1;
static constexpr u32 sign_mask = 1 << (E + M);
public:
static Float FromFloat32(float val) {
Float ret;
ret.value = val;
return ret;
}
static Float FromRaw(u32 hex) {
Float res;
u32 exponent = (hex >> M) & exponent_mask;
const u32 mantissa = hex & mantissa_mask;
const u32 sign = (hex & sign_mask) << (31 - M - E);
if (hex & (mantissa_mask | (exponent_mask << M))) {
if (exponent == exponent_mask) {
exponent = 255;
} else {
exponent += bias;
}
hex = sign | (mantissa << (23 - M)) | (exponent << 23);
} else {
hex = sign;
}
std::memcpy(&res.value, &hex, sizeof(float));
return res;
}
static Float Zero() {
return FromFloat32(0.f);
}
// Not recommended for anything but logging
float ToFloat32() const {
return value;
}
Float operator*(const Float& flt) const {
float result = value * flt.ToFloat32();
// PICA gives 0 instead of NaN when multiplying by inf
if (std::isnan(result) && !std::isnan(value) && !std::isnan(flt.ToFloat32())) {
result = 0.f;
}
return Float::FromFloat32(result);
}
Float operator/(const Float& flt) const {
return Float::FromFloat32(ToFloat32() / flt.ToFloat32());
}
Float operator+(const Float& flt) const {
return Float::FromFloat32(ToFloat32() + flt.ToFloat32());
}
Float operator-(const Float& flt) const {
return Float::FromFloat32(ToFloat32() - flt.ToFloat32());
}
Float& operator*=(const Float& flt) {
value = operator*(flt).value;
return *this;
}
Float& operator/=(const Float& flt) {
value /= flt.ToFloat32();
return *this;
}
Float& operator+=(const Float& flt) {
value += flt.ToFloat32();
return *this;
}
Float& operator-=(const Float& flt) {
value -= flt.ToFloat32();
return *this;
}
Float operator-() const {
return Float::FromFloat32(-ToFloat32());
}
bool operator<(const Float& flt) const {
return ToFloat32() < flt.ToFloat32();
}
bool operator>(const Float& flt) const {
return ToFloat32() > flt.ToFloat32();
}
bool operator>=(const Float& flt) const {
return ToFloat32() >= flt.ToFloat32();
}
bool operator<=(const Float& flt) const {
return ToFloat32() <= flt.ToFloat32();
}
bool operator==(const Float& flt) const {
return ToFloat32() == flt.ToFloat32();
}
bool operator!=(const Float& flt) const {
return ToFloat32() != flt.ToFloat32();
}
private:
// Stored as a regular float, merely for convenience
// TODO: Perform proper arithmetic on this!
float value;
friend class boost::serialization::access;
template <class Archive>
void serialize(Archive& ar, const unsigned int file_version) {
ar& value;
}
};
using Float24 = Float<16, 7>;
using Float20 = Float<12, 7>;
using Float16 = Float<10, 5>;
} // namespace Pica
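// Two worked examples of the format described above, assuming Float24
// (M = 16, E = 7, so bias = 128 - (1 << 6) = 64):
//
// // 1.0f encodes as mantissa 0, sign 0, biased exponent 127 - 64 = 63
// const auto one = Pica::Float24::FromRaw(63u << 16);
// // one.ToFloat32() == 1.0f
//
// // PICA quirk handled by operator*: inf * 0 yields 0 instead of NaN
// const auto prod = Pica::Float24::FromFloat32(INFINITY) * Pica::Float24::Zero();
// // prod.ToFloat32() == 0.0f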

View File

@ -0,0 +1,223 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/bit_field.h"
#include "common/bit_field_array.h"
#include "common/hash.h"
#include "video_core/common/buffer.h"
#include "video_core/common/texture.h"
#include "video_core/common/shader.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_pipeline.h"
namespace VideoCore {
constexpr u32 MAX_SHADER_STAGES = 3;
constexpr u32 MAX_VERTEX_ATTRIBUTES = 8;
constexpr u32 MAX_BINDINGS_IN_GROUP = 7;
constexpr u32 MAX_BINDING_GROUPS = 6;
enum class PipelineType : u8 {
Compute = 0,
Graphics = 1
};
enum class BindingType : u32 {
None = 0,
Uniform = 1,
UniformDynamic = 2,
TexelBuffer = 3,
Texture = 4,
Sampler = 5,
StorageImage = 6
};
using BindingGroup = BitFieldArray<0, 3, MAX_BINDINGS_IN_GROUP, BindingType>;
/**
* Describes all the resources used in the pipeline
*/
struct PipelineLayoutInfo {
u8 group_count = 0;
std::array<BindingGroup, MAX_BINDING_GROUPS> binding_groups{};
u8 push_constant_block_size = 0;
};
/**
* The pipeline state is tightly packed with bitfields to reduce
* the overhead of hashing as much as possible
*/
union RasterizationState {
u8 value = 0;
BitField<0, 2, Pica::TriangleTopology> topology;
BitField<4, 2, Pica::CullMode> cull_mode;
};
union DepthStencilState {
u64 value = 0;
BitField<0, 1, u64> depth_test_enable;
BitField<1, 1, u64> depth_write_enable;
BitField<2, 1, u64> stencil_test_enable;
BitField<3, 3, Pica::CompareFunc> depth_compare_op;
BitField<6, 3, Pica::StencilAction> stencil_fail_op;
BitField<9, 3, Pica::StencilAction> stencil_pass_op;
BitField<12, 3, Pica::StencilAction> stencil_depth_fail_op;
BitField<15, 3, Pica::CompareFunc> stencil_compare_op;
BitField<18, 8, u64> stencil_reference;
BitField<26, 8, u64> stencil_compare_mask;
BitField<34, 8, u64> stencil_write_mask;
};
union BlendState {
u32 value = 0;
BitField<0, 4, Pica::BlendFactor> src_color_blend_factor;
BitField<4, 4, Pica::BlendFactor> dst_color_blend_factor;
BitField<8, 3, Pica::BlendEquation> color_blend_eq;
BitField<11, 4, Pica::BlendFactor> src_alpha_blend_factor;
BitField<15, 4, Pica::BlendFactor> dst_alpha_blend_factor;
BitField<19, 3, Pica::BlendEquation> alpha_blend_eq;
BitField<22, 4, u32> color_write_mask;
};
enum class AttribType : u8 {
Float = 0,
Int = 1,
Short = 2
};
union VertexAttribute {
u8 value = 0;
BitField<0, 2, AttribType> type;
BitField<2, 3, u8> components;
};
#pragma pack(1)
struct VertexLayout {
u8 stride = 0;
std::array<VertexAttribute, MAX_VERTEX_ATTRIBUTES> attributes;
};
#pragma pack()
/**
* Information about a graphics/compute pipeline
*/
#pragma pack(1)
struct PipelineInfo {
std::array<ShaderHandle, MAX_SHADER_STAGES> shaders{};
VertexLayout vertex_layout{};
PipelineLayoutInfo layout{};
BlendState blending{};
DepthStencilState depth_stencil{};
RasterizationState rasterization{};
u64 Hash() const {
return Common::ComputeStructHash64(*this);
}
};
#pragma pack()
class PipelineBase : public IntrusivePtrEnabled<PipelineBase> {
public:
PipelineBase(PipelineType type, PipelineInfo info) :
type(type), info(info) {}
virtual ~PipelineBase() = default;
// Disable copy constructor
PipelineBase(const PipelineBase&) = delete;
PipelineBase& operator=(const PipelineBase&) = delete;
// Binds the texture in the specified slot
virtual void BindTexture(u32 group, u32 slot, TextureHandle handle) = 0;
// Binds the texture in the specified slot
virtual void BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view = 0) = 0;
// Binds the sampler in the specified slot
virtual void BindSampler(u32 group, u32 slot, SamplerHandle handle) = 0;
/// Sets the primitive topology
void SetTopology(Pica::TriangleTopology topology) {
info.rasterization.topology.Assign(topology);
}
/// Sets the culling mode
void SetCullMode(Pica::CullMode mode) {
info.rasterization.cull_mode.Assign(mode);
}
/// Configures the color blending function
void SetColorBlendFunc(Pica::BlendFactor src_color_factor,
Pica::BlendFactor dst_color_factor,
Pica::BlendEquation color_eq) {
info.blending.src_color_blend_factor.Assign(src_color_factor);
info.blending.dst_color_blend_factor.Assign(dst_color_factor);
info.blending.color_blend_eq.Assign(color_eq);
}
/// Configures the alpha blending function
void SetAlphaBlendFunc(Pica::BlendFactor src_alpha_factor,
Pica::BlendFactor dst_alpha_factor,
Pica::BlendEquation alpha_eq) {
info.blending.src_alpha_blend_factor.Assign(src_alpha_factor);
info.blending.dst_alpha_blend_factor.Assign(dst_alpha_factor);
info.blending.alpha_blend_eq.Assign(alpha_eq);
}
/// Sets the color write mask
void SetColorWriteMask(u32 mask) {
info.blending.color_write_mask.Assign(mask);
}
/// Configures the depth test
void SetDepthTest(bool enable, Pica::CompareFunc compare_op) {
info.depth_stencil.depth_test_enable.Assign(enable);
info.depth_stencil.depth_compare_op.Assign(compare_op);
}
/// Enables or disables depth writes
void SetDepthWrites(bool enable) {
info.depth_stencil.depth_write_enable.Assign(enable);
}
/// Configures the stencil test
void SetStencilTest(bool enable, Pica::StencilAction fail, Pica::StencilAction pass,
Pica::StencilAction depth_fail, Pica::CompareFunc compare, u32 ref) {
info.depth_stencil.stencil_test_enable.Assign(enable);
info.depth_stencil.stencil_fail_op.Assign(fail);
info.depth_stencil.stencil_pass_op.Assign(pass);
info.depth_stencil.stencil_depth_fail_op.Assign(depth_fail);
info.depth_stencil.stencil_compare_op.Assign(compare);
info.depth_stencil.stencil_reference.Assign(ref);
}
/// Selects the bits of the stencil values participating in the stencil test
void SetStencilCompareMask(u32 mask) {
info.depth_stencil.stencil_compare_mask.Assign(mask);
}
/// Selects the bits of the stencil values updated by the stencil test
void SetStencilWriteMask(u32 mask) {
info.depth_stencil.stencil_write_mask.Assign(mask);
}
protected:
PipelineType type = PipelineType::Graphics;
PipelineInfo info{};
};
using PipelineHandle = IntrusivePtr<PipelineBase>;
} // namespace VideoCore
namespace std {
template <>
struct hash<VideoCore::PipelineInfo> {
std::size_t operator()(const VideoCore::PipelineInfo& info) const noexcept {
return info.Hash();
}
};
} // namespace std
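// A sketch of the caching pattern this tight packing enables (the cache and
// backend are hypothetical): the struct hash doubles as the pipeline-cache key.
//
// std::unordered_map<u64, PipelineHandle, Common::IdentityHash> pipeline_cache;
//
// PipelineInfo info{};
// info.rasterization.cull_mode.Assign(Pica::CullMode::KeepAll);
// const u64 key = info.Hash();
// if (!pipeline_cache.contains(key)) {
//     pipeline_cache.emplace(key, backend.CreatePipeline(PipelineType::Graphics, info));
// }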

View File

@ -0,0 +1,62 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include <string_view>
#include <vector>
#include "common/common_types.h"
#include "common/intrusive_ptr.h"
namespace VideoCore {
enum class ShaderStage : u32 {
Vertex = 0,
Geometry = 1,
Fragment = 2,
Compute = 3,
Undefined = 4
};
// Tells the module how much to optimize the bytecode
enum class ShaderOptimization : u32 {
High = 0,
Debug = 1
};
/// Compiles shader source to backend representation
class ShaderBase : public IntrusivePtrEnabled<ShaderBase> {
public:
ShaderBase(ShaderStage stage, std::string_view name, std::string&& source) :
    name(name), stage(stage), source(std::move(source)) {}
virtual ~ShaderBase() = default;
/// Compiles the shader source code
virtual bool Compile(ShaderOptimization level) = 0;
/// Returns the shader source code the module was created with
std::string_view GetSource() const {
return source;
}
/// Returns the name given to the shader module
std::string_view GetName() const {
return name;
}
/// Returns the pipeline stage the shader is assigned to
ShaderStage GetStage() const {
return stage;
}
protected:
std::string_view name = "None";
ShaderStage stage = ShaderStage::Undefined;
std::string source;
};
using ShaderHandle = IntrusivePtr<ShaderBase>;
} // namespace VideoCore
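// A usage sketch (MakeShader is a hypothetical backend factory; only the base
// class is defined here):
//
// ShaderHandle shader = MakeShader(ShaderStage::Fragment, "trivial_frag", std::move(source));
// if (!shader->Compile(ShaderOptimization::High)) {
//     LOG_ERROR(Render, "Shader {} failed to compile", shader->GetName());
// }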

View File

@ -0,0 +1,179 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_set.h"
#include "video_core/video_core.h"
#include "video_core/common/shader_gen.h"
namespace VideoCore {
PicaFSConfig::PicaFSConfig(const Pica::Regs& regs) {
scissor_test_mode = regs.rasterizer.scissor_test.mode;
depthmap_enable = regs.rasterizer.depthmap_enable;
alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable
? regs.framebuffer.output_merger.alpha_test.func.Value()
: Pica::CompareFunc::Always;
texture0_type = regs.texturing.texture0.type;
texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;
// We don't need these otherwise, so reset them to avoid unnecessary shader generation
alphablend_enable = {};
logic_op = {};
// Copy relevant tev stages fields.
// We don't sync const_color here because of the high variance, it is a
// shader uniform instead.
const auto stages = regs.texturing.GetTevStages();
DEBUG_ASSERT(stages.size() == tev_stages.size());
for (std::size_t i = 0; i < stages.size(); i++) {
const auto& tev_stage = stages[i];
tev_stages[i].sources_raw = tev_stage.sources_raw;
tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
tev_stages[i].ops_raw = tev_stage.ops_raw;
tev_stages[i].scales_raw = tev_stage.scales_raw;
}
fog_mode = regs.texturing.fog_mode;
fog_flip = regs.texturing.fog_flip != 0;
combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
regs.texturing.tev_combiner_buffer_input.update_mask_a.Value()
<< 4;
// Fragment lighting
lighting.enable = !regs.lighting.disable;
lighting.src_num = regs.lighting.max_light_index + 1;
for (u32 light_index = 0; light_index < lighting.src_num; ++light_index) {
u32 num = regs.lighting.light_enable.GetNum(light_index);
const auto& light = regs.lighting.light[num];
auto& dst_light = lighting.light[light_index];
dst_light.num = num;
dst_light.directional = light.config.directional != 0;
dst_light.two_sided_diffuse = light.config.two_sided_diffuse != 0;
dst_light.geometric_factor_0 = light.config.geometric_factor_0 != 0;
dst_light.geometric_factor_1 = light.config.geometric_factor_1 != 0;
dst_light.dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
dst_light.spot_atten_enable = !regs.lighting.IsSpotAttenDisabled(num);
dst_light.shadow_enable = !regs.lighting.IsShadowDisabled(num);
}
lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0;
lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
// This is a dummy field due to lack of the corresponding register
lighting.lut_sp.enable = true;
lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0;
lighting.lut_sp.type = regs.lighting.lut_input.sp.Value();
lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp);
lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0;
lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0;
lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0;
lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0;
lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
lighting.config = regs.lighting.config0.config;
lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha;
lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha;
lighting.bump_mode = regs.lighting.config0.bump_mode;
lighting.bump_selector = regs.lighting.config0.bump_selector;
lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0;
lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0;
lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0;
lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0;
lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0;
lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0;
lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0;
lighting.shadow_selector = regs.lighting.config0.shadow_selector;
proctex.enable = regs.texturing.main_config.texture3_enable;
if (proctex.enable) {
proctex.coord = regs.texturing.main_config.texture3_coordinates;
proctex.u_clamp = regs.texturing.proctex.u_clamp;
proctex.v_clamp = regs.texturing.proctex.v_clamp;
proctex.color_combiner = regs.texturing.proctex.color_combiner;
proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner;
proctex.separate_alpha = regs.texturing.proctex.separate_alpha;
proctex.noise_enable = regs.texturing.proctex.noise_enable;
proctex.u_shift = regs.texturing.proctex.u_shift;
proctex.v_shift = regs.texturing.proctex.v_shift;
proctex.lut_width = regs.texturing.proctex_lut.width;
proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0;
proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1;
proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2;
proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3;
proctex.lod_min = regs.texturing.proctex_lut.lod_min;
proctex.lod_max = regs.texturing.proctex_lut.lod_max;
proctex.lut_filter = regs.texturing.proctex_lut.filter;
}
shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
Pica::FragmentOperationMode::Shadow;
shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0;
}
PicaVSConfig::PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
program_hash = setup.GetProgramCodeHash();
swizzle_hash = setup.GetSwizzleDataHash();
main_offset = regs.main_offset;
sanitize_mul = VideoCore::g_hw_shader_accurate_mul;
num_outputs = 0;
output_map.fill(16);
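// Example: output_mask = 0b101 selects output registers 0 and 2, so the loop
// below yields output_map[0] = 0, output_map[2] = 1 and num_outputs = 2;
// unselected registers keep the sentinel value 16.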
for (int reg : Common::BitSet<u32>(regs.output_mask)) {
output_map[reg] = num_outputs++;
}
}
PicaFixedGSConfig::PicaFixedGSConfig(const Pica::Regs& regs) {
vs_output_attributes = Common::BitSet<u32>(regs.vs.output_mask).Count();
gs_output_attributes = vs_output_attributes;
semantic_maps.fill({16, 0});
for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) {
const std::array semantics = {
regs.rasterizer.vs_output_attributes[attrib].map_x.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_y.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_z.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_w.Value(),
};
for (u32 comp = 0; comp < 4; ++comp) {
const std::size_t semantic = static_cast<std::size_t>(semantics[comp]);
if (semantic < 24) {
semantic_maps[semantic] = {attrib, comp};
} else if (semantic != Pica::RasterizerRegs::VSOutputAttributes::INVALID) {
LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic);
}
}
}
}
} // namespace VideoCore

View File

@ -0,0 +1,227 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <string>
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/shader/shader.h"
namespace VideoCore {
enum Attributes {
ATTRIBUTE_POSITION,
ATTRIBUTE_COLOR,
ATTRIBUTE_TEXCOORD0,
ATTRIBUTE_TEXCOORD1,
ATTRIBUTE_TEXCOORD2,
ATTRIBUTE_TEXCOORD0_W,
ATTRIBUTE_NORMQUAT,
ATTRIBUTE_VIEW,
};
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
struct TevStageConfigRaw {
u32 sources_raw;
u32 modifiers_raw;
u32 ops_raw;
u32 scales_raw;
explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
Pica::TexturingRegs::TevStageConfig stage;
stage.sources_raw = sources_raw;
stage.modifiers_raw = modifiers_raw;
stage.ops_raw = ops_raw;
stage.const_color = 0;
stage.scales_raw = scales_raw;
return stage;
}
};
/**
* This struct contains all state used to generate the GLSL fragment shader that emulates the
* current Pica register configuration. This struct is used as a cache key for generated GLSL shader
programs. The shader generator should retrieve state from this struct only, not by
* directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
* Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
* two separate shaders sharing the same key.
*/
struct PicaFSConfig {
explicit PicaFSConfig(const Pica::Regs& regs);
/// Returns the hash of the FS config
u64 Hash() const noexcept {
return Common::ComputeStructHash64(*this);
}
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index));
}
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index));
}
Pica::CompareFunc alpha_test_func;
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
std::array<TevStageConfigRaw, 6> tev_stages;
bool texture2_use_coord1;
u8 combiner_buffer_input;
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
Pica::TexturingRegs::FogMode fog_mode;
bool fog_flip;
bool alphablend_enable;
Pica::LogicOp logic_op;
struct {
struct {
unsigned num;
bool directional;
bool two_sided_diffuse;
bool dist_atten_enable;
bool spot_atten_enable;
bool geometric_factor_0;
bool geometric_factor_1;
bool shadow_enable;
} light[8];
bool enable;
unsigned src_num;
Pica::LightingRegs::LightingBumpMode bump_mode;
unsigned bump_selector;
bool bump_renorm;
bool clamp_highlights;
Pica::LightingRegs::LightingConfig config;
bool enable_primary_alpha;
bool enable_secondary_alpha;
bool enable_shadow;
bool shadow_primary;
bool shadow_secondary;
bool shadow_invert;
bool shadow_alpha;
unsigned shadow_selector;
struct {
bool enable;
bool abs_input;
Pica::LightingRegs::LightingLutInput type;
float scale;
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
} lighting;
struct {
bool enable;
u32 coord;
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
bool separate_alpha;
bool noise_enable;
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
u32 lut_width;
u32 lut_offset0;
u32 lut_offset1;
u32 lut_offset2;
u32 lut_offset3;
u32 lod_min;
u32 lod_max;
Pica::TexturingRegs::ProcTexFilter lut_filter;
} proctex;
bool shadow_rendering;
bool shadow_texture_orthographic;
};
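// Usage sketch (illustrative, not part of this commit; generator is a backend
// ShaderGeneratorBase*): backends are expected to key their shader caches on
// Hash(), e.g.
//
//   std::unordered_map<u64, std::string> fs_cache;
//   const PicaFSConfig config{regs};
//   if (auto [it, inserted] = fs_cache.try_emplace(config.Hash()); inserted) {
//       it->second = generator->GenerateFragmentShader(config, true);
//   }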
/**
* This struct contains information to identify a host vertex shader generated from PICA vertex
* shader.
*/
struct PicaVSConfig {
explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
/// Returns the hash of the VS config
u64 Hash() const noexcept {
return Common::ComputeStructHash64(*this);
}
u64 program_hash = 0;
u64 swizzle_hash = 0;
u32 main_offset = 0;
bool sanitize_mul = false;
// output_map[output register index] -> output attribute index
u32 num_outputs = 0;
std::array<u32, 16> output_map{};
};
/**
* This struct contains information to identify a GL geometry shader generated from PICA no-geometry
* shader pipeline
*/
struct PicaFixedGSConfig {
explicit PicaFixedGSConfig(const Pica::Regs& regs);
/// Returns the hash of the GS config
u64 Hash() const noexcept {
return Common::ComputeStructHash64(*this);
}
u32 vs_output_attributes = 0;
u32 gs_output_attributes = 0;
struct SemanticMap {
u32 attribute_index = 0;
u32 component_index = 0;
};
// semantic_maps[semantic name] -> GS output attribute index + component index
std::array<SemanticMap, 24> semantic_maps{};
};
/**
* Generates backend-specific shader modules from the Pica state configuration
* @todo To be replaced with a unified shader compiler
*/
class ShaderGeneratorBase {
public:
ShaderGeneratorBase() = default;
virtual ~ShaderGeneratorBase() = default;
/**
* Generates the GLSL vertex shader program source code that accepts vertices from the software shader
* and directly passes them to the fragment shader.
* @param separable_shader generates shader that can be used for separate shader object
* @returns String of the shader source code
*/
virtual std::string GenerateTrivialVertexShader(bool separable_shader) = 0;
/**
* Generates the GLSL vertex shader program source code for the given VS program
* @returns String of the shader source code
*/
virtual std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config,
bool separable_shader) = 0;
/**
* Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline
* @returns String of the shader source code
*/
virtual std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) = 0;
/**
* Generates the GLSL fragment shader program source code for the current Pica state
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
* @param separable_shader generates shader that can be used for separate shader object
* @returns String of the shader source code
*/
virtual std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) = 0;
};
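// Usage sketch (the GLShaderGenerator type name is hypothetical): the renderer
// owns one generator per backend and requests shader sources on cache misses, e.g.
//
//   std::unique_ptr<ShaderGeneratorBase> generator = std::make_unique<GLShaderGenerator>();
//   const std::string vs_source = generator->GenerateVertexShader(setup, vs_config, true);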
} // namespace VideoCore

View File

@ -0,0 +1,171 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include "common/hash.h"
#include "common/intrusive_ptr.h"
#include "video_core/regs_texturing.h"
namespace VideoCore {
constexpr u32 MAX_COLOR_FORMATS = 5;
constexpr u32 MAX_DEPTH_FORMATS = 3;
enum class TextureFormat : u8 {
RGBA8 = 0,
RGB8 = 1,
RGB5A1 = 2,
RGB565 = 3,
RGBA4 = 4,
D16 = 5,
D24 = 6,
D24S8 = 7,
Undefined = 255
};
enum class TextureType : u8 {
Texture1D = 0,
Texture2D = 1,
Texture3D = 2,
Undefined = 255
};
enum class TextureViewType : u8 {
View1D = 0,
View2D = 1,
View3D = 2,
ViewCube = 3,
View1DArray = 4,
View2DArray = 5,
ViewCubeArray = 6,
Undefined = 255
};
/**
* A rectangle describing part of a texture
* @param x, y are the offset from the bottom left corner
* @param width, height are the extent of the rectangle
*/
struct Rect2D {
s32 x = 0;
s32 y = 0;
u32 width = 0;
u32 height = 0;
};
/**
* Information about a texture packed to 8 bytes
*/
struct TextureInfo {
u16 width = 0;
u16 height = 0;
u8 levels = 0;
TextureType type = TextureType::Undefined;
TextureViewType view_type = TextureViewType::Undefined;
TextureFormat format = TextureFormat::Undefined;
u64 Hash() const {
return Common::ComputeStructHash64(*this);
}
};
static_assert(sizeof(TextureInfo) == 8, "TextureInfo not packed!");
static_assert(std::is_standard_layout_v<TextureInfo>, "TextureInfo is not a standard layout!");
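// Field layout: width (2) + height (2) + levels (1) + type (1) + view_type (1) +
// format (1) = 8 bytes, which is what the sizeof static_assert above verifies.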
class TextureBase;
using TextureHandle = IntrusivePtr<TextureBase>;
class TextureBase : public IntrusivePtrEnabled<TextureBase> {
public:
TextureBase() = default;
TextureBase(const TextureInfo& info) : info(info) {}
virtual ~TextureBase() = default;
/// Uploads pixel data to GPU memory
virtual void Upload(Rect2D rectangle, u32 stride, std::span<const u8> data,
u32 level = 0) {}
/// Downloads pixel data from GPU memory
virtual void Download(Rect2D rectangle, u32 stride, std::span<u8> data,
u32 level = 0) {}
/// Copies the specified rectangle area to the destination texture
virtual void BlitTo(TextureHandle dest, Rect2D src_rectangle, Rect2D dest_rect,
u32 src_level = 0, u32 dest_level = 0) {}
/// Returns the unique texture identifier
u64 GetHash() const {
return info.Hash();
}
/// Returns the width of the texture
u16 GetWidth() const {
return info.width;
}
/// Returns the height of the texture
u16 GetHeight() const {
return info.height;
}
/// Returns the number of mipmap levels allocated
u16 GetMipLevels() const {
return info.levels;
}
/// Returns the pixel format
TextureFormat GetFormat() const {
return info.format;
}
protected:
TextureInfo info;
};
struct SamplerInfo {
Pica::TextureFilter mag_filter;
Pica::TextureFilter min_filter;
Pica::TextureFilter mip_filter;
Pica::WrapMode wrap_s;
Pica::WrapMode wrap_t;
u32 border_color = 0;
u32 lod_min = 0;
u32 lod_max = 0;
s32 lod_bias = 0;
u64 Hash() const {
return Common::ComputeStructHash64(*this);
}
};
class SamplerBase : public IntrusivePtrEnabled<SamplerBase> {
public:
SamplerBase(SamplerInfo info) : info(info) {}
virtual ~SamplerBase() = default;
protected:
SamplerInfo info{};
};
using SamplerHandle = IntrusivePtr<SamplerBase>;
} // namespace VideoCore
namespace std {
template <>
struct hash<VideoCore::TextureInfo> {
std::size_t operator()(const VideoCore::TextureInfo& info) const noexcept {
return info.Hash();
}
};
template <>
struct hash<VideoCore::SamplerInfo> {
std::size_t operator()(const VideoCore::SamplerInfo& info) const noexcept {
return info.Hash();
}
};
} // namespace std
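// These specializations let the info structs key standard containers directly;
// a backend cache could be declared as (illustrative)
//   std::unordered_map<VideoCore::SamplerInfo, VideoCore::SamplerHandle> sampler_cache;
// Note that unordered containers additionally require an operator== for the key type.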

src/video_core/gpu.cpp Normal file
View File

@ -0,0 +1,60 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <type_traits>
#include "core/core.h"
#include "video_core/pica.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::System& system,
Frontend::EmuWindow& emu_window) {
auto& telemetry_session = system.TelemetrySession();
auto& cpu_memory = system.Memory();
switch (Settings::values.renderer_backend) {
case Settings::RendererBackend::OpenGL:
return std::make_unique<OpenGL::RendererOpenGL>(emu_window);
case Settings::RendererBackend::Vulkan:
return std::make_unique<Vulkan::RendererVulkan>(emu_window);
default:
return nullptr;
}
}
namespace Pica {
GPU::GPU(Core::System& system, Memory::MemorySystem& memory) :
system(system), memory(memory) {
//renderer = CreateRenderer(system, )
rasterizer = renderer->Rasterizer();
}
void GPU::SwapBuffers() {
renderer->SwapBuffers();
}
void GPU::FlushAll() {
rasterizer->FlushAll();
}
void GPU::FlushRegion(PAddr addr, u32 size) {
rasterizer->FlushRegion(addr, size);
}
void GPU::InvalidateRegion(PAddr addr, u32 size) {
rasterizer->InvalidateRegion(addr, size);
}
void GPU::FlushAndInvalidateRegion(PAddr addr, u32 size) {
rasterizer->FlushAndInvalidateRegion(addr, size);
}
void GPU::ClearAll(bool flush) {
rasterizer->ClearAll(flush);
}
} // namespace Pica

src/video_core/gpu.h Normal file
View File

@ -0,0 +1,81 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <functional>
#include "core/frontend/framebuffer_layout.h"
#include "video_core/maestro.h"
namespace Core {
class System;
}
namespace Memory {
class MemorySystem;
}
namespace Frontend {
class EmuWindow;
}
namespace VideoCore {
class RendererBase;
class RasterizerInterface;
}
namespace Pica {
class Maestro;
enum class ResultStatus {
Success,
ErrorGenericDrivers,
ErrorUnsupportedGL,
};
/**
* Interface for the PICA GPU
*/
class GPU {
public:
GPU(Core::System& system, Memory::MemorySystem& memory);
~GPU() = default;
/// Swap buffers (render frame)
void SwapBuffers();
/// Notify rasterizer that all caches should be flushed to 3DS memory
void FlushAll();
/// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
void FlushRegion(PAddr addr, u32 size);
/// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(PAddr addr, u32 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(PAddr addr, u32 size);
/// Removes as much state as possible from the rasterizer in preparation for a save/load state
void ClearAll(bool flush);
/// Request a screenshot of the next frame
void RequestScreenshot(u8* data, std::function<void()> callback,
const Layout::FramebufferLayout& layout);
/// Returns the resolution scale factor
u16 GetResolutionScaleFactor();
private:
Core::System& system;
Memory::MemorySystem& memory;
// Renderer
VideoCore::RasterizerInterface* rasterizer = nullptr;
std::unique_ptr<VideoCore::RendererBase> renderer = nullptr;
std::unique_ptr<Maestro> maestro = nullptr;
};
} // namespace Pica

View File

@ -34,13 +34,13 @@ template <typename T>
void Zero(T& o) {
static_assert(std::is_trivially_copyable_v<T>,
"It's undefined behavior to memset a non-trivially copyable type");
std::memset(&o, 0, sizeof(o));
memset(&o, 0, sizeof(o));
}
State::State() : geometry_pipeline(*this) {
auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) {
using Pica::Shader::OutputVertex;
auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1,
const OutputVertex& v2) {
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
};

View File

@ -4,7 +4,6 @@
#pragma once
#include "video_core/regs_texturing.h"
namespace Pica {
/// Initialize Pica state

View File

@ -0,0 +1,400 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
//#define PICA_REG(name, address)
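// Consumption sketch (X-macro pattern; the enum name and include path are
// illustrative):
//
//   enum class RegName : u32 {
//   #define PICA_REG(name, address) name = address,
//   #include "video_core/pica_reg_list.inc"
//   #undef PICA_REG
//   };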
PICA_REG(FINALIZE, 0x010)
PICA_REG(FACECULLING_CONFIG, 0x040)
PICA_REG(VIEWPORT_WIDTH, 0x041)
PICA_REG(VIEWPORT_INVW, 0x042)
PICA_REG(VIEWPORT_HEIGHT, 0x043)
PICA_REG(VIEWPORT_INVH, 0x044)
PICA_REG(FRAGOP_CLIP, 0x047)
PICA_REG(FRAGOP_CLIP_DATA0, 0x048)
PICA_REG(FRAGOP_CLIP_DATA1, 0x049)
PICA_REG(FRAGOP_CLIP_DATA2, 0x04A)
PICA_REG(FRAGOP_CLIP_DATA3, 0x04B)
PICA_REG(DEPTHMAP_SCALE, 0x04D)
PICA_REG(DEPTHMAP_OFFSET, 0x04E)
PICA_REG(SH_OUTMAP_TOTAL, 0x04F)
PICA_REG(SH_OUTMAP_O0, 0x050)
PICA_REG(SH_OUTMAP_O1, 0x051)
PICA_REG(SH_OUTMAP_O2, 0x052)
PICA_REG(SH_OUTMAP_O3, 0x053)
PICA_REG(SH_OUTMAP_O4, 0x054)
PICA_REG(SH_OUTMAP_O5, 0x055)
PICA_REG(SH_OUTMAP_O6, 0x056)
PICA_REG(EARLYDEPTH_FUNC, 0x061)
PICA_REG(EARLYDEPTH_TEST1, 0x062)
PICA_REG(EARLYDEPTH_CLEAR, 0x063)
PICA_REG(SH_OUTATTR_MODE, 0x064)
PICA_REG(SCISSORTEST_MODE, 0x065)
PICA_REG(SCISSORTEST_POS, 0x066)
PICA_REG(SCISSORTEST_DIM, 0x067)
PICA_REG(VIEWPORT_XY, 0x068)
PICA_REG(EARLYDEPTH_DATA, 0x06A)
PICA_REG(DEPTHMAP_ENABLE, 0x06D)
PICA_REG(RENDERBUF_DIM, 0x06E)
PICA_REG(SH_OUTATTR_CLOCK, 0x06F)
PICA_REG(TEXUNIT_CONFIG, 0x080)
PICA_REG(TEXUNIT0_BORDER_COLOR, 0x081)
PICA_REG(TEXUNIT0_DIM, 0x082)
PICA_REG(TEXUNIT0_PARAM, 0x083)
PICA_REG(TEXUNIT0_LOD, 0x084)
PICA_REG(TEXUNIT0_ADDR1, 0x085)
PICA_REG(TEXUNIT0_ADDR2, 0x086)
PICA_REG(TEXUNIT0_ADDR3, 0x087)
PICA_REG(TEXUNIT0_ADDR4, 0x088)
PICA_REG(TEXUNIT0_ADDR5, 0x089)
PICA_REG(TEXUNIT0_ADDR6, 0x08A)
PICA_REG(TEXUNIT0_SHADOW, 0x08B)
PICA_REG(TEXUNIT0_TYPE, 0x08E)
PICA_REG(LIGHTING_ENABLE0, 0x08F)
PICA_REG(TEXUNIT1_BORDER_COLOR, 0x091)
PICA_REG(TEXUNIT1_DIM, 0x092)
PICA_REG(TEXUNIT1_PARAM, 0x093)
PICA_REG(TEXUNIT1_LOD, 0x094)
PICA_REG(TEXUNIT1_ADDR, 0x095)
PICA_REG(TEXUNIT1_TYPE, 0x096)
PICA_REG(TEXUNIT2_BORDER_COLOR, 0x099)
PICA_REG(TEXUNIT2_DIM, 0x09A)
PICA_REG(TEXUNIT2_PARAM, 0x09B)
PICA_REG(TEXUNIT2_LOD, 0x09C)
PICA_REG(TEXUNIT2_ADDR, 0x09D)
PICA_REG(TEXUNIT2_TYPE, 0x09E)
PICA_REG(TEXUNIT3_PROCTEX0, 0x0A8)
PICA_REG(TEXUNIT3_PROCTEX1, 0x0A9)
PICA_REG(TEXUNIT3_PROCTEX2, 0x0AA)
PICA_REG(TEXUNIT3_PROCTEX3, 0x0AB)
PICA_REG(TEXUNIT3_PROCTEX4, 0x0AC)
PICA_REG(TEXUNIT3_PROCTEX5, 0x0AD)
PICA_REG(PROCTEX_LUT, 0x0AF)
PICA_REG(PROCTEX_LUT_DATA0, 0x0B0)
PICA_REG(PROCTEX_LUT_DATA1, 0x0B1)
PICA_REG(PROCTEX_LUT_DATA2, 0x0B2)
PICA_REG(PROCTEX_LUT_DATA3, 0x0B3)
PICA_REG(PROCTEX_LUT_DATA4, 0x0B4)
PICA_REG(PROCTEX_LUT_DATA5, 0x0B5)
PICA_REG(PROCTEX_LUT_DATA6, 0x0B6)
PICA_REG(PROCTEX_LUT_DATA7, 0x0B7)
PICA_REG(TEXENV0_SOURCE, 0x0C0)
PICA_REG(TEXENV0_OPERAND, 0x0C1)
PICA_REG(TEXENV0_COMBINER, 0x0C2)
PICA_REG(TEXENV0_COLOR, 0x0C3)
PICA_REG(TEXENV0_SCALE, 0x0C4)
PICA_REG(TEXENV1_SOURCE, 0x0C8)
PICA_REG(TEXENV1_OPERAND, 0x0C9)
PICA_REG(TEXENV1_COMBINER, 0x0CA)
PICA_REG(TEXENV1_COLOR, 0x0CB)
PICA_REG(TEXENV1_SCALE, 0x0CC)
PICA_REG(TEXENV2_SOURCE, 0x0D0)
PICA_REG(TEXENV2_OPERAND, 0x0D1)
PICA_REG(TEXENV2_COMBINER, 0x0D2)
PICA_REG(TEXENV2_COLOR, 0x0D3)
PICA_REG(TEXENV2_SCALE, 0x0D4)
PICA_REG(TEXENV3_SOURCE, 0x0D8)
PICA_REG(TEXENV3_OPERAND, 0x0D9)
PICA_REG(TEXENV3_COMBINER, 0x0DA)
PICA_REG(TEXENV3_COLOR, 0x0DB)
PICA_REG(TEXENV3_SCALE, 0x0DC)
PICA_REG(TEXENV_UPDATE_BUFFER, 0x0E0)
PICA_REG(FOG_COLOR, 0x0E1)
PICA_REG(GAS_ATTENUATION, 0x0E4)
PICA_REG(GAS_ACCMAX, 0x0E5)
PICA_REG(FOG_LUT_INDEX, 0x0E6)
PICA_REG(FOG_LUT_DATA0, 0x0E8)
PICA_REG(FOG_LUT_DATA1, 0x0E9)
PICA_REG(FOG_LUT_DATA2, 0x0EA)
PICA_REG(FOG_LUT_DATA3, 0x0EB)
PICA_REG(FOG_LUT_DATA4, 0x0EC)
PICA_REG(FOG_LUT_DATA5, 0x0ED)
PICA_REG(FOG_LUT_DATA6, 0x0EE)
PICA_REG(FOG_LUT_DATA7, 0x0EF)
PICA_REG(TEXENV4_SOURCE, 0x0F0)
PICA_REG(TEXENV4_OPERAND, 0x0F1)
PICA_REG(TEXENV4_COMBINER, 0x0F2)
PICA_REG(TEXENV4_COLOR, 0x0F3)
PICA_REG(TEXENV4_SCALE, 0x0F4)
PICA_REG(TEXENV5_SOURCE, 0x0F8)
PICA_REG(TEXENV5_OPERAND, 0x0F9)
PICA_REG(TEXENV5_COMBINER, 0x0FA)
PICA_REG(TEXENV5_COLOR, 0x0FB)
PICA_REG(TEXENV5_SCALE, 0x0FC)
PICA_REG(TEXENV_BUFFER_COLOR, 0x0FD)
PICA_REG(COLOR_OPERATION, 0x100)
PICA_REG(BLEND_FUNC, 0x101)
PICA_REG(LOGIC_OP, 0x102)
PICA_REG(BLEND_COLOR, 0x103)
PICA_REG(FRAGOP_ALPHA_TEST, 0x104)
PICA_REG(STENCIL_TEST, 0x105)
PICA_REG(STENCIL_OP, 0x106)
PICA_REG(DEPTH_COLOR_MASK, 0x107)
PICA_REG(FRAMEBUFFER_INVALIDATE, 0x110)
PICA_REG(FRAMEBUFFER_FLUSH, 0x111)
PICA_REG(COLORBUFFER_READ, 0x112)
PICA_REG(COLORBUFFER_WRITE, 0x113)
PICA_REG(DEPTHBUFFER_READ, 0x114)
PICA_REG(DEPTHBUFFER_WRITE, 0x115)
PICA_REG(DEPTHBUFFER_FORMAT, 0x116)
PICA_REG(COLORBUFFER_FORMAT, 0x117)
PICA_REG(EARLYDEPTH_TEST2, 0x118)
PICA_REG(FRAMEBUFFER_BLOCK32, 0x11B)
PICA_REG(DEPTHBUFFER_LOC, 0x11C)
PICA_REG(COLORBUFFER_LOC, 0x11D)
PICA_REG(FRAMEBUFFER_DIM, 0x11E)
PICA_REG(GAS_LIGHT_XY, 0x120)
PICA_REG(GAS_LIGHT_Z, 0x121)
PICA_REG(GAS_LIGHT_Z_COLOR, 0x122)
PICA_REG(GAS_LUT_INDEX, 0x123)
PICA_REG(GAS_LUT_DATA, 0x124)
PICA_REG(GAS_DELTAZ_DEPTH, 0x126)
PICA_REG(FRAGOP_SHADOW, 0x130)
PICA_REG(LIGHT0_SPECULAR0, 0x140)
PICA_REG(LIGHT0_SPECULAR1, 0x141)
PICA_REG(LIGHT0_DIFFUSE, 0x142)
PICA_REG(LIGHT0_AMBIENT, 0x143)
PICA_REG(LIGHT0_XY, 0x144)
PICA_REG(LIGHT0_Z, 0x145)
PICA_REG(LIGHT0_SPOTDIR_XY, 0x146)
PICA_REG(LIGHT0_SPOTDIR_Z, 0x147)
PICA_REG(LIGHT0_CONFIG, 0x149)
PICA_REG(LIGHT0_ATTENUATION_BIAS, 0x14A)
PICA_REG(LIGHT0_ATTENUATION_SCALE, 0x14B)
PICA_REG(LIGHT1_SPECULAR0, 0x150)
PICA_REG(LIGHT1_SPECULAR1, 0x151)
PICA_REG(LIGHT1_DIFFUSE, 0x152)
PICA_REG(LIGHT1_AMBIENT, 0x153)
PICA_REG(LIGHT1_XY, 0x154)
PICA_REG(LIGHT1_Z, 0x155)
PICA_REG(LIGHT1_SPOTDIR_XY, 0x156)
PICA_REG(LIGHT1_SPOTDIR_Z, 0x157)
PICA_REG(LIGHT1_CONFIG, 0x159)
PICA_REG(LIGHT1_ATTENUATION_BIAS, 0x15A)
PICA_REG(LIGHT1_ATTENUATION_SCALE, 0x15B)
PICA_REG(LIGHT2_SPECULAR0, 0x160)
PICA_REG(LIGHT2_SPECULAR1, 0x161)
PICA_REG(LIGHT2_DIFFUSE, 0x162)
PICA_REG(LIGHT2_AMBIENT, 0x163)
PICA_REG(LIGHT2_XY, 0x164)
PICA_REG(LIGHT2_Z, 0x165)
PICA_REG(LIGHT2_SPOTDIR_XY, 0x166)
PICA_REG(LIGHT2_SPOTDIR_Z, 0x167)
PICA_REG(LIGHT2_CONFIG, 0x169)
PICA_REG(LIGHT2_ATTENUATION_BIAS, 0x16A)
PICA_REG(LIGHT2_ATTENUATION_SCALE, 0x16B)
PICA_REG(LIGHT3_SPECULAR0, 0x170)
PICA_REG(LIGHT3_SPECULAR1, 0x171)
PICA_REG(LIGHT3_DIFFUSE, 0x172)
PICA_REG(LIGHT3_AMBIENT, 0x173)
PICA_REG(LIGHT3_XY, 0x174)
PICA_REG(LIGHT3_Z, 0x175)
PICA_REG(LIGHT3_SPOTDIR_XY, 0x176)
PICA_REG(LIGHT3_SPOTDIR_Z, 0x177)
PICA_REG(LIGHT3_CONFIG, 0x179)
PICA_REG(LIGHT3_ATTENUATION_BIAS, 0x17A)
PICA_REG(LIGHT3_ATTENUATION_SCALE, 0x17B)
PICA_REG(LIGHT4_SPECULAR0, 0x180)
PICA_REG(LIGHT4_SPECULAR1, 0x181)
PICA_REG(LIGHT4_DIFFUSE, 0x182)
PICA_REG(LIGHT4_AMBIENT, 0x183)
PICA_REG(LIGHT4_XY, 0x184)
PICA_REG(LIGHT4_Z, 0x185)
PICA_REG(LIGHT4_SPOTDIR_XY, 0x186)
PICA_REG(LIGHT4_SPOTDIR_Z, 0x187)
PICA_REG(LIGHT4_CONFIG, 0x189)
PICA_REG(LIGHT4_ATTENUATION_BIAS, 0x18A)
PICA_REG(LIGHT4_ATTENUATION_SCALE, 0x18B)
PICA_REG(LIGHT5_SPECULAR0, 0x190)
PICA_REG(LIGHT5_SPECULAR1, 0x191)
PICA_REG(LIGHT5_DIFFUSE, 0x192)
PICA_REG(LIGHT5_AMBIENT, 0x193)
PICA_REG(LIGHT5_XY, 0x194)
PICA_REG(LIGHT5_Z, 0x195)
PICA_REG(LIGHT5_SPOTDIR_XY, 0x196)
PICA_REG(LIGHT5_SPOTDIR_Z, 0x197)
PICA_REG(LIGHT5_CONFIG, 0x199)
PICA_REG(LIGHT5_ATTENUATION_BIAS, 0x19A)
PICA_REG(LIGHT5_ATTENUATION_SCALE, 0x19B)
PICA_REG(LIGHT6_SPECULAR0, 0x1A0)
PICA_REG(LIGHT6_SPECULAR1, 0x1A1)
PICA_REG(LIGHT6_DIFFUSE, 0x1A2)
PICA_REG(LIGHT6_AMBIENT, 0x1A3)
PICA_REG(LIGHT6_XY, 0x1A4)
PICA_REG(LIGHT6_Z, 0x1A5)
PICA_REG(LIGHT6_SPOTDIR_XY, 0x1A6)
PICA_REG(LIGHT6_SPOTDIR_Z, 0x1A7)
PICA_REG(LIGHT6_CONFIG, 0x1A9)
PICA_REG(LIGHT6_ATTENUATION_BIAS, 0x1AA)
PICA_REG(LIGHT6_ATTENUATION_SCALE, 0x1AB)
PICA_REG(LIGHT7_SPECULAR0, 0x1B0)
PICA_REG(LIGHT7_SPECULAR1, 0x1B1)
PICA_REG(LIGHT7_DIFFUSE, 0x1B2)
PICA_REG(LIGHT7_AMBIENT, 0x1B3)
PICA_REG(LIGHT7_XY, 0x1B4)
PICA_REG(LIGHT7_Z, 0x1B5)
PICA_REG(LIGHT7_SPOTDIR_XY, 0x1B6)
PICA_REG(LIGHT7_SPOTDIR_Z, 0x1B7)
PICA_REG(LIGHT7_CONFIG, 0x1B9)
PICA_REG(LIGHT7_ATTENUATION_BIAS, 0x1BA)
PICA_REG(LIGHT7_ATTENUATION_SCALE, 0x1BB)
PICA_REG(LIGHTING_AMBIENT, 0x1C0)
PICA_REG(LIGHTING_NUM_LIGHTS, 0x1C2)
PICA_REG(LIGHTING_CONFIG0, 0x1C3)
PICA_REG(LIGHTING_CONFIG1, 0x1C4)
PICA_REG(LIGHTING_LUT_INDEX, 0x1C5)
PICA_REG(LIGHTING_ENABLE1, 0x1C6)
PICA_REG(LIGHTING_LUT_DATA0, 0x1C8)
PICA_REG(LIGHTING_LUT_DATA1, 0x1C9)
PICA_REG(LIGHTING_LUT_DATA2, 0x1CA)
PICA_REG(LIGHTING_LUT_DATA3, 0x1CB)
PICA_REG(LIGHTING_LUT_DATA4, 0x1CC)
PICA_REG(LIGHTING_LUT_DATA5, 0x1CD)
PICA_REG(LIGHTING_LUT_DATA6, 0x1CE)
PICA_REG(LIGHTING_LUT_DATA7, 0x1CF)
PICA_REG(LIGHTING_LUTINPUT_ABS, 0x1D0)
PICA_REG(LIGHTING_LUTINPUT_SELECT, 0x1D1)
PICA_REG(LIGHTING_LUTINPUT_SCALE, 0x1D2)
PICA_REG(LIGHTING_LIGHT_PERMUTATION, 0x1D9)
PICA_REG(ATTRIBBUFFERS_LOC, 0x200)
PICA_REG(ATTRIBBUFFERS_FORMAT_LOW, 0x201)
PICA_REG(ATTRIBBUFFERS_FORMAT_HIGH, 0x202)
PICA_REG(ATTRIBBUFFER0_OFFSET, 0x203)
PICA_REG(ATTRIBBUFFER0_CONFIG1, 0x204)
PICA_REG(ATTRIBBUFFER0_CONFIG2, 0x205)
PICA_REG(ATTRIBBUFFER1_OFFSET, 0x206)
PICA_REG(ATTRIBBUFFER1_CONFIG1, 0x207)
PICA_REG(ATTRIBBUFFER1_CONFIG2, 0x208)
PICA_REG(ATTRIBBUFFER2_OFFSET, 0x209)
PICA_REG(ATTRIBBUFFER2_CONFIG1, 0x20A)
PICA_REG(ATTRIBBUFFER2_CONFIG2, 0x20B)
PICA_REG(ATTRIBBUFFER3_OFFSET, 0x20C)
PICA_REG(ATTRIBBUFFER3_CONFIG1, 0x20D)
PICA_REG(ATTRIBBUFFER3_CONFIG2, 0x20E)
PICA_REG(ATTRIBBUFFER4_OFFSET, 0x20F)
PICA_REG(ATTRIBBUFFER4_CONFIG1, 0x210)
PICA_REG(ATTRIBBUFFER4_CONFIG2, 0x211)
PICA_REG(ATTRIBBUFFER5_OFFSET, 0x212)
PICA_REG(ATTRIBBUFFER5_CONFIG1, 0x213)
PICA_REG(ATTRIBBUFFER5_CONFIG2, 0x214)
PICA_REG(ATTRIBBUFFER6_OFFSET, 0x215)
PICA_REG(ATTRIBBUFFER6_CONFIG1, 0x216)
PICA_REG(ATTRIBBUFFER6_CONFIG2, 0x217)
PICA_REG(ATTRIBBUFFER7_OFFSET, 0x218)
PICA_REG(ATTRIBBUFFER7_CONFIG1, 0x219)
PICA_REG(ATTRIBBUFFER7_CONFIG2, 0x21A)
PICA_REG(ATTRIBBUFFER8_OFFSET, 0x21B)
PICA_REG(ATTRIBBUFFER8_CONFIG1, 0x21C)
PICA_REG(ATTRIBBUFFER8_CONFIG2, 0x21D)
PICA_REG(ATTRIBBUFFER9_OFFSET, 0x21E)
PICA_REG(ATTRIBBUFFER9_CONFIG1, 0x21F)
PICA_REG(ATTRIBBUFFER9_CONFIG2, 0x220)
PICA_REG(ATTRIBBUFFER10_OFFSET, 0x221)
PICA_REG(ATTRIBBUFFER10_CONFIG1, 0x222)
PICA_REG(ATTRIBBUFFER10_CONFIG2, 0x223)
PICA_REG(ATTRIBBUFFER11_OFFSET, 0x224)
PICA_REG(ATTRIBBUFFER11_CONFIG1, 0x225)
PICA_REG(ATTRIBBUFFER11_CONFIG2, 0x226)
PICA_REG(INDEXBUFFER_CONFIG, 0x227)
PICA_REG(NUMVERTICES, 0x228)
PICA_REG(GEOSTAGE_CONFIG, 0x229)
PICA_REG(VERTEX_OFFSET, 0x22A)
PICA_REG(POST_VERTEX_CACHE_NUM, 0x22D)
PICA_REG(DRAWARRAYS, 0x22E)
PICA_REG(DRAWELEMENTS, 0x22F)
PICA_REG(VTX_FUNC, 0x231)
PICA_REG(FIXEDATTRIB_INDEX, 0x232)
PICA_REG(FIXEDATTRIB_DATA0, 0x233)
PICA_REG(FIXEDATTRIB_DATA1, 0x234)
PICA_REG(FIXEDATTRIB_DATA2, 0x235)
PICA_REG(CMDBUF_SIZE0, 0x238)
PICA_REG(CMDBUF_SIZE1, 0x239)
PICA_REG(CMDBUF_ADDR0, 0x23A)
PICA_REG(CMDBUF_ADDR1, 0x23B)
PICA_REG(CMDBUF_JUMP0, 0x23C)
PICA_REG(CMDBUF_JUMP1, 0x23D)
PICA_REG(VSH_NUM_ATTR, 0x242)
PICA_REG(VSH_COM_MODE, 0x244)
PICA_REG(START_DRAW_FUNC0, 0x245)
PICA_REG(VSH_OUTMAP_TOTAL1, 0x24A)
PICA_REG(VSH_OUTMAP_TOTAL2, 0x251)
PICA_REG(GSH_MISC0, 0x252)
PICA_REG(GEOSTAGE_CONFIG2, 0x253)
PICA_REG(GSH_MISC1, 0x254)
PICA_REG(PRIMITIVE_CONFIG, 0x25E)
PICA_REG(RESTART_PRIMITIVE, 0x25F)
PICA_REG(GSH_BOOLUNIFORM, 0x280)
PICA_REG(GSH_INTUNIFORM_I0, 0x281)
PICA_REG(GSH_INTUNIFORM_I1, 0x282)
PICA_REG(GSH_INTUNIFORM_I2, 0x283)
PICA_REG(GSH_INTUNIFORM_I3, 0x284)
PICA_REG(GSH_INPUTBUFFER_CONFIG, 0x289)
PICA_REG(GSH_ENTRYPOINT, 0x28A)
PICA_REG(GSH_ATTRIBUTES_PERMUTATION_LOW, 0x28B)
PICA_REG(GSH_ATTRIBUTES_PERMUTATION_HIGH, 0x28C)
PICA_REG(GSH_OUTMAP_MASK, 0x28D)
PICA_REG(GSH_CODETRANSFER_END, 0x28F)
PICA_REG(GSH_FLOATUNIFORM_INDEX, 0x290)
PICA_REG(GSH_FLOATUNIFORM_DATA0, 0x291)
PICA_REG(GSH_FLOATUNIFORM_DATA1, 0x292)
PICA_REG(GSH_FLOATUNIFORM_DATA2, 0x293)
PICA_REG(GSH_FLOATUNIFORM_DATA3, 0x294)
PICA_REG(GSH_FLOATUNIFORM_DATA4, 0x295)
PICA_REG(GSH_FLOATUNIFORM_DATA5, 0x296)
PICA_REG(GSH_FLOATUNIFORM_DATA6, 0x297)
PICA_REG(GSH_FLOATUNIFORM_DATA7, 0x298)
PICA_REG(GSH_CODETRANSFER_INDEX, 0x29B)
PICA_REG(GSH_CODETRANSFER_DATA0, 0x29C)
PICA_REG(GSH_CODETRANSFER_DATA1, 0x29D)
PICA_REG(GSH_CODETRANSFER_DATA2, 0x29E)
PICA_REG(GSH_CODETRANSFER_DATA3, 0x29F)
PICA_REG(GSH_CODETRANSFER_DATA4, 0x2A0)
PICA_REG(GSH_CODETRANSFER_DATA5, 0x2A1)
PICA_REG(GSH_CODETRANSFER_DATA6, 0x2A2)
PICA_REG(GSH_CODETRANSFER_DATA7, 0x2A3)
PICA_REG(GSH_OPDESCS_INDEX, 0x2A5)
PICA_REG(GSH_OPDESCS_DATA0, 0x2A6)
PICA_REG(GSH_OPDESCS_DATA1, 0x2A7)
PICA_REG(GSH_OPDESCS_DATA2, 0x2A8)
PICA_REG(GSH_OPDESCS_DATA3, 0x2A9)
PICA_REG(GSH_OPDESCS_DATA4, 0x2AA)
PICA_REG(GSH_OPDESCS_DATA5, 0x2AB)
PICA_REG(GSH_OPDESCS_DATA6, 0x2AC)
PICA_REG(GSH_OPDESCS_DATA7, 0x2AD)
PICA_REG(VSH_BOOLUNIFORM, 0x2B0)
PICA_REG(VSH_INTUNIFORM_I0, 0x2B1)
PICA_REG(VSH_INTUNIFORM_I1, 0x2B2)
PICA_REG(VSH_INTUNIFORM_I2, 0x2B3)
PICA_REG(VSH_INTUNIFORM_I3, 0x2B4)
PICA_REG(VSH_INPUTBUFFER_CONFIG, 0x2B9)
PICA_REG(VSH_ENTRYPOINT, 0x2BA)
PICA_REG(VSH_ATTRIBUTES_PERMUTATION_LOW, 0x2BB)
PICA_REG(VSH_ATTRIBUTES_PERMUTATION_HIGH, 0x2BC)
PICA_REG(VSH_OUTMAP_MASK, 0x2BD)
PICA_REG(VSH_CODETRANSFER_END, 0x2BF)
PICA_REG(VSH_FLOATUNIFORM_INDEX, 0x2C0)
PICA_REG(VSH_FLOATUNIFORM_DATA0, 0x2C1)
PICA_REG(VSH_FLOATUNIFORM_DATA1, 0x2C2)
PICA_REG(VSH_FLOATUNIFORM_DATA2, 0x2C3)
PICA_REG(VSH_FLOATUNIFORM_DATA3, 0x2C4)
PICA_REG(VSH_FLOATUNIFORM_DATA4, 0x2C5)
PICA_REG(VSH_FLOATUNIFORM_DATA5, 0x2C6)
PICA_REG(VSH_FLOATUNIFORM_DATA6, 0x2C7)
PICA_REG(VSH_FLOATUNIFORM_DATA7, 0x2C8)
PICA_REG(VSH_CODETRANSFER_INDEX, 0x2CB)
PICA_REG(VSH_CODETRANSFER_DATA0, 0x2CC)
PICA_REG(VSH_CODETRANSFER_DATA1, 0x2CD)
PICA_REG(VSH_CODETRANSFER_DATA2, 0x2CE)
PICA_REG(VSH_CODETRANSFER_DATA3, 0x2CF)
PICA_REG(VSH_CODETRANSFER_DATA4, 0x2D0)
PICA_REG(VSH_CODETRANSFER_DATA5, 0x2D1)
PICA_REG(VSH_CODETRANSFER_DATA6, 0x2D2)
PICA_REG(VSH_CODETRANSFER_DATA7, 0x2D3)
PICA_REG(VSH_OPDESCS_INDEX, 0x2D5)
PICA_REG(VSH_OPDESCS_DATA0, 0x2D6)
PICA_REG(VSH_OPDESCS_DATA1, 0x2D7)
PICA_REG(VSH_OPDESCS_DATA2, 0x2D8)
PICA_REG(VSH_OPDESCS_DATA3, 0x2D9)
PICA_REG(VSH_OPDESCS_DATA4, 0x2DA)
PICA_REG(VSH_OPDESCS_DATA5, 0x2DB)
PICA_REG(VSH_OPDESCS_DATA6, 0x2DC)
PICA_REG(VSH_OPDESCS_DATA7, 0x2DD)

View File

@ -1,4 +1,4 @@
// Copyright 2015 Citra Emulator Project
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -22,40 +22,44 @@ namespace Pica {
*
* @todo Verify on HW if this conversion is sufficiently accurate.
*/
template <unsigned M, unsigned E>
template <u32 M, u32 E>
struct Float {
static constexpr u32 width = M + E + 1;
static constexpr u32 bias = 128 - (1 << (E - 1));
static constexpr u32 exponent_mask = (1 << E) - 1;
static constexpr u32 mantissa_mask = (1 << M) - 1;
static constexpr u32 sign_mask = 1 << (E + M);
public:
static Float<M, E> FromFloat32(float val) {
Float<M, E> ret;
static Float FromFloat32(float val) {
Float ret;
ret.value = val;
return ret;
}
static Float<M, E> FromRaw(u32 hex) {
Float<M, E> res;
static Float FromRaw(u32 hex) {
Float res;
const int width = M + E + 1;
const int bias = 128 - (1 << (E - 1));
int exponent = (hex >> M) & ((1 << E) - 1);
const unsigned mantissa = hex & ((1 << M) - 1);
const unsigned sign = (hex >> (E + M)) << 31;
u32 exponent = (hex >> M) & exponent_mask;
const u32 mantissa = hex & mantissa_mask;
const u32 sign = (hex & sign_mask) << (31 - M - E);
if (hex & ((1 << (width - 1)) - 1)) {
if (exponent == (1 << E) - 1)
if (hex & (mantissa_mask | (exponent_mask << M))) {
if (exponent == exponent_mask) {
exponent = 255;
else
} else {
exponent += bias;
}
hex = sign | (mantissa << (23 - M)) | (exponent << 23);
} else {
hex = sign;
}
std::memcpy(&res.value, &hex, sizeof(float));
return res;
}
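// Worked example (PICA float24: M = 16, E = 7): bias = 128 - (1 << 6) = 64, so
// the stored exponent 63 that float24 uses for 1.0 maps to 63 + 64 = 127, the
// IEEE-754 single-precision exponent of 1.0.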
static Float<M, E> Zero() {
static Float Zero() {
return FromFloat32(0.f);
}
@ -64,80 +68,77 @@ public:
return value;
}
Float<M, E> operator*(const Float<M, E>& flt) const {
Float operator*(const Float& flt) const {
float result = value * flt.ToFloat32();
// PICA gives 0 instead of NaN when multiplying by inf
if (std::isnan(result))
if (!std::isnan(value) && !std::isnan(flt.ToFloat32()))
if (std::isnan(result) && !std::isnan(value) && !std::isnan(flt.ToFloat32())) {
result = 0.f;
return Float<M, E>::FromFloat32(result);
}
Float<M, E> operator/(const Float<M, E>& flt) const {
return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32());
return Float::FromFloat32(result);
}
Float<M, E> operator+(const Float<M, E>& flt) const {
return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32());
Float operator/(const Float& flt) const {
return Float::FromFloat32(ToFloat32() / flt.ToFloat32());
}
Float<M, E> operator-(const Float<M, E>& flt) const {
return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32());
Float operator+(const Float& flt) const {
return Float::FromFloat32(ToFloat32() + flt.ToFloat32());
}
Float<M, E>& operator*=(const Float<M, E>& flt) {
Float operator-(const Float& flt) const {
return Float::FromFloat32(ToFloat32() - flt.ToFloat32());
}
Float& operator*=(const Float& flt) {
value = operator*(flt).value;
return *this;
}
Float<M, E>& operator/=(const Float<M, E>& flt) {
Float& operator/=(const Float& flt) {
value /= flt.ToFloat32();
return *this;
}
Float<M, E>& operator+=(const Float<M, E>& flt) {
Float& operator+=(const Float& flt) {
value += flt.ToFloat32();
return *this;
}
Float<M, E>& operator-=(const Float<M, E>& flt) {
Float& operator-=(const Float& flt) {
value -= flt.ToFloat32();
return *this;
}
Float<M, E> operator-() const {
return Float<M, E>::FromFloat32(-ToFloat32());
Float operator-() const {
return Float::FromFloat32(-ToFloat32());
}
bool operator<(const Float<M, E>& flt) const {
bool operator<(const Float& flt) const {
return ToFloat32() < flt.ToFloat32();
}
bool operator>(const Float<M, E>& flt) const {
bool operator>(const Float& flt) const {
return ToFloat32() > flt.ToFloat32();
}
bool operator>=(const Float<M, E>& flt) const {
bool operator>=(const Float& flt) const {
return ToFloat32() >= flt.ToFloat32();
}
bool operator<=(const Float<M, E>& flt) const {
bool operator<=(const Float& flt) const {
return ToFloat32() <= flt.ToFloat32();
}
bool operator==(const Float<M, E>& flt) const {
bool operator==(const Float& flt) const {
return ToFloat32() == flt.ToFloat32();
}
bool operator!=(const Float<M, E>& flt) const {
bool operator!=(const Float& flt) const {
return ToFloat32() != flt.ToFloat32();
}
private:
static const unsigned MASK = (1 << (M + E + 1)) - 1;
static const unsigned MANTISSA_MASK = (1 << M) - 1;
static const unsigned EXPONENT_MASK = (1 << E) - 1;
// Stored as a regular float, merely for convenience
// TODO: Perform proper arithmetic on this!
float value;

View File

@ -13,10 +13,6 @@ namespace OpenGL {
struct ScreenInfo;
}
namespace Vulkan {
struct ScreenInfo;
}
namespace Pica::Shader {
struct OutputVertex;
} // namespace Pica::Shader
@ -84,13 +80,6 @@ public:
return false;
}
/// Attempt to use a faster method to display the framebuffer to screen
virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
PAddr framebuffer_addr, u32 pixel_stride,
Vulkan::ScreenInfo& screen_info) {
return false;
}
/// Attempt to draw using hardware shaders
virtual bool AccelerateDrawBatch(bool is_indexed) {
return false;

View File

@ -14,14 +14,13 @@
namespace Pica {
struct FramebufferRegs {
enum class FragmentOperationMode : u32 {
enum class FragmentOperationMode : u32 {
Default = 0,
Gas = 1,
Shadow = 3,
};
};
enum class LogicOp : u32 {
enum class LogicOp : u32 {
Clear = 0,
And = 1,
AndReverse = 2,
@ -38,17 +37,17 @@ struct FramebufferRegs {
AndInverted = 13,
OrReverse = 14,
OrInverted = 15,
};
};
enum class BlendEquation : u32 {
enum class BlendEquation : u32 {
Add = 0,
Subtract = 1,
ReverseSubtract = 2,
Min = 3,
Max = 4,
};
};
enum class BlendFactor : u32 {
enum class BlendFactor : u32 {
Zero = 0,
One = 1,
SourceColor = 2,
@ -64,9 +63,9 @@ struct FramebufferRegs {
ConstantAlpha = 12,
OneMinusConstantAlpha = 13,
SourceAlphaSaturate = 14,
};
};
enum class CompareFunc : u32 {
enum class CompareFunc : u32 {
Never = 0,
Always = 1,
Equal = 2,
@ -75,9 +74,9 @@ struct FramebufferRegs {
LessThanOrEqual = 5,
GreaterThan = 6,
GreaterThanOrEqual = 7,
};
};
enum class StencilAction : u32 {
enum class StencilAction : u32 {
Keep = 0,
Zero = 1,
Replace = 2,
@ -86,8 +85,9 @@ struct FramebufferRegs {
Invert = 5,
IncrementWrap = 6,
DecrementWrap = 7,
};
};
struct FramebufferRegs {
struct {
union {
BitField<0, 2, FragmentOperationMode> fragment_operation_mode;

View File

@ -12,6 +12,13 @@
namespace Pica {
enum class TriangleTopology : u32 {
List = 0,
Strip = 1,
Fan = 2,
Shader = 3, // Programmable setup unit implemented in a geometry shader
};
struct PipelineRegs {
enum class VertexAttributeFormat : u32 {
BYTE = 0,
@ -250,13 +257,6 @@ struct PipelineRegs {
INSERT_PADDING_WORDS(0x9);
enum class TriangleTopology : u32 {
List = 0,
Strip = 1,
Fan = 2,
Shader = 3, // Programmable setup unit implemented in a geometry shader
};
BitField<8, 2, TriangleTopology> triangle_topology;
u32 restart_primitive;

View File

@ -6,21 +6,20 @@
#include <array>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/vector_math.h"
#include "video_core/pica_types.h"
namespace Pica {
struct RasterizerRegs {
enum class CullMode : u32 {
// Select which polygons are considered to be "frontfacing".
// Select which polygons are considered to be "frontfacing".
enum class CullMode : u32 {
KeepAll = 0,
KeepClockWise = 1,
KeepCounterClockWise = 2,
// TODO: What does the third value imply?
};
KeepAll2 = 3 // Same as KeepAll
};
struct RasterizerRegs {
union {
BitField<0, 2, CullMode> cull_mode;
};

View File

@ -13,18 +13,7 @@
namespace Pica {
struct TexturingRegs {
struct TextureConfig {
enum TextureType : u32 {
Texture2D = 0,
TextureCube = 1,
Shadow2D = 2,
Projection2D = 3,
ShadowCube = 4,
Disabled = 5,
};
enum WrapMode : u32 {
enum WrapMode : u32 {
ClampToEdge = 0,
ClampToBorder = 1,
Repeat = 2,
@ -34,11 +23,22 @@ struct TexturingRegs {
ClampToBorder2 = 5, // Positive coord: clamp to border; negative coord: repeat
Repeat2 = 6, // Same as Repeat
Repeat3 = 7, // Same as Repeat
};
};
enum TextureFilter : u32 {
enum TextureFilter : u32 {
Nearest = 0,
Linear = 1,
};
struct TexturingRegs {
struct TextureConfig {
enum TextureType : u32 {
Texture2D = 0,
TextureCube = 1,
Shadow2D = 2,
Projection2D = 3,
ShadowCube = 4,
Disabled = 5,
};
union {

View File

@ -5,8 +5,8 @@
#include <memory>
#include "core/frontend/emu_window.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/swrasterizer/swrasterizer.h"
#include "video_core/video_core.h"

View File

@ -414,7 +414,7 @@ bool RasterizerOpenGL::SetupGeometryShader() {
MICROPROFILE_SCOPE(OpenGL_GS);
const auto& regs = Pica::g_state.regs;
if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
if (regs.pipeline.use_gs != Pica::UseGS::No) {
LOG_ERROR(Render_OpenGL, "Accelerate draw doesn't support geometry shader");
return false;
}

View File

@ -9,18 +9,17 @@
#include <tuple>
#include <utility>
#include <fmt/format.h>
#include <nihstro/shader_bytecode.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/shader_compiler/frontend/opcode.h"
#include "video_core/shader_compiler/frontned/instruction.h"
#include "video_core/shader_compiler/frontend/register.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
namespace OpenGL::ShaderDecompiler {
using Pica::Shader::OpCode;
using Pica::Shader::DestRegister;
using nihstro::Instruction;
using nihstro::OpCode;
using nihstro::RegisterType;
using nihstro::SourceRegister;
using nihstro::SwizzlePattern;
namespace OpenGL::ShaderDecompiler {
constexpr u32 PROGRAM_END = Pica::Shader::MAX_PROGRAM_CODE_LENGTH;

View File

@ -9,7 +9,7 @@
#include <optional>
#include <string>
#include "common/common_types.h"
#include "video_core/shader/shader.h"
#include "video_core/shader_compiler/shader.h"
namespace OpenGL::ShaderDecompiler {

View File

@ -175,11 +175,11 @@ public:
void Create(const char* source, GLenum type) {
if (shader_or_program.which() == 0) {
boost::get<OGLShader>(shader_or_program).Create(source, type);
std::get<OGLShader>(shader_or_program).Create(source, type);
} else {
OGLShader shader;
shader.Create(source, type);
OGLProgram& program = boost::get<OGLProgram>(shader_or_program);
OGLProgram& program = std::get<OGLProgram>(shader_or_program);
program.Create(true, {shader.handle});
SetShaderUniformBlockBindings(program.handle);
@ -191,9 +191,9 @@ public:
GLuint GetHandle() const {
if (shader_or_program.which() == 0) {
return boost::get<OGLShader>(shader_or_program).handle;
return std::get<OGLShader>(shader_or_program).handle;
} else {
return boost::get<OGLProgram>(shader_or_program).handle;
return std::get<OGLProgram>(shader_or_program).handle;
}
}
@ -204,7 +204,7 @@ public:
}
private:
boost::variant<OGLShader, OGLProgram> shader_or_program;
std::variant<OGLShader, OGLProgram> shader_or_program;
};
class TrivialVertexShader {

View File

@ -1,4 +1,4 @@
// Copyright 2015 Citra Emulator Project
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -8,28 +8,32 @@
#include <glm/glm.hpp>
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_texturing.h"
#include "video_core/regs.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace PicaToVK {
using TextureFilter = Pica::TexturingRegs::TextureConfig::TextureFilter;
struct FilterInfo {
vk::Filter mag_filter, min_filter;
vk::SamplerMipmapMode mip_mode;
};
inline FilterInfo TextureFilterMode(TextureFilter mag, TextureFilter min, TextureFilter mip) {
std::array<vk::Filter, 2> filter_table = { vk::Filter::eNearest, vk::Filter::eLinear };
std::array<vk::SamplerMipmapMode, 2> mipmap_table = { vk::SamplerMipmapMode::eNearest, vk::SamplerMipmapMode::eLinear };
inline FilterInfo TextureFilterMode(Pica::TextureFilter mag, Pica::TextureFilter min,
Pica::TextureFilter mip) {
constexpr std::array filter_table = {
vk::Filter::eNearest,
vk::Filter::eLinear
};
return FilterInfo{filter_table[mag], filter_table[min], mipmap_table[mip]};
constexpr std::array mipmap_table = {
vk::SamplerMipmapMode::eNearest,
vk::SamplerMipmapMode::eLinear
};
return FilterInfo{filter_table.at(mag), filter_table.at(min), mipmap_table.at(mip)};
}
inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
inline vk::SamplerAddressMode WrapMode(Pica::WrapMode mode) {
static constexpr std::array<vk::SamplerAddressMode, 8> wrap_mode_table{{
vk::SamplerAddressMode::eClampToEdge,
vk::SamplerAddressMode::eClampToBorder,
@ -63,7 +67,7 @@ inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapM
return wrap_mode_table[index];
}
inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation) {
inline vk::BlendOp BlendEquation(Pica::BlendEquation equation) {
static constexpr std::array<vk::BlendOp, 5> blend_equation_table{{
vk::BlendOp::eAdd,
vk::BlendOp::eSubtract,
@ -85,7 +89,7 @@ inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation)
return blend_equation_table[index];
}
inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {
inline vk::BlendFactor BlendFunc(Pica::BlendFactor factor) {
static constexpr std::array<vk::BlendFactor, 15> blend_func_table{{
vk::BlendFactor::eZero, // BlendFactor::Zero
vk::BlendFactor::eOne, // BlendFactor::One
@ -117,7 +121,7 @@ inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {
return blend_func_table[index];
}
inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) {
inline vk::LogicOp LogicOp(Pica::LogicOp op) {
static constexpr std::array<vk::LogicOp, 16> logic_op_table{{
vk::LogicOp::eClear, // Clear
vk::LogicOp::eAnd, // And
@ -150,7 +154,7 @@ inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) {
return logic_op_table[index];
}
inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) {
inline vk::CompareOp CompareFunc(Pica::CompareFunc func) {
static constexpr std::array<vk::CompareOp, 8> compare_func_table{{
vk::CompareOp::eNever, // CompareFunc::Never
vk::CompareOp::eAlways, // CompareFunc::Always
@ -175,7 +179,7 @@ inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) {
return compare_func_table[index];
}
inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) {
inline vk::StencilOp StencilOp(Pica::StencilAction action) {
static constexpr std::array<vk::StencilOp, 8> stencil_op_table{{
vk::StencilOp::eKeep, // StencilAction::Keep
vk::StencilOp::eZero, // StencilAction::Zero
@ -200,6 +204,30 @@ inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) {
return stencil_op_table[index];
}
inline vk::PrimitiveTopology PrimitiveTopology(Pica::TriangleTopology topology) {
switch (topology) {
case Pica::TriangleTopology::Fan:
return vk::PrimitiveTopology::eTriangleFan;
case Pica::TriangleTopology::List:
case Pica::TriangleTopology::Shader:
return vk::PrimitiveTopology::eTriangleList;
case Pica::TriangleTopology::Strip:
return vk::PrimitiveTopology::eTriangleStrip;
}
// All topology values are handled above; fall back to triangle lists defensively
return vk::PrimitiveTopology::eTriangleList;
}
inline vk::CullModeFlags CullMode(Pica::CullMode mode) {
switch (mode) {
case Pica::CullMode::KeepAll:
case Pica::CullMode::KeepAll2:
return vk::CullModeFlagBits::eNone;
case Pica::CullMode::KeepClockWise:
return vk::CullModeFlagBits::eBack;
case Pica::CullMode::KeepCounterClockWise:
return vk::CullModeFlagBits::eFront;
}
// All cull modes are handled above; fall back to no culling defensively
return vk::CullModeFlagBits::eNone;
}
inline glm::vec4 ColorRGBA8(const u32 color) {
return glm::vec4{
(color >> 0 & 0xFF) / 255.0f,

View File

@ -2,22 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// Enable vulkan platforms
#if defined(ANDROID) || defined (__ANDROID__)
#define VK_USE_PLATFORM_ANDROID_KHR 1
#elif defined(_WIN32)
#define VK_USE_PLATFORM_WIN32_KHR 1
#elif defined(__APPLE__)
#define VK_USE_PLATFORM_MACOS_MVK 1
#define VK_USE_PLATFORM_METAL_EXT 1
#else
#ifdef WAYLAND_DISPLAY
#define VK_USE_PLATFORM_WAYLAND_KHR 1
#else // wayland
#define VK_USE_PLATFORM_XLIB_KHR 1
#endif
#endif
#include <glm/gtc/matrix_transform.hpp>
#include "common/assert.h"
#include "common/logging/log.h"
@ -56,83 +40,6 @@
namespace Vulkan {
vk::SurfaceKHR CreateSurface(const vk::Instance& instance,
const Frontend::EmuWindow& emu_window) {
const auto& window_info = emu_window.GetWindowInfo();
vk::SurfaceKHR surface;
#if VK_USE_PLATFORM_WIN32_KHR
if (window_info.type == Frontend::WindowSystemType::Windows) {
const HWND hWnd = static_cast<HWND>(window_info.render_surface);
const vk::Win32SurfaceCreateInfoKHR win32_ci{{}, nullptr, hWnd};
if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
UNREACHABLE();
}
}
#elif VK_USE_PLATFORM_XLIB_KHR
if (window_info.type == Frontend::WindowSystemType::X11) {
const vk::XlibSurfaceCreateInfoKHR xlib_ci{{},
static_cast<Display*>(window_info.display_connection),
reinterpret_cast<Window>(window_info.render_surface)};
if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
UNREACHABLE();
}
}
#elif VK_USE_PLATFORM_WAYLAND_KHR
if (window_info.type == Frontend::WindowSystemType::Wayland) {
const vk::WaylandSurfaceCreateInfoKHR wayland_ci{{},
static_cast<wl_display*>(window_info.display_connection),
static_cast<wl_surface*>(window_info.render_surface)};
if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
UNREACHABLE();
}
}
#endif
if (!surface) {
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
UNREACHABLE();
}
return surface;
}
std::vector<const char*> RequiredExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) {
std::vector<const char*> extensions;
extensions.reserve(6);
switch (window_type) {
case Frontend::WindowSystemType::Headless:
break;
#ifdef _WIN32
case Frontend::WindowSystemType::Windows:
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
break;
#endif
#if !defined(_WIN32) && !defined(__APPLE__)
case Frontend::WindowSystemType::X11:
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
break;
case Frontend::WindowSystemType::Wayland:
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
break;
#endif
default:
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
break;
}
if (window_type != Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_debug_utils) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
return extensions;
}
RendererVulkan::RendererVulkan(Frontend::EmuWindow& window)
: RendererBase{window} {

View File

@ -0,0 +1,178 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "core/core.h"
#include "common/object_pool.h"
#include "video_core/renderer_vulkan/vk_backend.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_texture.h"
namespace VideoCore::Vulkan {
Backend::Backend(Frontend::EmuWindow& window) : BackendBase(window),
instance(window), swapchain(instance, instance.GetSurface()),
scheduler(instance) {
// TODO: Properly report GPU hardware
auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(user_system, "GPU_Vendor", "NVIDIA");
telemetry_session.AddField(user_system, "GPU_Model", "GTX 1650");
telemetry_session.AddField(user_system, "GPU_Vulkan_Version", "Vulkan 1.3");
// Pre-create all needed renderpasses by the renderer
constexpr std::array color_formats = {
vk::Format::eR8G8B8A8Unorm,
vk::Format::eR8G8B8Unorm,
vk::Format::eR5G5B5A1UnormPack16,
vk::Format::eR5G6B5UnormPack16,
vk::Format::eR4G4B4A4UnormPack16
};
constexpr std::array depth_stencil_formats = {
vk::Format::eD16Unorm,
vk::Format::eX8D24UnormPack32,
vk::Format::eD24UnormS8Uint,
};
// Create all required renderpasses
for (u32 color = 0; color < MAX_COLOR_FORMATS; color++) {
for (u32 depth = 0; depth < MAX_DEPTH_FORMATS; depth++) {
const u32 index = color * MAX_DEPTH_FORMATS + depth;
renderpass_cache[index] = CreateRenderPass(color_formats[color], depth_stencil_formats[depth]);
}
}
}
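// Retrieval sketch (assumed helper, not part of this commit): looking a pass
// back up uses the same index layout as the loop above.
//
//   vk::RenderPass Backend::GetRenderPass(u32 color, u32 depth) const {
//       return renderpass_cache[color * MAX_DEPTH_FORMATS + depth];
//   }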
Backend::~Backend() {
vk::Device device = instance.GetDevice();
for (auto& renderpass : renderpass_cache) {
device.destroyRenderPass(renderpass);
}
}
/**
* To avoid many small heap allocations during handle creation, each resource type
* has a dedicated pool associated with it that batch-allocates memory.
*/
BufferHandle Backend::CreateBuffer(BufferInfo info) {
static ObjectPool<Buffer> buffer_pool;
return IntrusivePtr<Buffer>{buffer_pool.Allocate(info)};
}
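// Contract assumed for ObjectPool (defined in common/object_pool.h): Allocate()
// constructs the object inside a chunked arena so that creating a handle avoids
// an individual heap allocation. Rough shape:
//
//   template <typename T>
//   class ObjectPool {
//   public:
//       template <typename... Args>
//       T* Allocate(Args&&... args);  // placement-new into the current chunk
//   };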
FramebufferHandle Backend::CreateFramebuffer(FramebufferInfo info) {
// TODO: Not implemented in this commit yet; return a null handle instead of
// falling off the end of a value-returning function (undefined behavior)
return FramebufferHandle{};
}
TextureHandle Backend::CreateTexture(TextureInfo info) {
static ObjectPool<Texture> texture_pool;
return IntrusivePtr<Texture>{texture_pool.Allocate(info)};
}
PipelineHandle Backend::CreatePipeline(PipelineType type, PipelineInfo info) {
static ObjectPool<Pipeline> pipeline_pool;
// Find a pipeline layout first
if (auto iter = pipeline_layouts.find(info.layout); iter != pipeline_layouts.end()) {
PipelineLayout& layout = iter->second;
return IntrusivePtr<Pipeline>{pipeline_pool.Allocate(instance, layout, type, info, cache)};
}
// Create the layout
auto result = pipeline_layouts.emplace(info.layout, PipelineLayout{instance, info.layout});
return IntrusivePtr<Pipeline>{pipeline_pool.Allocate(instance, result.first->second, type, info, cache)};
}
SamplerHandle Backend::CreateSampler(SamplerInfo info) {
static ObjectPool<Sampler> sampler_pool;
return IntrusivePtr<Sampler>{sampler_pool.Allocate(info)};
}
void Backend::Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer,
u32 base_vertex, u32 num_vertices) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
Buffer* vertex = static_cast<Buffer*>(vertex_buffer.Get());
command_buffer.bindVertexBuffers(0, vertex->GetHandle(), {0});
// Submit draw
command_buffer.draw(num_vertices, 1, base_vertex, 0);
}
void Backend::DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer, BufferHandle index_buffer,
u32 base_index, u32 num_indices, u32 base_vertex) {
}
vk::RenderPass Backend::CreateRenderPass(vk::Format color, vk::Format depth) const {
// Define attachments
const std::array attachments = {
vk::AttachmentDescription{
.format = color,
.stencilLoadOp = vk::AttachmentLoadOp::eDontCare,
.stencilStoreOp = vk::AttachmentStoreOp::eDontCare,
.initialLayout = vk::ImageLayout::eShaderReadOnlyOptimal,
.finalLayout = vk::ImageLayout::eColorAttachmentOptimal
},
vk::AttachmentDescription{
.format = depth,
.initialLayout = vk::ImageLayout::eShaderReadOnlyOptimal,
.finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal
}
};
// Our renderpasses define only one color attachment and one depth attachment
const vk::AttachmentReference color_attachment_ref = {
.attachment = 0,
.layout = vk::ImageLayout::eColorAttachmentOptimal
};
const vk::AttachmentReference depth_attachment_ref = {
.attachment = 1,
.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal
};
const vk::SubpassDependency subpass_dependency = {
.srcSubpass = VK_SUBPASS_EXTERNAL,
.dstSubpass = 0,
.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput |
vk::PipelineStageFlagBits::eEarlyFragmentTests,
.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput |
vk::PipelineStageFlagBits::eEarlyFragmentTests,
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.dependencyFlags = vk::DependencyFlagBits::eByRegion
};
// We also require only one subpass
const vk::SubpassDescription subpass = {
.pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
.inputAttachmentCount = 0,
.pInputAttachments = nullptr,
.colorAttachmentCount = 1,
.pColorAttachments = &color_attachment_ref,
.pResolveAttachments = nullptr,
.pDepthStencilAttachment = &depth_attachment_ref
};
const vk::RenderPassCreateInfo renderpass_info = {
.attachmentCount = 2,
.pAttachments = attachments.data(),
.subpassCount = 1,
.pSubpasses = &subpass,
.dependencyCount = 1,
.pDependencies = &subpass_dependency
};
// Create the renderpass
vk::Device device = instance.GetDevice();
return device.createRenderPass(renderpass_info);
}
} // namespace VideoCore::Vulkan

View File

@ -0,0 +1,75 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <unordered_map>
#include "video_core/common/backend.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline.h"
namespace VideoCore::Vulkan {
class Texture;
constexpr u32 RENDERPASS_COUNT = MAX_COLOR_FORMATS * MAX_DEPTH_FORMATS;
class Backend : public VideoCore::BackendBase {
public:
Backend(Frontend::EmuWindow& window);
~Backend();
void SwapBuffers() override;
BufferHandle CreateBuffer(BufferInfo info) override;
FramebufferHandle CreateFramebuffer(FramebufferInfo info) override;
TextureHandle CreateTexture(TextureInfo info) override;
PipelineHandle CreatePipeline(PipelineType type, PipelineInfo info) override;
SamplerHandle CreateSampler(SamplerInfo info) override;
void Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer,
u32 base_vertex, u32 num_vertices) override;
void DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer, BufferHandle index_buffer,
u32 base_index, u32 num_indices, u32 base_vertex) override;
void DispatchCompute(PipelineHandle pipeline, Common::Vec3<u32> groupsize,
Common::Vec3<u32> groups) override;
// Returns the vulkan instance
inline const Instance& GetInstance() const {
return instance;
}
// Returns the vulkan command buffer scheduler
inline CommandScheduler& GetScheduler() {
return scheduler;
}
private:
vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth) const;
private:
Instance instance;
Swapchain swapchain;
CommandScheduler scheduler;
// The formats Citra uses are limited so we can pre-create
// all the renderpasses we will need
std::array<vk::RenderPass, RENDERPASS_COUNT> renderpass_cache;
vk::PipelineCache cache;
// Pipeline layout cache
std::unordered_map<PipelineLayoutInfo, PipelineLayout> pipeline_layouts;
};
} // namespace VideoCore::Vulkan

View File

@ -2,165 +2,181 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include <cstring>
namespace Vulkan {
namespace VideoCore::Vulkan {
inline vk::BufferUsageFlags ToVkBufferUsage(BufferUsage usage) {
constexpr std::array vk_buffer_usages = {
vk::BufferUsageFlagBits::eVertexBuffer,
vk::BufferUsageFlagBits::eIndexBuffer,
vk::BufferUsageFlagBits::eUniformBuffer,
vk::BufferUsageFlagBits::eUniformTexelBuffer,
vk::BufferUsageFlagBits::eTransferSrc
};
return vk::BufferUsageFlagBits::eTransferDst |
vk_buffer_usages.at(static_cast<u32>(usage));
}
inline vk::Format ToVkViewFormat(ViewFormat format) {
constexpr std::array vk_view_formats = {
vk::Format::eR32Sfloat,
vk::Format::eR32G32Sfloat,
vk::Format::eR32G32B32Sfloat,
vk::Format::eR32G32B32A32Sfloat
};
return vk_view_formats.at(static_cast<u32>(format));
}
Buffer::Buffer(Instance& instance, CommandScheduler& scheduler, const BufferInfo& info) :
BufferBase(info), instance(instance), scheduler(scheduler) {
vk::BufferCreateInfo buffer_info = {
.size = info.capacity,
.usage = ToVkBufferUsage(info.usage)
};
VmaAllocationCreateInfo alloc_create_info = {
.flags = info.usage == BufferUsage::Staging ?
(VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
VMA_ALLOCATION_CREATE_MAPPED_BIT) :
VmaAllocationCreateFlags{},
.usage = VMA_MEMORY_USAGE_AUTO
};
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
VmaAllocationInfo alloc_info;
VmaAllocator allocator = instance.GetAllocator();
// Allocate texture memory
vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info,
&unsafe_buffer, &allocation, &alloc_info);
buffer = vk::Buffer{unsafe_buffer};
u32 view = 0;
vk::Device device = instance.GetDevice();
while (info.views[view] != ViewFormat::Undefined) {
const vk::BufferViewCreateInfo view_info = {
.buffer = buffer,
.format = ToVkViewFormat(info.views[view]),
.range = info.capacity
};
views[view++] = device.createBufferView(view_info);
}
// Map memory
if (info.usage == BufferUsage::Staging) {
mapped_ptr = alloc_info.pMappedData;
}
}
Buffer::~Buffer() {
Destroy();
}
void Buffer::Create(const Buffer::Info& info) {
auto device = g_vk_instace->GetDevice();
buffer_info = info;
vk::BufferCreateInfo bufferInfo({}, info.size, info.usage);
buffer = device.createBuffer(bufferInfo);
auto mem_requirements = device.getBufferMemoryRequirements(buffer);
auto memory_type_index = FindMemoryType(mem_requirements.memoryTypeBits, info.properties);
vk::MemoryAllocateInfo alloc_info(mem_requirements.size, memory_type_index);
memory = device.allocateMemory(alloc_info);
device.bindBufferMemory(buffer, memory, 0);
// Optionally map the buffer to CPU memory
if (info.properties & vk::MemoryPropertyFlagBits::eHostVisible) {
host_ptr = device.mapMemory(memory, 0, info.size);
}
for (auto& format : info.view_formats) {
if (format != vk::Format::eUndefined) {
views[view_count++] = device.createBufferView({{}, buffer, format, 0, info.size});
}
}
}
void Buffer::Recreate() {
Destroy();
Create(buffer_info);
}
void Buffer::Destroy() {
if (buffer) {
if (host_ptr != nullptr) {
g_vk_instace->GetDevice().unmapMemory(memory);
}
auto deleter = [allocation = allocation,
buffer = buffer,
views = views](vk::Device device, VmaAllocator allocator) {
vmaDestroyBuffer(allocator, static_cast<VkBuffer>(buffer), allocation);
auto deleter = [buffer = buffer,
memory = memory,
view_count = view_count,
views = views]() {
auto device = g_vk_instace->GetDevice();
device.destroyBuffer(buffer);
device.freeMemory(memory);
for (u32 i = 0; i < view_count; i++) {
device.destroyBufferView(views[i]);
u32 view_index = 0;
while (views[view_index]) {
device.destroyBufferView(views[view_index++]);
}
};
g_vk_task_scheduler->Schedule(deleter);
// Delete the buffer immediately if it's allocated in host memory
if (info.usage == BufferUsage::Staging) {
vk::Device device = instance.GetDevice();
VmaAllocator allocator = instance.GetAllocator();
deleter(device, allocator);
} else {
scheduler.Schedule(deleter);
}
}
}
u32 Buffer::FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties) {
vk::PhysicalDeviceMemoryProperties mem_properties = g_vk_instace->GetPhysicalDevice().getMemoryProperties();
for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++)
{
auto flags = mem_properties.memoryTypes[i].propertyFlags;
if ((type_filter & (1 << i)) && (flags & properties) == properties)
return i;
}
LOG_CRITICAL(Render_Vulkan, "Failed to find suitable memory type.");
UNREACHABLE();
}
void Buffer::Upload(std::span<const std::byte> data, u32 offset,
vk::AccessFlags access_to_block,
vk::PipelineStageFlags stage_to_block) {
auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer();
// For small data uploads use vkCmdUpdateBuffer
if (data.size_bytes() < 1024) {
cmdbuffer.updateBuffer(buffer, 0, data.size_bytes(), data.data());
}
else {
auto [ptr, staging_offset] = g_vk_task_scheduler->RequestStaging(data.size());
if (!ptr) {
LOG_ERROR(Render_Vulkan, "Cannot upload data without staging buffer!");
}
// Copy pixels to staging buffer
std::memcpy(ptr, data.data(), data.size_bytes());
auto region = vk::BufferCopy{staging_offset, offset, data.size_bytes()};
auto& staging = g_vk_task_scheduler->GetStaging();
cmdbuffer.copyBuffer(staging.GetBuffer(), buffer, region);
}
vk::BufferMemoryBarrier barrier{
vk::AccessFlagBits::eTransferWrite, access_to_block,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
buffer, offset, data.size_bytes()
};
// Add a pipeline barrier for the region modified
cmdbuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_to_block,
vk::DependencyFlagBits::eByRegion,
0, nullptr, 1, &barrier, 0, nullptr);
}
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
ASSERT(size <= buffer_info.size);
ASSERT(alignment <= buffer_info.size);
std::span<u8> Buffer::Map(u32 size, u32 alignment) {
ASSERT(size <= info.capacity && alignment <= info.capacity);
if (alignment > 0) {
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
buffer_offset = Common::AlignUp<std::size_t>(buffer_offset, alignment);
}
bool invalidate = false;
if (buffer_pos + size > buffer_info.size) {
buffer_pos = 0;
invalidate = true;
// If the buffer is full, invalidate it
if (buffer_offset + size > info.capacity) {
Invalidate();
}
auto [staging_ptr, staging_offset] = g_vk_task_scheduler->RequestStaging(size);
mapped_chunk = vk::BufferCopy{staging_offset, buffer_pos, size};
return std::make_tuple(staging_ptr, buffer_pos, invalidate);
if (info.usage == BufferUsage::Staging) {
return std::span<u8>{reinterpret_cast<u8*>(mapped_ptr) + buffer_offset, size};
} else {
Buffer& staging = scheduler.GetCommandUploadBuffer();
return staging.Map(size, alignment);
}
}
void StreamBuffer::Commit(u32 size, vk::AccessFlags access_to_block,
vk::PipelineStageFlags stage_to_block) {
if (size > 0) {
mapped_chunk.size = size;
void Buffer::Commit(u32 size) {
VmaAllocator allocator = instance.GetAllocator();
if (info.usage == BufferUsage::Staging && size > 0) {
vmaFlushAllocation(allocator, allocation, buffer_offset, size);
} else {
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
Buffer& staging = scheduler.GetCommandUploadBuffer();
auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer();
auto& staging = g_vk_task_scheduler->GetStaging();
cmdbuffer.copyBuffer(staging.GetBuffer(), buffer, mapped_chunk);
const vk::BufferCopy copy_region = {
.srcOffset = staging.GetCurrentOffset(),
.dstOffset = buffer_offset,
.size = size
};
vk::BufferMemoryBarrier barrier{
vk::AccessFlagBits::eTransferWrite, access_to_block,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
buffer, mapped_chunk.dstOffset, mapped_chunk.size
// Copy staging buffer to device local buffer
command_buffer.copyBuffer(staging.GetHandle(), buffer, copy_region);
vk::AccessFlags access_mask;
vk::PipelineStageFlags stage_mask;
switch (info.usage) {
case BufferUsage::Vertex:
access_mask = vk::AccessFlagBits::eVertexAttributeRead;
stage_mask = vk::PipelineStageFlagBits::eVertexInput;
break;
case BufferUsage::Index:
access_mask = vk::AccessFlagBits::eIndexRead;
stage_mask = vk::PipelineStageFlagBits::eVertexInput;
break;
case BufferUsage::Uniform:
case BufferUsage::Texel:
access_mask = vk::AccessFlagBits::eUniformRead;
stage_mask = vk::PipelineStageFlagBits::eVertexShader |
vk::PipelineStageFlagBits::eFragmentShader;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown BufferUsage flag!");
}
const vk::BufferMemoryBarrier buffer_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = access_mask,
.buffer = buffer,
.offset = buffer_offset,
.size = size
};
// Add a pipeline barrier for the region modified
cmdbuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_to_block,
vk::DependencyFlagBits::eByRegion,
0, nullptr, 1, &barrier, 0, nullptr);
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
buffer_pos += size;
}
buffer_offset += size;
}
}
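Taken together, Map and Commit implement a simple streaming protocol: Map hands back either the persistently mapped staging memory or a chunk of the scheduler's staging buffer, and Commit either flushes the allocation or records the staging-to-device copy plus the matching barrier. A minimal sketch of the intended calling pattern, assuming a device-local vertex buffer:

#include <cstring>
#include <span>

// Streams vertex data through the staging buffer managed by the scheduler
void UploadVertices(Buffer& buffer, std::span<const u8> data) {
    const u32 size = static_cast<u32>(data.size_bytes());
    std::span<u8> mapped = buffer.Map(size);
    std::memcpy(mapped.data(), data.data(), size);
    // Records the staging -> device copy and the matching pipeline barrier
    buffer.Commit(size);
}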

View File

@ -4,80 +4,47 @@
#pragma once
#include <memory>
#include <vector>
#include <deque>
#include <span>
#include "common/common_types.h"
#include <array>
#include "common/assert.h"
#include "video_core/common/buffer.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
constexpr u32 MAX_BUFFER_VIEWS = 5;
constexpr u32 MAX_COMMIT_CHUNKS = 6;
class Instance;
class CommandScheduler;
/// Generic Vulkan buffer object used by almost every resource
class Buffer : public NonCopyable {
class Buffer : public VideoCore::BufferBase {
public:
struct Info {
u32 size;
vk::MemoryPropertyFlags properties;
vk::BufferUsageFlags usage;
std::array<vk::Format, MAX_BUFFER_VIEWS> view_formats{};
};
Buffer(Instance& instance, CommandScheduler& scheduler, const BufferInfo& info);
~Buffer() override;
Buffer() = default;
~Buffer();
std::span<u8> Map(u32 size, u32 alignment = 0) override;
/// Enable move operations
Buffer(Buffer&&) = default;
Buffer& operator=(Buffer&&) = default;
/// Flushes write to buffer memory
void Commit(u32 size = 0) override;
/// Create a new Vulkan buffer object
void Create(const Info& info);
void Recreate();
void Destroy();
/// Returns the Vulkan buffer handle
vk::Buffer GetHandle() const {
return buffer;
}
/// Global utility functions used by other objects
static u32 FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties);
/// Return a pointer to the mapped memory if the buffer is host mapped
u8* GetHostPointer() const { return reinterpret_cast<u8*>(host_ptr); }
const vk::BufferView& GetView(u32 i = 0) const { return views[i]; }
const vk::Buffer& GetBuffer() const { return buffer; }
u32 GetSize() const { return buffer_info.size; }
void Upload(std::span<const std::byte> data, u32 offset,
vk::AccessFlags access_to_block = vk::AccessFlagBits::eVertexAttributeRead,
vk::PipelineStageFlags stage_to_block = vk::PipelineStageFlagBits::eVertexInput);
/// Returns an immutable reference to the requested buffer view
const vk::BufferView& GetView(u32 index = 0) const {
ASSERT(index < view_count);
return views[index];
}
protected:
Info buffer_info;
vk::Buffer buffer;
vk::DeviceMemory memory;
void* host_ptr = nullptr;
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views;
u32 view_count{};
};
Instance& instance;
CommandScheduler& scheduler;
class StreamBuffer : public Buffer {
public:
/*
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
* and the optional alignment requirement.
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
* The return values are the pointer to the new chunk, the offset within the buffer,
* and the invalidation flag for previous chunks.
* The actual used size must be specified on unmapping the chunk.
*/
std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);
void Commit(u32 size, vk::AccessFlags access_to_block = vk::AccessFlagBits::eUniformRead,
vk::PipelineStageFlags stage_to_block = vk::PipelineStageFlagBits::eVertexShader |
vk::PipelineStageFlagBits::eFragmentShader);
private:
u32 buffer_pos{};
vk::BufferCopy mapped_chunk;
// Vulkan buffer handle
void* mapped_ptr = nullptr;
vk::Buffer buffer = VK_NULL_HANDLE;
VmaAllocation allocation = VK_NULL_HANDLE;
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
u32 view_count = 0;
};
}
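As a usage sketch for the constructor above: a texel buffer is created by listing its view formats in BufferInfo, terminated by ViewFormat::Undefined. The enumerator name used for the view below is an assumption:

// A minimal sketch, assuming BufferInfo's views array as consumed by the constructor
BufferInfo MakeTexelBufferInfo() {
    BufferInfo info{};
    info.capacity = 4096;
    info.usage = BufferUsage::Texel;
    info.views[0] = ViewFormat::R32Float; // hypothetical enumerator name
    // Remaining view slots stay ViewFormat::Undefined, ending the view list
    return info;
}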

View File

@ -14,3 +14,74 @@
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
#define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1
#include <vk_mem_alloc.h>
namespace VideoCore::Vulkan {
/// Returns the aligned byte size of each pixel in the specified format
constexpr u32 GetFormatSize(vk::Format format) {
switch (format) {
case vk::Format::eR8G8B8A8Unorm:
case vk::Format::eD24UnormS8Uint:
return 4;
case vk::Format::eR8G8B8Unorm:
return 3;
case vk::Format::eR5G5B5A1UnormPack16:
case vk::Format::eR5G6B5UnormPack16:
case vk::Format::eR4G4B4A4UnormPack16:
case vk::Format::eD16Unorm:
return 2;
default:
return 0;
}
}
/// Returns the image aspect associated with the provided format
constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) {
vk::ImageAspectFlags flags;
switch (format) {
case vk::Format::eD16UnormS8Uint:
case vk::Format::eD24UnormS8Uint:
case vk::Format::eX8D24UnormPack32:
case vk::Format::eD32SfloatS8Uint:
flags = vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth;
break;
case vk::Format::eD16Unorm:
case vk::Format::eD32Sfloat:
flags = vk::ImageAspectFlagBits::eDepth;
break;
default:
flags = vk::ImageAspectFlagBits::eColor;
}
return flags;
}
/// Returns a bit mask with the required usage of a format with a particular aspect
constexpr vk::ImageUsageFlags GetImageUsage(vk::ImageAspectFlags aspect) {
auto usage = vk::ImageUsageFlagBits::eSampled |
vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eTransferSrc;
if (aspect & vk::ImageAspectFlagBits::eDepth) {
return usage | vk::ImageUsageFlagBits::eDepthStencilAttachment;
} else {
return usage | vk::ImageUsageFlagBits::eColorAttachment;
}
}
/// Returns a bit mask with the required features of a format with a particular aspect
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::ImageAspectFlags aspect) {
auto usage = vk::FormatFeatureFlagBits::eSampledImage |
vk::FormatFeatureFlagBits::eTransferDst |
vk::FormatFeatureFlagBits::eTransferSrc |
vk::FormatFeatureFlagBits::eBlitSrc |
vk::FormatFeatureFlagBits::eBlitDst;
if (aspect & vk::ImageAspectFlagBits::eDepth) {
return usage | vk::FormatFeatureFlagBits::eDepthStencilAttachment;
} else {
return usage | vk::FormatFeatureFlagBits::eColorAttachment;
}
}
}
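These helpers compose when filling out an image create info. For example (relying on VULKAN_HPP_NO_CONSTRUCTORS for designated initializers, as the rest of this commit does):

// Builds a basic 2D image create info using only the helpers defined above
inline vk::ImageCreateInfo MakeImageInfo(vk::Format format, u32 width, u32 height) {
    const vk::ImageAspectFlags aspect = GetImageAspect(format);
    return vk::ImageCreateInfo{
        .imageType = vk::ImageType::e2D,
        .format = format,
        .extent = {width, height, 1},
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = vk::SampleCountFlagBits::e1,
        .usage = GetImageUsage(aspect)
    };
}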

View File

@ -0,0 +1,6 @@
#include "vk_format_util.h"
vk_format_util::vk_format_util()
{
}

View File

@ -0,0 +1,436 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include <bit>
#include <string_view>
#include <array>
#include <cstring>
#include <bitset>
#include <type_traits>
#include <vulkan/vulkan_format_traits.hpp>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore::Vulkan {
enum class SIMD : u8 {
None = 0,
SSE4 = 1,
AVX2 = 2,
NEON = 3
};
/**
* A Pixel holds a pixel value or a SIMD lane holding multiple "real" pixels
*/
#pragma pack(1)
template <u8 bytes, SIMD simd = SIMD::None>
struct Pixel {
using StorageType = std::conditional_t<bytes <= 1, u8,
std::conditional_t<bytes <= 2, u16,
std::conditional_t<bytes <= 4, u32, u64>>>;
Pixel() = default;
// Memory load/store
constexpr void Load(const u8* memory) {
std::memcpy(&storage, memory, bytes);
}
constexpr void Store(u8* memory) const {
std::memcpy(memory, &storage, bytes);
}
// Returns the number of bytes until the next pixel
constexpr u8 GetStride() const {
return bytes;
}
// Bitwise operators
constexpr Pixel RotateRight(int n) const {
Pixel result;
result.storage = std::rotr(storage, n);
return result;
}
constexpr StorageType operator & (const StorageType mask) const {
return storage & mask;
}
constexpr StorageType operator | (const StorageType mask) const {
return storage | mask;
}
constexpr StorageType operator >>(const int n) const {
return storage >> n;
}
constexpr StorageType operator <<(const int n) const {
return storage << n;
}
private:
StorageType storage;
};
#pragma pack()
/**
* Information about a pixel format
*/
template <u8 Components>
struct FormatInfo {
constexpr FormatInfo(vk::Format format) {
for (int i = 0; i < components; i++) {
name[i] = vk::componentName(format, i)[0];
is_float[i] = std::string_view{vk::componentNumericFormat(format, i)}
== "SFLOAT";
bits[i] = vk::componentBits(format, i);
bit_offset[i] = (i > 0 ? bit_offset[i - 1] + bits[i - 1] : 0);
}
bytes = (format == vk::Format::eD32SfloatS8Uint ? 8 :
vk::blockSize(format));
}
static constexpr u32 components = Components;
std::array<char, components> name;
std::array<bool, components> is_float;
std::array<u8, components> bit_offset;
std::array<u8, components> bits;
u8 bytes; // This includes the padding in D32S8
};
/**
* Represents a mapping of components from one format to another
*/
template <FormatInfo source, FormatInfo dest>
struct Mapping {
static constexpr u32 component_map_bits = 4;
static constexpr u32 component_map_mask = (1 << component_map_bits) - 1;
constexpr Mapping() {
for (u32 i = 0; i < source.name.size(); i++) {
const char source_name = source.name[i];
for (u32 j = 0; j < dest.name.size(); j++) {
const char dest_name = dest.name[j];
if (source_name == dest_name) {
// Clear the nibble (initialized to unmapped) before writing the index
storage = static_cast<u16>((storage & ~(component_map_mask << (component_map_bits * i))) |
((j & component_map_mask) << (component_map_bits * i)));
break;
}
}
}
}
constexpr u8 GetMapping(const int component) const {
return (storage >> (component * component_map_bits)) & component_map_mask;
}
// Returns the number of bits to rotate a pixel to the right
// to match the mapping of the destination format. If it's not
// possible returns -1
constexpr s32 TestMappingRotation() const {
constexpr u16 identity = 0x3210;
u32 total_bits_rotated = 0;
auto test_rotation = [&](s32 i) -> bool {
return (storage == std::rotr(identity, i * component_map_bits));
};
for (s32 rot = 0; rot < 4; rot++) {
if (test_rotation(rot)) {
return total_bits_rotated;
}
total_bits_rotated += source.bits[rot];
}
return -1;
}
// Returns true if each component of the source format has the
// same bit-width as the mapped destination format component
constexpr bool AreBitwiseEqual() const {
bool result = source.bytes == dest.bytes;
for (int i = 0; i < source.components; i++) {
result &= (source.bits[i] == dest.bits[GetMapping(i)]);
}
return result;
}
private:
// Since there are at most 4 components we can use 4 bits for each component
u16 storage = 0xFFFF;
};
// Allows for loop like iteration at compile time
template <auto Start, auto End, class F>
constexpr void ForEach(F&& f) {
if constexpr (Start < End) {
f(std::integral_constant<decltype(Start), Start>());
ForEach<Start + 1, End>(f);
}
}
// Copies pixel data from a source to a destination buffer, performing
// format conversion at the same time
template <vk::Format source_format, vk::Format dest_format, SIMD simd>
constexpr void Convert2(std::span<const u8> source, std::span<u8> dest) {
constexpr u32 source_components = vk::componentCount(source_format);
constexpr u32 dest_components = vk::componentCount(dest_format);
// Query vulkan hpp format traits for the info we need
constexpr FormatInfo<source_components> source_info{source_format};
constexpr FormatInfo<dest_components> dest_info{dest_format};
// Create a table with the required component mapping
constexpr Mapping<source_info, dest_info> mapping{};
// Begin conversion
u32 source_offset = 0;
u32 dest_offset = 0;
while (source_offset < source.size()) {
// Load source pixel
Pixel<source_info.bytes, simd> source_pixel;
Pixel<dest_info.bytes, simd> dest_pixel{};
// Load data into the pixel
source_pixel.Load(source.data() + source_offset);
// OPTIMIZATION: Some formats (RGB5A1, A1RGB5) are simply rotations
// of one another. We can use a faster path for these
if constexpr (constexpr s32 rot = mapping.TestMappingRotation();
rot > -1 && mapping.AreBitwiseEqual()) {
dest_pixel = source_pixel.RotateRight(rot);
// RGB8 <-> RGBA8 is extremely common on desktop GPUs
// so it deserves a special path
} else if constexpr (true) {
// TODO: Dedicated RGB8 <-> RGBA8 path
} else {
ForEach<0, source_components>([&](auto comp) {
// If the component is not mapped skip it
constexpr u8 dest_comp = mapping.GetMapping(comp);
if constexpr (dest_comp == 0xF) {
return;
}
// Retrieve component
u32 component = GetComponent<source_format, source_info.bytes, comp>(source_pixel);
constexpr bool is_source_float = IsFloat<source_format>(comp);
constexpr bool is_dest_float = IsFloat<dest_format>(dest_comp);
// Perform float <-> int conversion (normalization)
if constexpr (is_source_float && !is_dest_float) {
float temp;
std::memcpy(&temp, &component, sizeof(float));
constexpr u64 mask = (1ull << vk::componentBits(dest_format, dest_comp)) - 1;
component = static_cast<u32>(temp * mask);
} else if constexpr (!is_source_float && is_dest_float) {
constexpr u64 mask = (1ull << vk::componentBits(source_format, comp)) - 1;
float temp = static_cast<float>(component) / mask;
std::memcpy(&component, &temp, sizeof(float));
}
SetComponent<dest_format, dest_info.bytes, dest_comp>(dest_pixel, component);
});
}
// Write destination pixel (the pixel storage includes padding so copy only blockSize bytes)
std::memcpy(dest.data() + dest_offset, &dest_pixel,
vk::blockSize(dest_format));
// Copy next pixel
source_offset += source_pixel.GetStride();
dest_offset += dest_pixel.GetStride();
}
}
// Associate the byte count with an integral type
template <u8 bytes>
struct PackedInt { using type = std::array<u8, bytes>; };
template <>
struct PackedInt<1> { using type = u8; };
template <>
struct PackedInt<2> { using type = u16; };
template <>
struct PackedInt<4> { using type = u32; };
template <>
struct PackedInt<8> { using type = u64; };
template <u8 bytes>
using PackedType = typename PackedInt<bytes>::type;
// Returns the pointer to the raw bytes respecting the underlying type
template <u8 bytes>
constexpr u8* DataPtr(PackedType<bytes>& data) {
if constexpr (std::is_integral_v<PackedType<bytes>>) {
return reinterpret_cast<u8*>(&data);
} else {
return data.data();
}
}
// Returns true when the specified component is of float type
template <vk::Format format>
constexpr bool IsFloat(u8 component) {
return std::string_view{vk::componentNumericFormat(format, component)} == "SFLOAT";
}
// Returns the offset in bits of the component from the start of the pixel
template <vk::Format format, u8 component, u8 i = 0>
constexpr u32 GetComponentBitOffset() {
if constexpr (i == component) {
return 0;
} else {
return vk::componentBits(format, i) +
GetComponentBitOffset<format, component, i + 1>();
}
}
// Returns the data located at the specified component
template <vk::Format format, u8 bytes, u8 component>
constexpr u32 GetComponent(PackedType<bytes>& pixel) {
constexpr u64 bit_offset = GetComponentBitOffset<format, component>();
constexpr u64 component_bits = vk::componentBits(format, component);
constexpr u64 mask = (1ull << component_bits) - 1;
// First process packed formats which are easy to extract from
if constexpr (std::is_integral_v<PackedType<bytes>>) {
return (pixel >> bit_offset) & mask;
} else {
// Assume component_bits and offset are byte aligned. Otherwise
// this would be extremely complicated
using ComponentType = PackedType<(component_bits >> 3)>;
static_assert(component_bits % 8 == 0 && bit_offset % 8 == 0);
static_assert(std::is_integral_v<ComponentType>);
constexpr u64 byte_offset = bit_offset >> 3;
return *reinterpret_cast<ComponentType*>(DataPtr<bytes>(pixel) + byte_offset);
}
}
template <vk::Format format, u8 bytes, u8 component>
constexpr void SetComponent(PackedType<bytes>& pixel, u32 data) {
constexpr u64 bit_offset = GetComponentBitOffset<format, component>();
constexpr u64 component_bits = vk::componentBits(format, component);
constexpr u64 mask = (1ull << component_bits) - 1;
// First process packed formats which are easy to write
if constexpr (std::is_integral_v<PackedType<bytes>>) {
pixel |= (data & mask) << bit_offset;
} else {
// Assume component_bits and offset are byte aligned. Otherwise
// this would be extremely complicated
using ComponentType = PackedType<(component_bits >> 3)>;
static_assert(component_bits % 8 == 0 && bit_offset % 8 == 0);
static_assert(std::is_integral_v<ComponentType>);
constexpr u64 byte_offset = bit_offset >> 3;
*reinterpret_cast<ComponentType*>(DataPtr<bytes>(pixel) + byte_offset) = static_cast<ComponentType>(data);
}
}
constexpr bool CanUseRotation();
// Lookup table that maps component i of source format
// to component mapping[i] of the destination format
template <vk::Format source_format, u8 source_components,
vk::Format dest_format, u8 dest_components>
constexpr auto ComponentMapping() {
// Since there are at most 4 components we can use 4 bits for each index
u16 mapping = 0xFFFF;
for (u8 i = 0; i < source_components; i++) {
auto source_name = vk::componentName(source_format, i);
for (u8 j = 0; j < dest_components; j++) {
auto dest_name = vk::componentName(dest_format, j);
if (std::string_view{source_name} == std::string_view{dest_name}) {
mapping = static_cast<u16>((mapping & ~(0xF << (4 * i))) | ((j & 0xF) << (4 * i)));
break;
}
}
}
return mapping;
}
// Allows for loop like iteration at compile time
template <auto Start, auto End, class F>
constexpr void ConstexprFor(F&& f) {
if constexpr (Start < End) {
f(std::integral_constant<decltype(Start), Start>());
ConstexprFor<Start + 1, End>(f);
}
}
// Copies pixel data from a source to a destination buffer, performing
// format conversion at the same time
template <vk::Format source_format, u8 source_bytes,
vk::Format dest_format, u8 dest_bytes>
constexpr void Convert(std::span<const u8> source, std::span<u8> dest) {
constexpr u32 source_components = vk::componentCount(source_format);
constexpr u32 dest_components = vk::componentCount(dest_format);
// Create a table with the required component mapping
constexpr auto mapping = ComponentMapping<source_format, source_components,
dest_format, dest_components>();
u32 source_offset = 0;
u32 dest_offset = 0;
while (source_offset < source.size()) {
// Load source pixel
PackedType<source_bytes> source_pixel;
std::memcpy(DataPtr<source_bytes>(source_pixel),
source.data() + source_offset, source_bytes);
PackedType<dest_bytes> dest_pixel{};
// Convert each component according to the mapping table
ConstexprFor<0, source_components>([&](auto comp) {
constexpr u8 dest_comp = (mapping >> (4 * comp)) & 0xF;
// If the component is not mapped skip it
if constexpr (dest_comp == 0xF) {
return;
}
// Retrieve component
u32 component = GetComponent<source_format, source_bytes, comp>(source_pixel);
constexpr bool is_source_float = IsFloat<source_format>(comp);
constexpr bool is_dest_float = IsFloat<dest_format>(dest_comp);
// Perform float <-> int conversion (normalization)
if constexpr (is_source_float && !is_dest_float) {
float temp;
std::memcpy(&temp, &component, sizeof(float));
constexpr u64 mask = (1ull << vk::componentBits(dest_format, dest_comp)) - 1;
component = static_cast<u32>(temp * mask);
} else if constexpr (!is_source_float && is_dest_float) {
constexpr u64 mask = (1ull << vk::componentBits(source_format, comp)) - 1;
float temp = static_cast<float>(component) / mask;
std::memcpy(&component, &temp, sizeof(float));
}
SetComponent<dest_format, dest_bytes, dest_comp>(dest_pixel, component);
});
// Write destination pixel (dest_bytes includes the padding so we cannot use it here)
std::memcpy(dest.data() + dest_offset, DataPtr<dest_bytes>(dest_pixel),
vk::blockSize(dest_format));
// Copy next pixel
source_offset += source_bytes;
dest_offset += dest_bytes;
}
}
} // namespace VideoCore::Vulkan
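An illustrative instantiation of the Convert template above, converting one RGBA8 pixel to RGB5A1 with the byte strides passed explicitly as the template expects:

#include <array>

void ConvertExample() {
    const std::array<u8, 4> src = {0xFF, 0x00, 0x00, 0xFF}; // opaque red, RGBA8
    std::array<u8, 2> dst{};
    Convert<vk::Format::eR8G8B8A8Unorm, 4,
            vk::Format::eR5G5B5A1UnormPack16, 2>(src, dst);
}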

View File

@ -2,52 +2,148 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <fstream>
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <span>
#include <array>
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_platform.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
std::unique_ptr<Instance> g_vk_instace;
Instance::Instance(Frontend::EmuWindow& window) {
auto window_info = window.GetWindowInfo();
// Enable the instance extensions the backend uses
auto extensions = GetInstanceExtensions(window_info.type, true);
// We require a Vulkan 1.1 driver
const u32 available_version = vk::enumerateInstanceVersion();
if (available_version < VK_API_VERSION_1_1) {
LOG_CRITICAL(Render_Vulkan, "Vulkan 1.0 is not supported, 1.1 is required!");
}
const vk::ApplicationInfo application_info = {
.pApplicationName = "Citra",
.applicationVersion = VK_MAKE_VERSION(1, 0, 0),
.pEngineName = "Citra Vulkan",
.engineVersion = VK_MAKE_VERSION(1, 0, 0),
.apiVersion = available_version
};
const std::array layers = {"VK_LAYER_KHRONOS_validation"};
const vk::InstanceCreateInfo instance_info = {
.pApplicationInfo = &application_info,
.enabledLayerCount = static_cast<u32>(layers.size()),
.ppEnabledLayerNames = layers.data(),
.enabledExtensionCount = static_cast<u32>(extensions.size()),
.ppEnabledExtensionNames = extensions.data()
};
// Create VkInstance
instance = vk::createInstance(instance_info);
surface = CreateSurface(instance, window);
// TODO: GPU select dialog
physical_device = instance.enumeratePhysicalDevices()[0];
device_limits = physical_device.getProperties().limits;
// Create logical device
CreateDevice(true);
}
Instance::~Instance() {
device.waitIdle();
device.destroy();
instance.destroy();
}
bool Instance::Create(vk::Instance new_instance, vk::PhysicalDevice gpu,
vk::SurfaceKHR surface, bool enable_validation_layer) {
instance = new_instance;
physical_device = gpu;
// Get physical device limits
device_limits = physical_device.getProperties().limits;
bool Instance::CreateDevice(bool validation_enabled) {
// Determine required extensions and features
if (!FindExtensions() || !FindFeatures())
return false;
auto feature_chain = physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
vk::PhysicalDeviceDynamicRenderingFeaturesKHR,
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>();
// Create logical device
return CreateDevice(surface, enable_validation_layer);
}
// Not having geometry shaders or wide lines will cause issues with rendering.
const vk::PhysicalDeviceFeatures available = feature_chain.get().features;
if (!available.geometryShader && !available.wideLines) {
LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Accelerated rendering not possible!");
}
bool Instance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) {
// Can't create an instance without a valid surface
if (!surface) {
LOG_CRITICAL(Render_Vulkan, "Invalid surface provided during instance creation!");
// Enable some common features other emulators like Dolphin use
const vk::PhysicalDeviceFeatures2 features = {
.features = {
.robustBufferAccess = available.robustBufferAccess,
.geometryShader = available.geometryShader,
.sampleRateShading = available.sampleRateShading,
.dualSrcBlend = available.dualSrcBlend,
.logicOp = available.logicOp,
.depthClamp = available.depthClamp,
.largePoints = available.largePoints,
.samplerAnisotropy = available.samplerAnisotropy,
.occlusionQueryPrecise = available.occlusionQueryPrecise,
.fragmentStoresAndAtomics = available.fragmentStoresAndAtomics,
.shaderStorageImageMultisample = available.shaderStorageImageMultisample,
.shaderClipDistance = available.shaderClipDistance
}
};
// Enable newer Vulkan features
auto enabled_features = vk::StructureChain{
features,
feature_chain.get<vk::PhysicalDeviceDynamicRenderingFeaturesKHR>(),
feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
feature_chain.get<vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>()
};
auto extension_list = physical_device.enumerateDeviceExtensionProperties();
if (extension_list.empty()) {
LOG_CRITICAL(Render_Vulkan, "No extensions supported by device.");
return false;
}
// List available device extensions
for (const auto& extension : extension_list) {
LOG_INFO(Render_Vulkan, "Vulkan extension: {}", extension.extensionName);
}
// Helper lambda for adding extensions
std::array<const char*, 6> enabled_extensions;
u32 enabled_extension_count = 0;
auto AddExtension = [&](std::string_view name, bool required) -> bool {
auto result = std::find_if(extension_list.begin(), extension_list.end(), [&](const auto& prop) {
return name.compare(prop.extensionName.data()) == 0;
});
if (result != extension_list.end()) {
LOG_INFO(Render_Vulkan, "Enabling extension: {}", name);
enabled_extensions[enabled_extension_count++] = name.data();
return true;
}
if (required) {
LOG_ERROR(Render_Vulkan, "Unable to find required extension {}.", name);
}
return false;
};
// Add required extensions
AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true);
// Check for optional features
dynamic_rendering = AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false);
extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false);
// Search queue families for graphics and present queues
auto family_properties = physical_device.getQueueFamilyProperties();
if (family_properties.empty()) {
LOG_CRITICAL(Render_Vulkan, "Vulkan physical device reported no queues.");
return false;
}
// Search queue families for graphics and present queues
graphics_queue_family_index = -1;
present_queue_family_index = -1;
for (int i = 0; i < family_properties.size(); i++) {
@ -68,24 +164,35 @@ bool Instance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) {
}
}
if (graphics_queue_family_index == -1 ||
present_queue_family_index == -1) {
if (graphics_queue_family_index == -1 || present_queue_family_index == -1) {
LOG_CRITICAL(Render_Vulkan, "Unable to find graphics and/or present queues.");
return false;
}
static constexpr float queue_priorities[] = {1.0f};
const std::array layers{"VK_LAYER_KHRONOS_validation"};
const std::array queue_infos{
vk::DeviceQueueCreateInfo{{}, graphics_queue_family_index, 1, queue_priorities},
vk::DeviceQueueCreateInfo{{}, present_queue_family_index, 1, queue_priorities}
const std::array layers = {"VK_LAYER_KHRONOS_validation"};
const std::array queue_infos = {
vk::DeviceQueueCreateInfo{
.queueFamilyIndex = graphics_queue_family_index,
.queueCount = 1,
.pQueuePriorities = queue_priorities
},
vk::DeviceQueueCreateInfo{
.queueFamilyIndex = present_queue_family_index,
.queueCount = 1,
.pQueuePriorities = queue_priorities
}
};
vk::DeviceCreateInfo device_info({}, 1, queue_infos.data(), 0, nullptr,
extensions.size(), extensions.data(), nullptr, &features);
vk::DeviceCreateInfo device_info = {
.pNext = &enabled_features,
.queueCreateInfoCount = 1,
.pQueueCreateInfos = queue_infos.data(),
.enabledExtensionCount = enabled_extension_count,
.ppEnabledExtensionNames = enabled_extensions.data(),
};
// Use two queue create infos when the graphics and present families differ
if (graphics_queue_family_index != present_queue_family_index) {
device_info.queueCreateInfoCount = 2;
}
@ -104,87 +211,67 @@ bool Instance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) {
graphics_queue = device.getQueue(graphics_queue_family_index, 0);
present_queue = device.getQueue(present_queue_family_index, 0);
return true;
}
bool Instance::FindFeatures() {
auto available = physical_device.getFeatures();
// Not having geometry shaders or wide lines will cause issues with rendering.
if (!available.geometryShader && !available.wideLines) {
LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Rendering will be limited");
}
// Enable some common features other emulators like Dolphin use
vk_features.dualSrcBlend = available.dualSrcBlend;
vk_features.geometryShader = available.geometryShader;
vk_features.samplerAnisotropy = available.samplerAnisotropy;
vk_features.logicOp = available.logicOp;
vk_features.fragmentStoresAndAtomics = available.fragmentStoresAndAtomics;
vk_features.sampleRateShading = available.sampleRateShading;
vk_features.largePoints = available.largePoints;
vk_features.shaderStorageImageMultisample = available.shaderStorageImageMultisample;
vk_features.occlusionQueryPrecise = available.occlusionQueryPrecise;
vk_features.shaderClipDistance = available.shaderClipDistance;
vk_features.depthClamp = available.depthClamp;
vk_features.textureCompressionBC = available.textureCompressionBC;
// Enable newer Vulkan features
vk12_features.timelineSemaphore = true;
vk13_features.dynamicRendering = true;
dynamic_state_features.extendedDynamicState = true;
dynamic_state2_features.extendedDynamicState2 = true;
// Include features in device creation
vk12_features.pNext = &vk13_features;
vk13_features.pNext = &dynamic_state_features;
dynamic_state_features.pNext = &dynamic_state2_features;
features = vk::PhysicalDeviceFeatures2{vk_features, &vk12_features};
// Create the VMA allocator
CreateAllocator();
return true;
}
bool Instance::FindExtensions() {
auto available = physical_device.enumerateDeviceExtensionProperties();
if (available.empty()) {
LOG_CRITICAL(Render_Vulkan, "No extensions supported by device.");
return false;
}
// List available device extensions
for (const auto& prop : available) {
LOG_INFO(Render_Vulkan, "Vulkan extension: {}", prop.extensionName);
}
// Helper lambda for adding extensions
auto AddExtension = [&](const char* name, bool required) {
auto result = std::find_if(available.begin(), available.end(), [&](const auto& prop) {
return !std::strcmp(name, prop.extensionName);
});
if (result != available.end()) {
LOG_INFO(Render_Vulkan, "Enabling extension: {}", name);
extensions.push_back(name);
return true;
}
if (required) {
LOG_ERROR(Render_Vulkan, "Unable to find required extension {}.", name);
}
return false;
void Instance::CreateAllocator() {
VmaVulkanFunctions functions = {
.vkGetInstanceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr,
.vkGetDeviceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr
};
// Add required extensions
if (!AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true) ||
!AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, true) ||
!AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, true) ||
!AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME, true) ||
!AddExtension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME, true)) {
return false;
VmaAllocatorCreateInfo allocator_info = {
.physicalDevice = physical_device,
.device = device,
.pVulkanFunctions = &functions,
.instance = instance,
.vulkanApiVersion = VK_API_VERSION_1_1
};
vmaCreateAllocator(&allocator_info, &allocator);
}
bool Instance::IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const {
static std::unordered_map<vk::Format, vk::FormatProperties> supported;
if (auto iter = supported.find(format); iter != supported.end()) {
return (iter->second.optimalTilingFeatures & usage) == usage;
}
return true;
// Cache format properties so we don't have to query the driver all the time
const vk::FormatProperties properties = physical_device.getFormatProperties(format);
supported.insert(std::make_pair(format, properties));
return (properties.optimalTilingFeatures & usage) == usage;
}
vk::Format Instance::GetFormatAlternative(vk::Format format) const {
vk::FormatFeatureFlags features = GetFormatFeatures(GetImageAspect(format));
if (IsFormatSupported(format, features)) {
return format;
}
// Return the most supported alternative format preferably with the
// same block size according to the Vulkan spec.
// See 43.3. Required Format Support of the Vulkan spec
switch (format) {
case vk::Format::eD24UnormS8Uint:
return vk::Format::eD32SfloatS8Uint;
case vk::Format::eX8D24UnormPack32:
return vk::Format::eD32Sfloat;
case vk::Format::eR5G5B5A1UnormPack16:
return vk::Format::eA1R5G5B5UnormPack16;
case vk::Format::eR4G4B4A4UnormPack16:
return vk::Format::eB4G4R4A4UnormPack16;
case vk::Format::eR8G8B8Unorm:
return vk::Format::eR8G8B8A8Unorm;
default:
LOG_WARNING(Render_Vulkan, "Unable to find compatible alternative to format = {} with usage {}",
vk::to_string(format), vk::to_string(features));
return vk::Format::eR8G8B8A8Unorm;
}
}
} // namespace VideoCore::Vulkan
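As a usage note, GetFormatAlternative lets callers request the 3DS-native format and transparently receive a supported substitute. A sketch:

// Picks a depth format, falling back per the table above when D24S8 is unsupported
vk::Format PickDepthFormat(const Instance& instance) {
    return instance.GetFormatAlternative(vk::Format::eD24UnormS8Uint);
}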

View File

@ -8,61 +8,104 @@
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
namespace Frontend {
class EmuWindow;
}
namespace VideoCore::Vulkan {
/// The global Vulkan instance
class Instance {
public:
Instance() = default;
Instance(Frontend::EmuWindow& window);
~Instance();
/// Construct global Vulkan context
bool Create(vk::Instance instance, vk::PhysicalDevice gpu,
vk::SurfaceKHR surface, bool enable_validation_layer);
/// Returns the Vulkan instance
vk::Instance GetInstance() const {
return instance;
}
vk::Device GetDevice() const { return device; }
vk::PhysicalDevice GetPhysicalDevice() const { return physical_device; }
vk::Instance GetInstance() const { return instance; }
/// Returns the Vulkan surface
vk::SurfaceKHR GetSurface() const {
return surface;
}
/// Returns the current physical device
vk::PhysicalDevice GetPhysicalDevice() const {
return physical_device;
}
/// Returns the Vulkan device
vk::Device GetDevice() const {
return device;
}
VmaAllocator GetAllocator() const {
return allocator;
}
/// Retrieve queue information
u32 GetGraphicsQueueFamilyIndex() const { return graphics_queue_family_index; }
u32 GetPresentQueueFamilyIndex() const { return present_queue_family_index; }
vk::Queue GetGraphicsQueue() const { return graphics_queue; }
vk::Queue GetPresentQueue() const { return present_queue; }
u32 GetGraphicsQueueFamilyIndex() const {
return graphics_queue_family_index;
}
u32 GetPresentQueueFamilyIndex() const {
return present_queue_family_index;
}
vk::Queue GetGraphicsQueue() const {
return graphics_queue;
}
vk::Queue GetPresentQueue() const {
return present_queue;
}
/// Feature support
bool SupportsAnisotropicFiltering() const;
u32 UniformMinAlignment() const { return static_cast<u32>(device_limits.minUniformBufferOffsetAlignment); }
bool IsDynamicRenderingSupported() const {
return dynamic_rendering;
}
bool IsExtendedDynamicStateSupported() const {
return extended_dynamic_state;
}
bool IsPushDescriptorsSupported() const {
return push_descriptors;
}
/// Returns the minimum required alignment for uniforms
vk::DeviceSize UniformMinAlignment() const {
return device_limits.minUniformBufferOffsetAlignment;
}
/// Returns true when the format supports the provided feature flags
bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const;
/// Returns the most compatible format that supports the provided feature flags
vk::Format GetFormatAlternative(vk::Format format) const;
private:
bool CreateDevice(vk::SurfaceKHR surface, bool validation_enabled);
bool FindExtensions();
bool FindFeatures();
bool CreateDevice(bool validation_enabled);
void CreateAllocator();
public:
private:
// Queue family indexes
u32 present_queue_family_index{}, graphics_queue_family_index{};
u32 present_queue_family_index = 0, graphics_queue_family_index = 0;
vk::Queue present_queue, graphics_queue;
// Core vulkan objects
vk::Device device;
vk::PhysicalDevice physical_device;
vk::Instance instance;
vk::Device device;
// Extensions and features
std::vector<const char*> extensions;
vk::PhysicalDeviceFeatures2 features{};
vk::SurfaceKHR surface;
vk::PhysicalDeviceLimits device_limits;
VmaAllocator allocator;
// Features per vulkan version
vk::PhysicalDeviceFeatures vk_features{};
vk::PhysicalDeviceVulkan13Features vk13_features{};
vk::PhysicalDeviceVulkan12Features vk12_features{};
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state_features{};
vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT dynamic_state2_features{};
vk::PhysicalDeviceColorWriteEnableFeaturesEXT color_write_features{};
bool dynamic_rendering = false;
bool extended_dynamic_state = false;
bool push_descriptors = false;
};
extern std::unique_ptr<Instance> g_vk_instace;
} // namespace Vulkan
} // namespace VideoCore::Vulkan
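One expected consumer of UniformMinAlignment is uniform buffer suballocation. A minimal sketch using Common::AlignUp, which this commit already includes via common/alignment.h:

#include "common/alignment.h"

// Advances a uniform stream offset to the next legally aligned position
vk::DeviceSize NextUniformOffset(const Instance& instance, vk::DeviceSize offset) {
    return Common::AlignUp(offset, instance.UniformMinAlignment());
}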

View File

@ -0,0 +1,414 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/pica_to_vulkan.h"
#include "video_core/renderer_vulkan/vk_pipeline.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_texture.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace VideoCore::Vulkan {
// Maximum binding per descriptor set
constexpr u32 MAX_BINDING_SLOTS = 7;
vk::ShaderStageFlags ToVkStageFlags(BindingType type) {
vk::ShaderStageFlags flags;
switch (type) {
case BindingType::Sampler:
case BindingType::Texture:
case BindingType::TexelBuffer:
flags = vk::ShaderStageFlagBits::eFragment;
break;
case BindingType::StorageImage:
case BindingType::Uniform:
case BindingType::UniformDynamic:
flags = vk::ShaderStageFlagBits::eFragment |
vk::ShaderStageFlagBits::eVertex |
vk::ShaderStageFlagBits::eGeometry |
vk::ShaderStageFlagBits::eCompute;
break;
default:
LOG_ERROR(Render_Vulkan, "Unknown descriptor type!");
}
return flags;
}
vk::DescriptorType ToVkDescriptorType(BindingType type) {
switch (type) {
case BindingType::Uniform:
return vk::DescriptorType::eUniformBuffer;
case BindingType::UniformDynamic:
return vk::DescriptorType::eUniformBufferDynamic;
case BindingType::TexelBuffer:
return vk::DescriptorType::eUniformTexelBuffer;
case BindingType::Texture:
return vk::DescriptorType::eSampledImage;
case BindingType::Sampler:
return vk::DescriptorType::eSampler;
case BindingType::StorageImage:
return vk::DescriptorType::eStorageImage;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown descriptor type!");
UNREACHABLE();
}
}
u32 AttribBytes(VertexAttribute attrib) {
switch (attrib.type) {
case AttribType::Float:
return sizeof(float) * attrib.components;
case AttribType::Int:
return sizeof(u32) * attrib.components;
case AttribType::Short:
return sizeof(u16) * attrib.components;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown vertex attribute type!");
UNREACHABLE();
}
}
vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
switch (attrib.type) {
case AttribType::Float:
switch (attrib.components) {
case 1: return vk::Format::eR32Sfloat;
case 2: return vk::Format::eR32G32Sfloat;
case 3: return vk::Format::eR32G32B32Sfloat;
case 4: return vk::Format::eR32G32B32A32Sfloat;
}
default:
LOG_CRITICAL(Render_Vulkan, "Unimplemented vertex attribute format!");
UNREACHABLE();
}
}
vk::ShaderStageFlagBits ToVkShaderStage(ShaderStage stage) {
switch (stage) {
case ShaderStage::Vertex:
return vk::ShaderStageFlagBits::eVertex;
case ShaderStage::Fragment:
return vk::ShaderStageFlagBits::eFragment;
case ShaderStage::Geometry:
return vk::ShaderStageFlagBits::eGeometry;
case ShaderStage::Compute:
return vk::ShaderStageFlagBits::eCompute;
default:
LOG_CRITICAL(Render_Vulkan, "Undefined shader stage!");
UNREACHABLE();
}
}
PipelineLayout::PipelineLayout(Instance& instance, PipelineLayoutInfo info) :
instance(instance), set_layout_count(info.group_count) {
// Used as temp storage for CreateDescriptorSet
std::array<vk::DescriptorSetLayoutBinding, MAX_BINDING_SLOTS> set_bindings;
std::array<vk::DescriptorUpdateTemplateEntry, MAX_BINDING_SLOTS> update_entries;
vk::Device device = instance.GetDevice();
for (u32 set = 0; set < set_layout_count; set++) {
auto& group = info.binding_groups[set];
u32 binding = 0;
while (group[binding] != BindingType::None) {
const BindingType type = group[binding];
set_bindings[binding] = vk::DescriptorSetLayoutBinding{
.binding = binding,
.descriptorType = ToVkDescriptorType(type),
.descriptorCount = 1,
.stageFlags = ToVkStageFlags(type)
};
// Also create update template to speed up descriptor writes
update_entries[binding] = vk::DescriptorUpdateTemplateEntry{
.dstBinding = binding,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = ToVkDescriptorType(type),
.offset = binding * sizeof(DescriptorData),
.stride = sizeof(DescriptorData)
};
binding++;
}
const vk::DescriptorSetLayoutCreateInfo layout_info = {
.bindingCount = binding,
.pBindings = set_bindings.data()
};
// Create descriptor set layout
set_layouts[set] = device.createDescriptorSetLayout(layout_info);
const vk::DescriptorUpdateTemplateCreateInfo template_info = {
.descriptorUpdateEntryCount = binding,
.pDescriptorUpdateEntries = update_entries.data(),
.descriptorSetLayout = set_layouts[set]
};
// Create descriptor set update template
update_templates[set] = device.createDescriptorUpdateTemplate(template_info);
}
// Create pipeline layout
const vk::PushConstantRange range = {
.stageFlags = vk::ShaderStageFlagBits::eAll,
.offset = 0,
.size = info.push_constant_block_size
};
bool push_constants = info.push_constant_block_size > 0;
const u32 range_count = push_constants ? 1u : 0u;
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = set_layout_count,
.pSetLayouts = set_layouts.data(),
.pushConstantRangeCount = range_count,
.pPushConstantRanges = &range
};
pipeline_layout = device.createPipelineLayout(layout_info);
}
PipelineLayout::~PipelineLayout() {
vk::Device device = instance.GetDevice();
device.destroyPipelineLayout(pipeline_layout);
for (u32 i = 0; i < set_layout_count; i++) {
device.destroyDescriptorSetLayout(set_layouts[i]);
device.destroyDescriptorUpdateTemplate(update_templates[i]);
}
}
Pipeline::Pipeline(Instance& instance, PipelineLayout& owner, PipelineType type,
PipelineInfo info, vk::PipelineCache cache) : PipelineBase(type, info),
instance(instance), owner(owner) {
vk::Device device = instance.GetDevice();
u32 shader_count = 0;
std::array<vk::PipelineShaderStageCreateInfo, MAX_SHADER_STAGES> shader_stages;
for (u32 i = 0; i < info.shaders.size(); i++) {
auto& shader = info.shaders[i];
if (!shader.IsValid()) {
break;
}
shader_count++;
Shader* vk_shader = static_cast<Shader*>(shader.Get());
shader_stages[i] = vk::PipelineShaderStageCreateInfo{
.stage = ToVkShaderStage(shader->GetStage()),
.module = vk_shader->GetHandle(),
.pName = shader->GetName().data(),
};
}
// Create a graphics pipeline
if (type == PipelineType::Graphics) {
const vk::VertexInputBindingDescription binding_desc = {
.binding = 0,
.stride = info.vertex_layout.stride
};
// Populate vertex attribute structures
u32 attribute_count = 0;
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attribute_desc;
for (u32 i = 0; i < MAX_VERTEX_ATTRIBUTES; i++) {
auto& attr = info.vertex_layout.attributes[i];
if (attr.components == 0) {
attribute_count = i;
break;
}
attribute_desc[i] = vk::VertexInputAttributeDescription{
.location = i,
.binding = 0,
.format = ToVkAttributeFormat(attr),
.offset = (i > 0 ? attribute_desc[i - 1].offset +
AttribBytes(info.vertex_layout.attributes[i - 1]) : 0)
};
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = &binding_desc,
.vertexAttributeDescriptionCount = attribute_count,
.pVertexAttributeDescriptions = attribute_desc.data()
};
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
.topology = PicaToVK::PrimitiveTopology(info.rasterization.topology),
.primitiveRestartEnable = false
};
const vk::PipelineRasterizationStateCreateInfo raster_state = {
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
.cullMode = PicaToVK::CullMode(info.rasterization.cull_mode),
.frontFace = vk::FrontFace::eClockwise,
.depthBiasEnable = false,
.lineWidth = 1.0f
};
const vk::PipelineMultisampleStateCreateInfo multisampling = {
.rasterizationSamples = vk::SampleCountFlagBits::e1,
.sampleShadingEnable = false
};
const vk::PipelineColorBlendAttachmentState colorblend_attachment = {
.blendEnable = true,
.srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor),
.dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor),
.colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq),
.srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor),
.dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor),
.alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq),
.colorWriteMask = static_cast<vk::ColorComponentFlags>(info.blending.color_write_mask)
};
const vk::PipelineColorBlendStateCreateInfo color_blending = {
.logicOpEnable = true,
.logicOp = vk::LogicOp::eCopy, // TODO
.attachmentCount = 1,
.pAttachments = &colorblend_attachment,
};
const bool extended_dynamic_states = instance.IsExtendedDynamicStateSupported();
const std::array dynamic_states = {
vk::DynamicState::eViewport,
vk::DynamicState::eScissor,
vk::DynamicState::eLineWidth,
vk::DynamicState::eStencilCompareMask,
vk::DynamicState::eStencilWriteMask,
vk::DynamicState::eStencilReference,
// VK_EXT_extended_dynamic_state
vk::DynamicState::eCullModeEXT,
vk::DynamicState::eDepthCompareOpEXT,
vk::DynamicState::eDepthTestEnableEXT,
vk::DynamicState::eDepthWriteEnableEXT,
vk::DynamicState::eFrontFaceEXT,
vk::DynamicState::ePrimitiveTopologyEXT,
vk::DynamicState::eStencilOpEXT,
vk::DynamicState::eStencilTestEnableEXT,
};
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
.dynamicStateCount = extended_dynamic_states ? 14u : 6u,
.pDynamicStates = dynamic_states.data()
};
const vk::StencilOpState stencil_op_state = {
.failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
.passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
.depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
.compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op),
.compareMask = static_cast<u32>(info.depth_stencil.stencil_compare_mask.Value()),
.writeMask = static_cast<u32>(info.depth_stencil.stencil_write_mask.Value()),
.reference = static_cast<u32>(info.depth_stencil.stencil_reference.Value())
};
const vk::PipelineDepthStencilStateCreateInfo depth_info = {
.depthTestEnable = static_cast<u32>(info.depth_stencil.depth_test_enable.Value()),
.depthWriteEnable = static_cast<u32>(info.depth_stencil.depth_write_enable.Value()),
.depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op),
.depthBoundsTestEnable = false,
.stencilTestEnable = static_cast<u32>(info.depth_stencil.stencil_test_enable.Value()),
.front = stencil_op_state,
.back = stencil_op_state
};
const vk::GraphicsPipelineCreateInfo pipeline_info = {
.stageCount = shader_count,
.pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_info,
.pInputAssemblyState = &input_assembly,
.pRasterizationState = &raster_state,
.pMultisampleState = &multisampling,
.pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
.layout = owner.GetLayout(),
.renderPass = {}
};
if (auto result = device.createGraphicsPipeline(cache, pipeline_info); result.result == vk::Result::eSuccess) {
pipeline = result.value;
} else {
LOG_CRITICAL(Render_Vulkan, "Graphics pipeline creation failed!");
UNREACHABLE();
}
} else { // Compute pipeline
ASSERT(shader_count == 1);
const vk::ComputePipelineCreateInfo pipeline_info = {
.stage = shader_stages[0],
.layout = owner.GetLayout()
};
if (auto result = device.createComputePipeline(cache, pipeline_info); result.result == vk::Result::eSuccess) {
pipeline = result.value;
} else {
LOG_CRITICAL(Render_Vulkan, "Compute pipeline creation failed!");
UNREACHABLE();
}
}
}
Pipeline::~Pipeline() {
vk::Device device = instance.GetDevice();
device.destroyPipeline(pipeline);
}
void Pipeline::BindTexture(u32 group, u32 slot, TextureHandle handle) {
Texture* texture = static_cast<Texture*>(handle.Get());
const DescriptorData data = {
.image_info = vk::DescriptorImageInfo{
.imageView = texture->GetView(),
.imageLayout = texture->GetLayout()
}
};
owner.SetBinding(group, slot, data);
}
void Pipeline::BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view) {
Buffer* buffer = static_cast<Buffer*>(handle.Get());
// Texel buffers are bound with their views
if (buffer->GetUsage() == BufferUsage::Texel) {
const DescriptorData data = {
.buffer_view = buffer->GetView(view)
};
owner.SetBinding(group, slot, data);
} else {
const DescriptorData data = {
.buffer_info = vk::DescriptorBufferInfo{
.buffer = buffer->GetHandle(),
.offset = 0,
.range = buffer->GetCapacity()
}
};
owner.SetBinding(group, slot, data);
}
}
void Pipeline::BindSampler(u32 group, u32 slot, SamplerHandle handle) {
Sampler* sampler = static_cast<Sampler*>(handle.Get());
const DescriptorData data = {
.image_info = vk::DescriptorImageInfo{
.sampler = sampler->GetHandle()
}
};
owner.SetBinding(group, slot, data);
}
} // namespace VideoCore::Vulkan
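The update templates created in PipelineLayout are meant to be consumed when descriptor sets are written. A sketch of that step, with the descriptor set allocation itself assumed to happen elsewhere:

// Writes every binding of one group using the layout's update template
void WriteDescriptorSet(vk::Device device, PipelineLayout& layout,
                        vk::DescriptorSet set, u32 group) {
    std::span<DescriptorData> data = layout.GetData(group);
    device.updateDescriptorSetWithTemplate(set, layout.GetUpdateTemplate(group),
                                           data.data());
}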

View File

@ -0,0 +1,96 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "video_core/common/pipeline.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore::Vulkan {
class Instance;
class CommandScheduler;
union DescriptorData {
vk::DescriptorImageInfo image_info{};
vk::DescriptorBufferInfo buffer_info;
vk::BufferView buffer_view;
};
/**
* Stores the pipeline layout as well as the descriptor set layouts
* and update templates associated with those layouts.
* Functions as the "parent" to a group of pipelines that share the same layout
*/
class PipelineLayout {
public:
PipelineLayout(Instance& instance, PipelineLayoutInfo info);
~PipelineLayout();
// Disable copy constructor
PipelineLayout(const PipelineLayout&) = delete;
PipelineLayout& operator=(const PipelineLayout&) = delete;
// Assigns data to a particular binding
void SetBinding(u32 set, u32 binding, DescriptorData data) {
update_data[set][binding] = data;
}
// Returns the most current descriptor update data
std::span<DescriptorData> GetData(u32 set) {
return std::span{update_data.at(set)};
}
// Returns the underlying vulkan pipeline layout handle
vk::PipelineLayout GetLayout() const {
return pipeline_layout;
}
// Returns the descriptor set update template handle associated with the provided set index
vk::DescriptorUpdateTemplate GetUpdateTemplate(u32 set) const {
return update_templates.at(set);
}
private:
Instance& instance;
vk::PipelineLayout pipeline_layout = VK_NULL_HANDLE;
u32 set_layout_count = 0;
std::array<vk::DescriptorSetLayout, MAX_BINDING_GROUPS> set_layouts;
std::array<vk::DescriptorUpdateTemplate, MAX_BINDING_GROUPS> update_templates;
// Update data for the descriptor sets
using SetData = std::array<DescriptorData, MAX_BINDINGS_IN_GROUP>;
std::array<SetData, MAX_BINDING_GROUPS> update_data;
};
class Pipeline : public VideoCore::PipelineBase {
public:
Pipeline(Instance& instance, PipelineLayout& owner,
PipelineType type, PipelineInfo info, vk::PipelineCache cache);
~Pipeline() override;
void BindTexture(u32 group, u32 slot, TextureHandle handle) override;
void BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view = 0) override;
void BindSampler(u32 group, u32 slot, SamplerHandle handle) override;
/// Returns the layout tracker that owns this pipeline
PipelineLayout& GetOwner() const {
return owner;
}
/// Returns the underlying vulkan pipeline handle
vk::Pipeline GetHandle() const {
return pipeline;
}
private:
Instance& instance;
PipelineLayout& owner;
vk::Pipeline pipeline;
};
} // namespace VideoCore::Vulkan
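The DescriptorData union above keeps every staged binding the same size, which is what lets a descriptor update template walk a SetData array with a fixed stride. A hedged sketch of how such a template entry could be described, assuming one entry per binding and the designated-initializer style enabled by VULKAN_HPP_NO_CONSTRUCTORS (an illustration, not the commit's actual template construction):

// One template entry per binding; offset/stride step through DescriptorData slots.
vk::DescriptorUpdateTemplateEntry MakeEntry(u32 binding, vk::DescriptorType type) {
    return vk::DescriptorUpdateTemplateEntry{
        .dstBinding = binding,
        .dstArrayElement = 0,
        .descriptorCount = 1,
        .descriptorType = type,
        .offset = binding * sizeof(DescriptorData), // each binding owns one slot
        .stride = sizeof(DescriptorData)
    };
}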

View File

@ -1,267 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_vulkan/vk_pipeline_builder.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_shader_state.h"
#include <algorithm>
#include <array>
#include <type_traits>
namespace Vulkan {
PipelineLayoutBuilder::PipelineLayoutBuilder() {
Clear();
}
void PipelineLayoutBuilder::Clear() {
pipeline_layout_info = vk::PipelineLayoutCreateInfo{};
}
vk::PipelineLayout PipelineLayoutBuilder::Build() {
auto device = g_vk_instace->GetDevice();
auto result = device.createPipelineLayout(pipeline_layout_info);
if (!result) {
LOG_ERROR(Render_Vulkan, "Failed to create pipeline layout");
return VK_NULL_HANDLE;
}
return result;
}
void PipelineLayoutBuilder::AddDescriptorSet(vk::DescriptorSetLayout layout) {
assert(pipeline_layout_info.setLayoutCount < MAX_SETS);
sets[pipeline_layout_info.setLayoutCount++] = layout;
pipeline_layout_info.pSetLayouts = sets.data();
}
void PipelineLayoutBuilder::AddPushConstants(vk::ShaderStageFlags stages, u32 offset, u32 size) {
assert(pipeline_layout_info.pushConstantRangeCount < MAX_PUSH_CONSTANTS);
push_constants[pipeline_layout_info.pushConstantRangeCount++] = {stages, offset, size};
pipeline_layout_info.pPushConstantRanges = push_constants.data();
}
PipelineBuilder::PipelineBuilder() {
Clear();
}
void PipelineBuilder::Clear() {
pipeline_info = vk::GraphicsPipelineCreateInfo{};
shader_stages.clear();
vertex_input_state = vk::PipelineVertexInputStateCreateInfo{};
input_assembly = vk::PipelineInputAssemblyStateCreateInfo{};
rasterization_state = vk::PipelineRasterizationStateCreateInfo{};
depth_state = vk::PipelineDepthStencilStateCreateInfo{};
blend_state = vk::PipelineColorBlendStateCreateInfo{};
blend_attachment = vk::PipelineColorBlendAttachmentState{};
dynamic_info = vk::PipelineDynamicStateCreateInfo{};
dynamic_states.fill({});
viewport_state = vk::PipelineViewportStateCreateInfo{};
multisample_info = vk::PipelineMultisampleStateCreateInfo{};
// Set defaults
SetNoCullRasterizationState();
SetNoDepthTestState();
SetNoBlendingState();
SetPrimitiveTopology(vk::PrimitiveTopology::eTriangleList);
// Have to be specified even if dynamic
SetViewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f);
SetScissorRect(0, 0, 1, 1);
SetBlendConstants(1.0f, 1.0f, 1.0f, 1.0f);
SetMultisamples(vk::SampleCountFlagBits::e1, false);
}
vk::Pipeline PipelineBuilder::Build() {
auto device = g_vk_instace->GetDevice();
auto result = device.createGraphicsPipeline({}, pipeline_info);
if (result.result != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Failed to build vulkan pipeline!");
UNREACHABLE();
}
return result.value;
}
void PipelineBuilder::SetPipelineLayout(vk::PipelineLayout layout) {
pipeline_info.layout = layout;
}
void PipelineBuilder::SetShaderStage(vk::ShaderStageFlagBits stage, vk::ShaderModule module) {
auto result = std::ranges::find_if(shader_stages.begin(), shader_stages.end(), [stage](const auto& info) {
return info.stage == stage;
});
// If the stage already exists, just replace the module
if (result != shader_stages.end()) {
result->module = module;
} else {
shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags(), stage, module, "main");
pipeline_info.stageCount++;
}
pipeline_info.pStages = shader_stages.data();
}
void PipelineBuilder::AddVertexBuffer(u32 binding, u32 stride, vk::VertexInputRate input_rate,
std::span<vk::VertexInputAttributeDescription> attributes) {
// Copy attributes to private array
auto loc = vertex_attributes.begin() + vertex_input_state.vertexAttributeDescriptionCount;
std::copy(attributes.begin(), attributes.end(), loc);
vertex_buffers[vertex_input_state.vertexBindingDescriptionCount++] = {binding, stride, input_rate};
vertex_input_state.vertexAttributeDescriptionCount += static_cast<u32>(attributes.size());
vertex_input_state.pVertexBindingDescriptions = vertex_buffers.data();
vertex_input_state.pVertexAttributeDescriptions = vertex_attributes.data();
pipeline_info.pVertexInputState = &vertex_input_state;
}
void PipelineBuilder::SetPrimitiveTopology(vk::PrimitiveTopology topology, bool enable_primitive_restart) {
input_assembly.topology = topology;
input_assembly.primitiveRestartEnable = enable_primitive_restart;
pipeline_info.pInputAssemblyState = &input_assembly;
}
void PipelineBuilder::SetRasterizationState(vk::PolygonMode polygon_mode, vk::CullModeFlags cull_mode,
vk::FrontFace front_face) {
rasterization_state.polygonMode = polygon_mode;
rasterization_state.cullMode = cull_mode;
rasterization_state.frontFace = front_face;
pipeline_info.pRasterizationState = &rasterization_state;
}
void PipelineBuilder::SetLineWidth(float width) {
rasterization_state.lineWidth = width;
pipeline_info.pRasterizationState = &rasterization_state;
}
void PipelineBuilder::SetMultisamples(vk::SampleCountFlagBits samples, bool per_sample_shading) {
multisample_info.rasterizationSamples = samples;
multisample_info.sampleShadingEnable = per_sample_shading;
multisample_info.minSampleShading = (static_cast<u32>(samples) > 1) ? 1.0f : 0.0f;
pipeline_info.pMultisampleState = &multisample_info;
}
void PipelineBuilder::SetNoCullRasterizationState() {
SetRasterizationState(vk::PolygonMode::eFill, vk::CullModeFlagBits::eNone, vk::FrontFace::eClockwise);
}
void PipelineBuilder::SetDepthState(bool depth_test, bool depth_write, vk::CompareOp compare_op) {
depth_state.depthTestEnable = depth_test;
depth_state.depthWriteEnable = depth_write;
depth_state.depthCompareOp = compare_op;
pipeline_info.pDepthStencilState = &depth_state;
}
void PipelineBuilder::SetStencilState(bool stencil_test, vk::StencilOpState front, vk::StencilOpState back) {
depth_state.stencilTestEnable = stencil_test;
depth_state.front = front;
depth_state.back = back;
pipeline_info.pDepthStencilState = &depth_state;
}
void PipelineBuilder::SetNoStencilState() {
depth_state.stencilTestEnable = VK_FALSE;
depth_state.front = vk::StencilOpState{};
depth_state.back = vk::StencilOpState{};
}
void PipelineBuilder::SetNoDepthTestState() {
SetDepthState(false, false, vk::CompareOp::eAlways);
}
void PipelineBuilder::SetBlendConstants(float r, float g, float b, float a) {
blend_state.blendConstants = std::array<float, 4>{r, g, b, a};
pipeline_info.pColorBlendState = &blend_state;
}
void PipelineBuilder::SetBlendLogicOp(vk::LogicOp logic_op) {
blend_state.logicOp = logic_op;
blend_state.logicOpEnable = true;
}
void PipelineBuilder::SetBlendAttachment(bool blend_enable, vk::BlendFactor src_factor, vk::BlendFactor dst_factor,
vk::BlendOp op, vk::BlendFactor alpha_src_factor,
vk::BlendFactor alpha_dst_factor, vk::BlendOp alpha_op,
vk::ColorComponentFlags write_mask) {
blend_attachment.blendEnable = blend_enable;
blend_attachment.srcColorBlendFactor = src_factor;
blend_attachment.dstColorBlendFactor = dst_factor;
blend_attachment.colorBlendOp = op;
blend_attachment.srcAlphaBlendFactor = alpha_src_factor;
blend_attachment.dstAlphaBlendFactor = alpha_dst_factor;
blend_attachment.alphaBlendOp = alpha_op;
blend_attachment.colorWriteMask = write_mask;
blend_state.attachmentCount = 1;
blend_state.pAttachments = &blend_attachment;
pipeline_info.pColorBlendState = &blend_state;
}
void PipelineBuilder::SetNoBlendingState() {
SetBlendAttachment(false, vk::BlendFactor::eOne, vk::BlendFactor::eZero, vk::BlendOp::eAdd, vk::BlendFactor::eOne,
vk::BlendFactor::eZero, vk::BlendOp::eAdd, vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA);
}
void PipelineBuilder::SetDynamicStates(const std::span<vk::DynamicState> states) {
if (states.size() > MAX_DYNAMIC_STATES) {
LOG_ERROR(Render_Vulkan, "Cannot include more dynamic states!");
UNREACHABLE();
}
// Copy the state data
std::copy(states.begin(), states.end(), dynamic_states.begin());
dynamic_info.dynamicStateCount = static_cast<u32>(states.size());
dynamic_info.pDynamicStates = dynamic_states.data();
pipeline_info.pDynamicState = &dynamic_info;
}
void PipelineBuilder::SetRenderingFormats(vk::Format color, vk::Format depth_stencil) {
color_format = color;
depth_stencil_format = depth_stencil;
auto IsStencil = [](vk::Format format) -> bool {
switch (format) {
case vk::Format::eD16UnormS8Uint:
case vk::Format::eD24UnormS8Uint:
case vk::Format::eD32SfloatS8Uint:
return true;
default:
return false;
};
};
const u32 color_attachment_count = color == vk::Format::eUndefined ? 0 : 1;
rendering_info = vk::PipelineRenderingCreateInfo{0, color_attachment_count, &color_format, depth_stencil_format,
IsStencil(depth_stencil) ? depth_stencil : vk::Format::eUndefined};
pipeline_info.pNext = &rendering_info;
}
void PipelineBuilder::SetViewport(float x, float y, float width, float height, float min_depth, float max_depth) {
viewport = vk::Viewport{x, y, width, height, min_depth, max_depth};
viewport_state.pViewports = &viewport;
viewport_state.viewportCount = 1;
pipeline_info.pViewportState = &viewport_state;
}
void PipelineBuilder::SetScissorRect(s32 x, s32 y, u32 width, u32 height) {
scissor = vk::Rect2D{{x, y}, {width, height}};
viewport_state.scissorCount = 1u;
viewport_state.pScissors = &scissor;
pipeline_info.pViewportState = &viewport_state;
}
} // namespace Vulkan
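For reference, the removed builder was driven imperatively, roughly like this (a reconstruction under assumed shader modules and layout, not code from the repository):

// Hypothetical usage of the deleted PipelineBuilder; `layout`, `vertex_shader`
// and `fragment_shader` are assumed to exist.
PipelineBuilder builder;
builder.SetPipelineLayout(layout);
builder.SetShaderStage(vk::ShaderStageFlagBits::eVertex, vertex_shader);
builder.SetShaderStage(vk::ShaderStageFlagBits::eFragment, fragment_shader);
builder.SetRenderingFormats(vk::Format::eR8G8B8A8Unorm, vk::Format::eD24UnormS8Uint);
std::array states = {vk::DynamicState::eViewport, vk::DynamicState::eScissor};
builder.SetDynamicStates(states);
vk::Pipeline pipeline = builder.Build();

The new Pipeline/PipelineLayout classes replace this call-by-call pattern with declarative PipelineInfo structures.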

View File

@ -1,108 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include "video_core/renderer_vulkan/vk_texture.h"
namespace Vulkan {
class PipelineLayoutBuilder {
public:
PipelineLayoutBuilder();
~PipelineLayoutBuilder() = default;
void Clear();
vk::PipelineLayout Build();
void AddDescriptorSet(vk::DescriptorSetLayout layout);
void AddPushConstants(vk::ShaderStageFlags stages, u32 offset, u32 size);
private:
static constexpr u32 MAX_SETS = 8;
static constexpr u32 MAX_PUSH_CONSTANTS = 5;
vk::PipelineLayoutCreateInfo pipeline_layout_info;
std::array<vk::DescriptorSetLayout, MAX_SETS> sets;
std::array<vk::PushConstantRange, MAX_PUSH_CONSTANTS> push_constants;
};
class PipelineBuilder {
public:
PipelineBuilder();
~PipelineBuilder() = default;
void Clear();
vk::Pipeline Build();
void SetPipelineLayout(vk::PipelineLayout layout);
void AddVertexBuffer(u32 binding, u32 stride, vk::VertexInputRate input_rate,
const std::span<vk::VertexInputAttributeDescription> attributes);
void SetShaderStage(vk::ShaderStageFlagBits stage, vk::ShaderModule module);
void SetPrimitiveTopology(vk::PrimitiveTopology topology, bool enable_primitive_restart = false);
void SetLineWidth(float width);
void SetMultisamples(vk::SampleCountFlagBits samples, bool per_sample_shading);
void SetRasterizationState(vk::PolygonMode polygon_mode, vk::CullModeFlags cull_mode,
vk::FrontFace front_face);
void SetNoCullRasterizationState();
void SetDepthState(bool depth_test, bool depth_write, vk::CompareOp compare_op);
void SetStencilState(bool stencil_test, vk::StencilOpState front, vk::StencilOpState back);
void SetNoDepthTestState();
void SetNoStencilState();
void SetBlendConstants(float r, float g, float b, float a);
void SetNoBlendingState();
void SetBlendLogicOp(vk::LogicOp logic_op);
void SetBlendAttachment(bool blend_enable, vk::BlendFactor src_factor, vk::BlendFactor dst_factor,
vk::BlendOp op, vk::BlendFactor alpha_src_factor, vk::BlendFactor alpha_dst_factor,
vk::BlendOp alpha_op,vk::ColorComponentFlags write_mask);
void SetViewport(float x, float y, float width, float height, float min_depth, float max_depth);
void SetScissorRect(s32 x, s32 y, u32 width, u32 height);
void SetDynamicStates(const std::span<vk::DynamicState> states);
void SetRenderingFormats(vk::Format color, vk::Format depth_stencil = vk::Format::eUndefined);
private:
static constexpr u32 MAX_DYNAMIC_STATES = 20;
static constexpr u32 MAX_SHADER_STAGES = 3;
static constexpr u32 MAX_VERTEX_BUFFERS = 8;
static constexpr u32 MAX_VERTEX_ATTRIBUTES = 16;
vk::GraphicsPipelineCreateInfo pipeline_info;
std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
vk::PipelineVertexInputStateCreateInfo vertex_input_state;
std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BUFFERS> vertex_buffers;
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> vertex_attributes;
vk::PipelineInputAssemblyStateCreateInfo input_assembly;
vk::PipelineRasterizationStateCreateInfo rasterization_state;
vk::PipelineDepthStencilStateCreateInfo depth_state;
// Blending
vk::PipelineColorBlendStateCreateInfo blend_state;
vk::PipelineColorBlendAttachmentState blend_attachment;
vk::PipelineDynamicStateCreateInfo dynamic_info;
std::array<vk::DynamicState, MAX_DYNAMIC_STATES> dynamic_states;
vk::PipelineViewportStateCreateInfo viewport_state;
vk::Viewport viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
vk::Rect2D scissor;
// Multisampling
vk::PipelineMultisampleStateCreateInfo multisample_info;
vk::PipelineRenderingCreateInfo rendering_info;
vk::Format color_format, depth_stencil_format;
};
} // namespace Vulkan

View File

@ -0,0 +1,130 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
// Include the vulkan platform specific header
#if defined(ANDROID) || defined (__ANDROID__)
#define VK_USE_PLATFORM_ANDROID_KHR 1
#elif defined(_WIN32)
#define VK_USE_PLATFORM_WIN32_KHR 1
#elif defined(__APPLE__)
#define VK_USE_PLATFORM_MACOS_MVK 1
#define VK_USE_PLATFORM_METAL_EXT 1
#else
#ifdef WAYLAND_DISPLAY
#define VK_USE_PLATFORM_WAYLAND_KHR 1
#else // wayland
#define VK_USE_PLATFORM_XLIB_KHR 1
#endif
#endif
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <vector>
#include "common/logging/log.h"
#include "core/frontend/emu_window.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore::Vulkan {
inline vk::SurfaceKHR CreateSurface(const vk::Instance& instance, const Frontend::EmuWindow& emu_window) {
const auto& window_info = emu_window.GetWindowInfo();
vk::SurfaceKHR surface;
#if VK_USE_PLATFORM_WIN32_KHR
if (window_info.type == Frontend::WindowSystemType::Windows) {
const vk::Win32SurfaceCreateInfoKHR win32_ci = {
.hinstance = nullptr,
.hwnd = static_cast<HWND>(window_info.render_surface)
};
if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize Win32 surface");
}
}
#elif VK_USE_PLATFORM_XLIB_KHR
if (window_info.type == Frontend::WindowSystemType::X11) {
const vk::XlibSurfaceCreateInfoKHR xlib_ci{{},
static_cast<Display*>(window_info.display_connection),
reinterpret_cast<Window>(window_info.render_surface)};
if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
UNREACHABLE();
}
}
#elif VK_USE_PLATFORM_WAYLAND_KHR
if (window_info.type == Frontend::WindowSystemType::Wayland) {
const vk::WaylandSurfaceCreateInfoKHR wayland_ci{{},
static_cast<wl_display*>(window_info.display_connection),
static_cast<wl_surface*>(window_info.render_surface)};
if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
UNREACHABLE();
}
}
#endif
if (!surface) {
LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform");
}
return surface;
}
inline auto GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) {
const auto properties = vk::enumerateInstanceExtensionProperties();
if (properties.empty()) {
LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
return std::vector<const char*>{};
}
// Add the windowing system specific extension
std::vector<const char*> extensions;
extensions.reserve(6);
switch (window_type) {
case Frontend::WindowSystemType::Headless:
break;
#if VK_USE_PLATFORM_WIN32_KHR
case Frontend::WindowSystemType::Windows:
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
break;
#elif VK_USE_PLATFORM_XLIB_KHR
case Frontend::WindowSystemType::X11:
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
break;
#elif VK_USE_PLATFORM_WAYLAND_KHR
case Frontend::WindowSystemType::Wayland:
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
break;
#endif
default:
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
break;
}
if (window_type != Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_debug_utils) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
for (const char* extension : extensions) {
const auto iter = std::ranges::find_if(properties, [extension](const auto& prop) {
return std::strcmp(extension, prop.extensionName) == 0;
});
if (iter == properties.end()) {
LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
return std::vector<const char*>{};
}
}
return extensions;
}
} // namespace VideoCore::Vulkan
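A hedged sketch of how these helpers fit together when creating the Vulkan instance; the application name and API version are assumptions, and error handling is elided:

// Hypothetical wrapper around GetInstanceExtensions(); not code from this commit.
vk::Instance MakeInstance(Frontend::WindowSystemType type, bool enable_debug_utils) {
    const std::vector<const char*> extensions = GetInstanceExtensions(type, enable_debug_utils);
    const vk::ApplicationInfo app_info = {
        .pApplicationName = "Citra",          // assumed
        .apiVersion = VK_API_VERSION_1_1      // assumed target version
    };
    const vk::InstanceCreateInfo instance_info = {
        .pApplicationInfo = &app_info,
        .enabledExtensionCount = static_cast<u32>(extensions.size()),
        .ppEnabledExtensionNames = extensions.data()
    };
    // The returned instance can then be handed to CreateSurface() above
    return vk::createInstance(instance_info);
}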

View File

@ -5,15 +5,10 @@
#include <algorithm>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/pica_state.h"
@ -21,27 +16,45 @@
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_vulkan/vk_surface_params.h"
#include "video_core/renderer_vulkan/pica_to_vulkan.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/video_core.h"
namespace Vulkan {
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_GS, "Vulkan", "Geometry Shader Setup", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(Vulkan_Blits, "Vulkan", "Blits", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(Vulkan_CacheManagement, "Vulkan", "Cache Management", MP_RGB(100, 255, 100));
using PixelFormat = SurfaceParams::PixelFormat;
using SurfaceType = SurfaceParams::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0));
MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
// These should be large enough to hold about one frame's worth of data.
constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024;
constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
constexpr u32 TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
constexpr std::array LUT_LF_VIEWS = {
vk::Format::eR32G32Sfloat
};
constexpr std::array LUT_VIEWS = {
vk::Format::eR32G32Sfloat,
vk::Format::eR32G32B32A32Sfloat
};
RasterizerVulkan::RasterizerVulkan(CommandScheduler& scheduler, Frontend::EmuWindow& emu_window) :
scheduler(scheduler), vertex_buffer(scheduler, VERTEX_BUFFER_SIZE, BufferUsage::Vertex),
index_buffer(scheduler, INDEX_BUFFER_SIZE, BufferUsage::Index),
uniform_buffer(scheduler, UNIFORM_BUFFER_SIZE, BufferUsage::Uniform),
texture_buffer_lut_lf(scheduler, TEXTURE_BUFFER_SIZE, BufferUsage::UniformTexel, LUT_LF_VIEWS),
texture_buffer_lut(scheduler, TEXTURE_BUFFER_SIZE, BufferUsage::UniformTexel, LUT_VIEWS) {
RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) {
// TODO: Shadow rendering is not implemented yet
allow_shadow = false;
@ -65,29 +78,6 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) {
uniform_buffer_alignment);
uniform_size_aligned_fs = Common::AlignUp<std::size_t>(sizeof(UniformData),
uniform_buffer_alignment);
// Allocate texture buffer LUTs
Buffer::Info texel_buffer_info = {
.size = TEXTURE_BUFFER_SIZE,
.properties = vk::MemoryPropertyFlagBits::eDeviceLocal,
.usage = vk::BufferUsageFlagBits::eUniformTexelBuffer |
vk::BufferUsageFlagBits::eTransferDst,
};
texel_buffer_info.view_formats[0] = vk::Format::eR32G32Sfloat;
texture_buffer_lut_lf.Create(texel_buffer_info);
texel_buffer_info.view_formats[1] = vk::Format::eR32G32B32A32Sfloat;
texture_buffer_lut.Create(texel_buffer_info);
// Create and bind uniform buffers
Buffer::Info uniform_info = {
.size = UNIFORM_BUFFER_SIZE,
.properties = vk::MemoryPropertyFlagBits::eDeviceLocal,
.usage = vk::BufferUsageFlagBits::eUniformBuffer |
vk::BufferUsageFlagBits::eTransferDst
};
uniform_buffer.Create(uniform_info);
auto& state = VulkanState::Get();
state.SetUniformBuffer(0, 0, uniform_size_aligned_vs, uniform_buffer);
state.SetUniformBuffer(1, uniform_size_aligned_vs, uniform_size_aligned_fs, uniform_buffer);
@ -97,26 +87,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) {
state.SetTexelBuffer(1, 0, TEXTURE_BUFFER_SIZE, texture_buffer_lut, 0);
state.SetTexelBuffer(2, 0, TEXTURE_BUFFER_SIZE, texture_buffer_lut, 1);
// Create vertex and index buffers
Buffer::Info vertex_info = {
.size = VERTEX_BUFFER_SIZE,
.properties = vk::MemoryPropertyFlagBits::eDeviceLocal,
.usage = vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eTransferDst
};
Buffer::Info index_info = {
.size = INDEX_BUFFER_SIZE,
.properties = vk::MemoryPropertyFlagBits::eDeviceLocal,
.usage = vk::BufferUsageFlagBits::eIndexBuffer |
vk::BufferUsageFlagBits::eTransferDst
};
vertex_buffer.Create(vertex_info);
index_buffer.Create(index_info);
// Set clear texture color
state.SetPlaceholderColor(255, 0, 0, 255);
state.SetPlaceholderColor(255, 255, 255, 255);
SyncEntireState();
}
@ -238,7 +210,7 @@ void RasterizerVulkan::DrawTriangles() {
}
bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
MICROPROFILE_SCOPE(Vulkan_Drawing);
const auto& regs = Pica::g_state.regs;
auto& state = VulkanState::Get();
@ -252,6 +224,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
const bool using_color_fb =
regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0;
const bool using_depth_fb =
!shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
(write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0);
@ -304,15 +277,15 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
// Scissor checks are window-, not viewport-relative, which means that if the cached texture
// sub-rect changes, the scissor bounds also need to be updated.
GLint scissor_x1 =
static_cast<GLint>(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale);
GLint scissor_y1 =
static_cast<GLint>(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale);
int scissor_x1 =
static_cast<int>(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale);
int scissor_y1 =
static_cast<int>(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale);
// x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when
// scaling or doing multisampling.
GLint scissor_x2 =
static_cast<GLint>(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale);
GLint scissor_y2 = static_cast<GLint>(surfaces_rect.bottom +
int scissor_x2 =
static_cast<int>(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale);
int scissor_y2 = static_cast<int>(surfaces_rect.bottom +
(regs.rasterizer.scissor_test.y2 + 1) * res_scale);
if (uniform_block_data.data.scissor_x1 != scissor_x1 ||
@ -420,8 +393,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
depth_surface->texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal);
}
g_vk_task_scheduler->Submit();
return true;
}
@ -924,22 +895,22 @@ void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) {
}
void RasterizerVulkan::FlushAll() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
res_cache.FlushAll();
}
void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
res_cache.FlushRegion(addr, size);
}
void RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
res_cache.InvalidateRegion(addr, size, nullptr);
}
void RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
res_cache.FlushRegion(addr, size);
res_cache.InvalidateRegion(addr, size, nullptr);
}
@ -949,7 +920,7 @@ void RasterizerVulkan::ClearAll(bool flush) {
}
bool RasterizerVulkan::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
MICROPROFILE_SCOPE(Vulkan_Blits);
SurfaceParams src_params;
src_params.addr = config.GetPhysicalInputAddress();
@ -1099,7 +1070,7 @@ bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
if (framebuffer_addr == 0) {
return false;
}
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
SurfaceParams src_params;
src_params.addr = framebuffer_addr;
@ -1421,7 +1392,7 @@ void RasterizerVulkan::SyncLightSpotDirection(int light_index) {
}
void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) {
GLfloat dist_atten_bias =
float dist_atten_bias =
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias)
.ToFloat32();
@ -1432,7 +1403,7 @@ void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) {
}
void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) {
GLfloat dist_atten_scale =
float dist_atten_scale =
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale)
.ToFloat32();
@ -1444,8 +1415,8 @@ void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) {
void RasterizerVulkan::SyncShadowBias() {
const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
GLfloat linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
float constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
float linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
if (constant != uniform_block_data.data.shadow_bias_constant ||
linear != uniform_block_data.data.shadow_bias_linear) {
@ -1456,7 +1427,7 @@ void RasterizerVulkan::SyncShadowBias() {
}
void RasterizerVulkan::SyncShadowTextureBias() {
GLint bias = Pica::g_state.regs.texturing.shadow.bias << 1;
int bias = Pica::g_state.regs.texturing.shadow.bias << 1;
if (bias != uniform_block_data.data.shadow_texture_bias) {
uniform_block_data.data.shadow_texture_bias = bias;
uniform_block_data.dirty = true;
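One detail in the scissor hunk above deserves a worked example: the +1 on x2/y2 exists because the upper scissor bounds are inclusive, so the last pixel must be fully covered after scaling.

// Worked example with assumed values: surfaces_rect.left = 0, res_scale = 2,
// scissor_test.x1 = 10, scissor_test.x2 = 19:
//   scissor_x1 = 0 + 10 * 2       = 20
//   scissor_x2 = 0 + (19 + 1) * 2 = 40
// The scaled scissor spans [20, 40), i.e. all 10 source pixels at 2x scale,
// leaving no cracks when scaling or multisampling.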

View File

@ -5,21 +5,11 @@
#pragma once
#include <array>
#include <cstddef>
#include <cstring>
#include <memory>
#include <vector>
#include <glm/glm.hpp>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/pica_state.h"
#include "video_core/pica_types.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_texturing.h"
#include "video_core/shader/shader.h"
#include "video_core/renderer_vulkan/vk_state.h"
@ -31,7 +21,11 @@ class EmuWindow;
namespace Vulkan {
enum class UniformBindings : u32 { Common, VS, GS };
enum class UniformBindings : u32 {
Common = 0,
VertexShader = 1,
GeometryShader = 2
};
struct LightSrc {
alignas(16) glm::vec3 specular_0;
@ -79,14 +73,13 @@ struct UniformData {
alignas(16) glm::vec4 clip_coef;
};
static_assert(
sizeof(UniformData) == 0x4F0,
static_assert(sizeof(UniformData) == 0x4F0,
"The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
// NOTE: the same rule from UniformData also applies here.
/// NOTE: the same rule from UniformData also applies here.
struct PicaUniformsData {
void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup);
@ -102,17 +95,18 @@ struct PicaUniformsData {
struct VSUniformData {
PicaUniformsData uniforms;
};
static_assert(
sizeof(VSUniformData) == 1856,
static_assert(sizeof(VSUniformData) == 1856,
"The size of the VSUniformData structure has changed, update the structure in the shader");
static_assert(sizeof(VSUniformData) < 16384,
"VSUniformData structure must be less than 16kb as per the OpenGL spec");
struct ScreenInfo;
class CommandScheduler;
class RasterizerVulkan : public VideoCore::RasterizerInterface {
public:
explicit RasterizerVulkan(Frontend::EmuWindow& emu_window);
explicit RasterizerVulkan(CommandScheduler& scheduler, Frontend::EmuWindow& emu_window);
~RasterizerVulkan() override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
@ -252,6 +246,7 @@ private:
};
private:
CommandScheduler& scheduler;
RasterizerCacheVulkan res_cache;
std::vector<HardwareVertex> vertex_batch;
bool shader_dirty = true;
@ -269,13 +264,7 @@ private:
bool dirty;
} uniform_block_data = {};
// They shall be big enough for about one frame.
static constexpr std::size_t VERTEX_BUFFER_SIZE = 64 * 1024 * 1024;
static constexpr std::size_t INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
Buffer vertex_buffer, index_buffer;
StreamBuffer vertex_buffer, index_buffer;
StreamBuffer uniform_buffer, texture_buffer_lut_lf, texture_buffer_lut;
u32 uniform_buffer_alignment;
@ -293,4 +282,4 @@ private:
bool allow_shadow{};
};
} // namespace OpenGL
} // namespace Vulkan
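The uniform bookkeeping above relies on rounding each block up to the device's minimum uniform offset alignment so that both blocks can be sub-allocated from one buffer. A minimal sketch of that arithmetic; the 256-byte alignment is an assumption, the real code queries the device limit:

// Mirrors Common::AlignUp for power-of-two alignments (an assumption here)
constexpr std::size_t AlignUp(std::size_t value, std::size_t align) {
    return (value + align - 1) & ~(align - 1);
}
// sizeof(UniformData) == 0x4F0, so with a 256-byte requirement:
//   AlignUp(0x4F0, 256) == 0x500
// meaning the next uniform block starts at offset 0x500 in the same buffer.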

View File

@ -16,19 +16,11 @@
#include <boost/range/iterator_range.hpp>
#include "common/alignment.h"
#include "common/bit_field.h"
#include "common/color.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/texture.h"
#include "common/vector_math.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/hle/kernel/process.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/pica_state.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_rasterizer_cache.h"
#include "video_core/renderer_vulkan/vk_format_reinterpreter.h"
@ -375,7 +367,7 @@ static vk::Rect2D FromRect(Common::Rectangle<u32> rect) {
// Allocate an uninitialized texture of appropriate size and format for the surface
void RasterizerCacheVulkan::AllocateTexture(Texture& target, SurfaceType type, vk::Format format,
u32 width, u32 height) {
u32 width, u32 height, bool framebuffer) {
// First check if the texture can be recycled
auto recycled_tex = host_texture_recycler.find({format, width, height});
if (recycled_tex != host_texture_recycler.end()) {
@ -384,11 +376,12 @@ void RasterizerCacheVulkan::AllocateTexture(Texture& target, SurfaceType type, v
return;
}
auto GetUsage = [](SurfaceType type) {
auto GetUsage = [framebuffer](SurfaceType type) {
auto usage = vk::ImageUsageFlagBits::eSampled |
vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eTransferSrc;
if (framebuffer) {
switch (type) {
case SurfaceType::Color:
case SurfaceType::Fill:
@ -402,12 +395,12 @@ void RasterizerCacheVulkan::AllocateTexture(Texture& target, SurfaceType type, v
default:
break;
}
}
return usage;
};
// Otherwise create a brand new texture
u32 levels = std::log2(std::max(width, height)) + 1;
u32 levels = static_cast<u32>(std::log2(std::max(width, height))) + 1;
Texture::Info texture_info{
.width = width,
.height = height,
@ -516,8 +509,9 @@ void CachedSurface::LoadGPUBuffer(PAddr load_start, PAddr load_end) {
const bool need_swap = (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8);
const u8* const texture_src_data = VideoCore::g_memory->GetPhysicalPointer(addr);
if (texture_src_data == nullptr)
if (texture_src_data == nullptr) {
return;
}
if (vk_buffer.empty()) {
vk_buffer.resize(width * height * GetBytesPerPixel(pixel_format));
@ -660,9 +654,9 @@ void CachedSurface::UploadGPUTexture(Common::Rectangle<u32> rect) {
// Load data from memory to the surface
auto buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format);
auto update_size = rect.GetWidth() * rect.GetHeight() * GetBytesPerPixel(pixel_format);
std::span<u8> memory(vk_buffer.data() + buffer_offset, update_size);
std::span<const u8> memory{vk_buffer.data() + buffer_offset, update_size};
texture.Upload(0, 0, stride, FromRect(rect), memory);
texture.Upload(0, 0, stride, memory);
InvalidateAllWatcher();
}
@ -867,7 +861,8 @@ Surface RasterizerCacheVulkan::GetSurface(const SurfaceParams& params, ScaleMatc
SurfaceRect_Tuple RasterizerCacheVulkan::GetSurfaceSubRect(const SurfaceParams& params,
ScaleMatch match_res_scale,
bool load_if_create) {
bool load_if_create,
bool framebuffer) {
if (params.addr == 0 || params.height * params.width == 0) {
return std::make_tuple(nullptr, Common::Rectangle<u32>{});
}
@ -887,7 +882,7 @@ SurfaceRect_Tuple RasterizerCacheVulkan::GetSurfaceSubRect(const SurfaceParams&
SurfaceParams new_params = *surface;
new_params.res_scale = params.res_scale;
surface = CreateSurface(new_params);
surface = CreateSurface(new_params, framebuffer);
RegisterSurface(surface);
}
}
@ -1077,8 +1072,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheVulkan::GetFramebufferSurfaces(
// Make sure that framebuffers don't overlap if both color and depth are being used
if (using_color_fb && using_depth_fb &&
boost::icl::length(color_vp_interval & depth_vp_interval)) {
LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
"overlapping framebuffers not supported!");
LOG_CRITICAL(Render_Vulkan, "Color and depth framebuffer memory regions overlap!");
using_depth_fb = false;
}
@ -1086,13 +1080,13 @@ SurfaceSurfaceRect_Tuple RasterizerCacheVulkan::GetFramebufferSurfaces(
Surface color_surface = nullptr;
if (using_color_fb)
std::tie(color_surface, color_rect) =
GetSurfaceSubRect(color_params, ScaleMatch::Exact, false);
GetSurfaceSubRect(color_params, ScaleMatch::Exact, false, true);
Common::Rectangle<u32> depth_rect{};
Surface depth_surface = nullptr;
if (using_depth_fb)
std::tie(depth_surface, depth_rect) =
GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false);
GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false, true);
Common::Rectangle<u32> fb_rect{};
if (color_surface != nullptr && depth_surface != nullptr) {
@ -1450,13 +1444,13 @@ void RasterizerCacheVulkan::InvalidateRegion(PAddr addr, u32 size, const Surface
remove_surfaces.clear();
}
Surface RasterizerCacheVulkan::CreateSurface(const SurfaceParams& params) {
Surface RasterizerCacheVulkan::CreateSurface(const SurfaceParams& params, bool framebuffer) {
Surface surface = std::make_shared<CachedSurface>(*this);
static_cast<SurfaceParams&>(*surface) = params;
surface->invalid_regions.insert(surface->GetInterval());
AllocateTexture(surface->texture, params.type, GetFormatTuple(surface->pixel_format),
surface->GetScaledWidth(), surface->GetScaledHeight());
surface->GetScaledWidth(), surface->GetScaledHeight(), framebuffer);
return surface;
}
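The mip level count in AllocateTexture above can be sanity-checked with a quick worked example:

// With assumed dimensions width = 512, height = 256:
//   std::log2(std::max(512, 256)) = std::log2(512) = 9
//   levels = 9 + 1 = 10
// which yields the full chain 512x256 down to 1x1 along the larger axis.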

View File

@ -14,6 +14,7 @@
#include <boost/icl/interval_set.hpp>
#include <unordered_map>
#include <boost/functional/hash.hpp>
#include <robin_hood.h>
#include "common/assert.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@ -22,6 +23,10 @@
#include "video_core/renderer_vulkan/vk_texture.h"
#include "video_core/texture/texture_decode.h"
// Alias for the hash map implementation so it can be swapped out later
template <typename Key, typename T, typename Hash = typename Key::Hash>
using HashMap = robin_hood::unordered_flat_map<Key, T, Hash>;
namespace Vulkan {
class RasterizerCacheVulkan;
@ -31,32 +36,25 @@ class FormatReinterpreterVulkan;
vk::Format GetFormatTuple(SurfaceParams::PixelFormat pixel_format);
struct HostTextureTag {
vk::Format format;
u32 width;
u32 height;
bool operator==(const HostTextureTag& rhs) const noexcept {
return std::tie(format, width, height) == std::tie(rhs.format, rhs.width, rhs.height);
};
vk::Format format = vk::Format::eUndefined;
u32 width = 0, height = 0;
// Enable comparisons
auto operator<=>(const HostTextureTag& other) const = default;
};
struct TextureCubeConfig {
PAddr px;
PAddr nx;
PAddr py;
PAddr ny;
PAddr pz;
PAddr nz;
u32 width;
PAddr px = 0;
PAddr nx = 0;
PAddr py = 0;
PAddr ny = 0;
PAddr pz = 0;
PAddr nz = 0;
u32 width = 0;
Pica::TexturingRegs::TextureFormat format;
bool operator==(const TextureCubeConfig& rhs) const {
return std::tie(px, nx, py, ny, pz, nz, width, format) ==
std::tie(rhs.px, rhs.nx, rhs.py, rhs.ny, rhs.pz, rhs.nz, rhs.width, rhs.format);
}
bool operator!=(const TextureCubeConfig& rhs) const {
return !(*this == rhs);
}
// Enable comparisons
auto operator<=>(const TextureCubeConfig& other) const = default;
};
} // namespace Vulkan
@ -98,6 +96,7 @@ using SurfaceRegions = boost::icl::interval_set<PAddr, std::less, SurfaceInterva
using SurfaceMap =
boost::icl::interval_map<PAddr, Surface, boost::icl::partial_absorber, std::less,
boost::icl::inplace_plus, boost::icl::inter_section, SurfaceInterval>;
using SurfaceCache =
boost::icl::interval_map<PAddr, SurfaceSet, boost::icl::partial_absorber, std::less,
boost::icl::inplace_plus, boost::icl::inter_section, SurfaceInterval>;
@ -109,8 +108,6 @@ static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval
using SurfaceRect_Tuple = std::tuple<Surface, Common::Rectangle<u32>>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
using PageMap = boost::icl::interval_map<u32, int>;
enum class ScaleMatch {
Exact, // only accept same res scale
Upscale, // only allow higher scale than params
@ -265,7 +262,7 @@ public:
/// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
/// 3DS memory to OpenGL and caches it (if not already cached)
SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create);
bool load_if_create, bool framebuffer = false);
/// Get a surface based on the texture configuration
Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config);
@ -306,9 +303,9 @@ private:
void ValidateSurface(const Surface& surface, PAddr addr, u32 size);
// Returns false if there is a surface in the cache at the interval with the same bit-width.
bool NoUnimplementedReinterpretations(const Vulkan::Surface& surface,
Vulkan::SurfaceParams& params,
const Vulkan::SurfaceInterval& interval);
bool NoUnimplementedReinterpretations(const Surface& surface,
SurfaceParams& params,
const SurfaceInterval& interval);
// Return true if a surface with an invalid pixel format exists at the interval
bool IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval);
@ -318,7 +315,7 @@ private:
const SurfaceInterval& interval);
/// Create a new surface
Surface CreateSurface(const SurfaceParams& params);
Surface CreateSurface(const SurfaceParams& params, bool framebuffer = false);
/// Register surface into the cache
void RegisterSurface(const Surface& surface);
@ -330,20 +327,20 @@ private:
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta);
SurfaceCache surface_cache;
PageMap cached_pages;
boost::icl::interval_map<u32, int> cached_pages;
SurfaceMap dirty_regions;
SurfaceSet remove_surfaces;
u16 resolution_scale_factor;
// Texture cube cache
std::unordered_map<TextureCubeConfig, CachedTextureCube> texture_cube_cache;
std::recursive_mutex mutex;
public:
void AllocateTexture(Texture& target, SurfaceParams::SurfaceType type, vk::Format format,
u32 width, u32 height);
std::unique_ptr<FormatReinterpreterVulkan> format_reinterpreter;
u32 width, u32 height, bool framebuffer);
};
} // namespace OpenGL
} // namespace Vulkan
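The HashMap alias above defaults its hasher to a nested Key::Hash type, so each key is expected to ship its own functor. A hedged sketch of what such a functor could look like for HostTextureTag (illustrative only, not necessarily the hash this commit uses):

struct HostTextureTagHash {
    std::size_t operator()(const HostTextureTag& tag) const noexcept {
        // boost::hash_combine-style mixing of the three fields
        std::size_t seed = std::hash<u32>{}(static_cast<u32>(tag.format));
        seed ^= std::hash<u32>{}(tag.width) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        seed ^= std::hash<u32>{}(tag.height) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        return seed;
    }
};
// Hypothetical usage: HashMap<HostTextureTag, Texture, HostTextureTagHash> recycler;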

View File

@ -0,0 +1,234 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include <glslang/Public/ShaderLang.h>
#include <glslang/SPIRV/GlslangToSpv.h>
#include <glslang/Include/ResourceLimits.h>
constexpr TBuiltInResource DefaultTBuiltInResource = {
.maxLights = 32,
.maxClipPlanes = 6,
.maxTextureUnits = 32,
.maxTextureCoords = 32,
.maxVertexAttribs = 64,
.maxVertexUniformComponents = 4096,
.maxVaryingFloats = 64,
.maxVertexTextureImageUnits = 32,
.maxCombinedTextureImageUnits = 80,
.maxTextureImageUnits = 32,
.maxFragmentUniformComponents = 4096,
.maxDrawBuffers = 32,
.maxVertexUniformVectors = 128,
.maxVaryingVectors = 8,
.maxFragmentUniformVectors = 16,
.maxVertexOutputVectors = 16,
.maxFragmentInputVectors = 15,
.minProgramTexelOffset = -8,
.maxProgramTexelOffset = 7,
.maxClipDistances = 8,
.maxComputeWorkGroupCountX = 65535,
.maxComputeWorkGroupCountY = 65535,
.maxComputeWorkGroupCountZ = 65535,
.maxComputeWorkGroupSizeX = 1024,
.maxComputeWorkGroupSizeY = 1024,
.maxComputeWorkGroupSizeZ = 64,
.maxComputeUniformComponents = 1024,
.maxComputeTextureImageUnits = 16,
.maxComputeImageUniforms = 8,
.maxComputeAtomicCounters = 8,
.maxComputeAtomicCounterBuffers = 1,
.maxVaryingComponents = 60,
.maxVertexOutputComponents = 64,
.maxGeometryInputComponents = 64,
.maxGeometryOutputComponents = 128,
.maxFragmentInputComponents = 128,
.maxImageUnits = 8,
.maxCombinedImageUnitsAndFragmentOutputs = 8,
.maxCombinedShaderOutputResources = 8,
.maxImageSamples = 0,
.maxVertexImageUniforms = 0,
.maxTessControlImageUniforms = 0,
.maxTessEvaluationImageUniforms = 0,
.maxGeometryImageUniforms = 0,
.maxFragmentImageUniforms = 8,
.maxCombinedImageUniforms = 8,
.maxGeometryTextureImageUnits = 16,
.maxGeometryOutputVertices = 256,
.maxGeometryTotalOutputComponents = 1024,
.maxGeometryUniformComponents = 1024,
.maxGeometryVaryingComponents = 64,
.maxTessControlInputComponents = 128,
.maxTessControlOutputComponents = 128,
.maxTessControlTextureImageUnits = 16,
.maxTessControlUniformComponents = 1024,
.maxTessControlTotalOutputComponents = 4096,
.maxTessEvaluationInputComponents = 128,
.maxTessEvaluationOutputComponents = 128,
.maxTessEvaluationTextureImageUnits = 16,
.maxTessEvaluationUniformComponents = 1024,
.maxTessPatchComponents = 120,
.maxPatchVertices = 32,
.maxTessGenLevel = 64,
.maxViewports = 16,
.maxVertexAtomicCounters = 0,
.maxTessControlAtomicCounters = 0,
.maxTessEvaluationAtomicCounters = 0,
.maxGeometryAtomicCounters = 0,
.maxFragmentAtomicCounters = 8,
.maxCombinedAtomicCounters = 8,
.maxAtomicCounterBindings = 1,
.maxVertexAtomicCounterBuffers = 0,
.maxTessControlAtomicCounterBuffers = 0,
.maxTessEvaluationAtomicCounterBuffers = 0,
.maxGeometryAtomicCounterBuffers = 0,
.maxFragmentAtomicCounterBuffers = 1,
.maxCombinedAtomicCounterBuffers = 1,
.maxAtomicCounterBufferSize = 16384,
.maxTransformFeedbackBuffers = 4,
.maxTransformFeedbackInterleavedComponents = 64,
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances = 8,
.maxSamples = 4,
.maxMeshOutputVerticesNV = 256,
.maxMeshOutputPrimitivesNV = 512,
.maxMeshWorkGroupSizeX_NV = 32,
.maxMeshWorkGroupSizeY_NV = 1,
.maxMeshWorkGroupSizeZ_NV = 1,
.maxTaskWorkGroupSizeX_NV = 32,
.maxTaskWorkGroupSizeY_NV = 1,
.maxTaskWorkGroupSizeZ_NV = 1,
.maxMeshViewCountNV = 4,
.maxDualSourceDrawBuffersEXT = 1,
.limits = TLimits{
.nonInductiveForLoops = 1,
.whileLoops = 1,
.doWhileLoops = 1,
.generalUniformIndexing = 1,
.generalAttributeMatrixVectorIndexing = 1,
.generalVaryingIndexing = 1,
.generalSamplerIndexing = 1,
.generalVariableIndexing = 1,
.generalConstantMatrixVectorIndexing = 1,
}};
namespace VideoCore::Vulkan {
EShLanguage ToEshShaderStage(ShaderStage stage) {
switch (stage) {
case ShaderStage::Vertex:
return EShLanguage::EShLangVertex;
case ShaderStage::Geometry:
return EShLanguage::EShLangGeometry;
case ShaderStage::Fragment:
return EShLanguage::EShLangFragment;
case ShaderStage::Compute:
return EShLanguage::EShLangCompute;
default:
LOG_CRITICAL(Render_Vulkan, "Unkown shader stage");
UNREACHABLE();
}
}
bool InitializeCompiler() {
static bool glslang_initialized = false;
if (glslang_initialized) {
return true;
}
if (!glslang::InitializeProcess()) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler");
return false;
}
std::atexit([]() { glslang::FinalizeProcess(); });
glslang_initialized = true;
return true;
}
Shader::Shader(Instance& instance, ShaderStage stage, std::string_view name,
std::string&& source) :
ShaderBase(stage, name, std::move(source)), instance(instance) {
}
Shader::~Shader() {
vk::Device device = instance.GetDevice();
device.destroyShaderModule(module);
}
bool Shader::Compile(ShaderOptimization level) {
if (!InitializeCompiler()) {
return false;
}
EProfile profile = ECoreProfile;
EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
EShLanguage lang = ToEshShaderStage(stage);
int default_version = 450;
const char* pass_source_code = source.c_str();
const int pass_source_code_length = static_cast<int>(source.size());
auto shader = std::make_unique<glslang::TShader>(lang);
shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3);
shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);
glslang::TShader::ForbidIncluder includer;
if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) {
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog());
return false;
}
// Even though there's only a single shader, we still need to link it to generate SPV
auto program = std::make_unique<glslang::TProgram>();
program->addShader(shader.get());
if (!program->link(messages)) {
LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog());
return false;
}
glslang::TIntermediate* intermediate = program->getIntermediate(lang);
std::vector<u32> out_code;
spv::SpvBuildLogger logger;
glslang::SpvOptions options;
// Compile the SPIR-V module without optimizations for easier debugging in RenderDoc.
if (level == ShaderOptimization::Debug) {
intermediate->addSourceText(pass_source_code, pass_source_code_length);
options.generateDebugInfo = true;
options.disableOptimizer = true;
options.optimizeSize = false;
options.disassemble = false;
options.validate = true;
} else {
options.disableOptimizer = false;
options.stripDebugInfo = true;
}
glslang::GlslangToSpv(*intermediate, out_code, &logger, &options);
const std::string spv_messages = logger.getAllMessages();
if (!spv_messages.empty()) {
LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
}
const vk::ShaderModuleCreateInfo shader_info = {
.codeSize = out_code.size() * sizeof(u32),
.pCode = out_code.data()
};
vk::Device device = instance.GetDevice();
module = device.createShaderModule(shader_info);
return true;
}
} // namespace VideoCore::Vulkan
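A hedged usage sketch for the compiler wrapper above; the GLSL source is a trivial placeholder and `instance` is assumed to be a live Instance reference:

std::string source = R"(
#version 450 core
layout (location = 0) out vec4 color;
void main() { color = vec4(1.0); }
)";
Shader shader{instance, ShaderStage::Fragment, "example_fs", std::move(source)};
// Debug keeps the optimizer off and embeds the source for RenderDoc inspection
if (shader.Compile(ShaderOptimization::Debug)) {
    const vk::ShaderModule module = shader.GetHandle();
    // `module` can now be plugged into a vk::PipelineShaderStageCreateInfo
}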

View File

@ -0,0 +1,32 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/common/shader.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore::Vulkan {
class Instance;
class Shader : public VideoCore::ShaderBase {
public:
Shader(Instance& instance, ShaderStage stage, std::string_view name,
std::string&& source);
~Shader() override;
bool Compile(ShaderOptimization level) override;
/// Returns the underlying vulkan shader module handle
vk::ShaderModule GetHandle() const {
return module;
}
private:
Instance& instance;
vk::ShaderModule module;
};
} // namespace VideoCore::Vulkan

View File

@ -2,28 +2,12 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <cstddef>
#include <string_view>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/bit_set.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_shader_gen.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/video_core.h"
#include <glslang/Public/ShaderLang.h>
#include <glslang/SPIRV/GlslangToSpv.h>
#include <glslang/Include/ResourceLimits.h>
using Pica::FramebufferRegs;
using Pica::LightingRegs;
@ -32,56 +16,7 @@ using Pica::TexturingRegs;
using TevStageConfig = TexturingRegs::TevStageConfig;
using VSOutputAttributes = RasterizerRegs::VSOutputAttributes;
namespace Vulkan {
static const char present_vertex_shader_source[] = R"(
#version 450 core
#extension GL_ARB_separate_shader_objects : enable
layout (location = 0) in vec2 vert_position;
layout (location = 1) in vec3 vert_tex_coord;
layout (location = 0) out vec3 frag_tex_coord;
layout (push_constant) uniform DrawInfo {
mat4 modelview_matrix;
vec4 i_resolution;
vec4 o_resolution;
int layer;
};
void main() {
vec4 position = vec4(vert_position, 0.0, 1.0) * modelview_matrix;
gl_Position = vec4(position.x, -position.y, 0.0, 1.0);
frag_tex_coord = vert_tex_coord;
}
)";
static const char present_fragment_shader_source[] = R"(
#version 450 core
#extension GL_ARB_separate_shader_objects : enable
layout (location = 0) in vec3 frag_tex_coord;
layout (location = 0) out vec4 color;
layout (push_constant) uniform DrawInfo {
mat3x2 modelview_matrix;
vec4 i_resolution;
vec4 o_resolution;
int layer;
};
layout (set = 0, binding = 0) uniform sampler2D screen_textures[3];
void main() {
color = texture(screen_textures[int(frag_tex_coord.z)], frag_tex_coord.xy);
}
)";
std::string GetPresentVertexShader() {
return present_vertex_shader_source;
}
std::string GetPresentFragmentShader() {
return present_fragment_shader_source;
}
namespace VideoCore::Vulkan {
constexpr std::string_view UniformBlockDef = R"(
#define NUM_TEV_STAGES 6
@ -162,184 +97,6 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_
return out;
}
PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) {
PicaFSConfig res{};
auto& state = res.state;
state.scissor_test_mode = regs.rasterizer.scissor_test.mode;
state.depthmap_enable = regs.rasterizer.depthmap_enable;
state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable
? regs.framebuffer.output_merger.alpha_test.func.Value()
: FramebufferRegs::CompareFunc::Always;
state.texture0_type = regs.texturing.texture0.type;
state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;
// We don't need these otherwise, reset them to avoid unnecessary shader generation
state.alphablend_enable = {};
state.logic_op = {};
// Copy relevant tev stages fields.
// We don't sync const_color here because of the high variance, it is a
// shader uniform instead.
const auto& tev_stages = regs.texturing.GetTevStages();
DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size());
for (std::size_t i = 0; i < tev_stages.size(); i++) {
const auto& tev_stage = tev_stages[i];
state.tev_stages[i].sources_raw = tev_stage.sources_raw;
state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
state.tev_stages[i].ops_raw = tev_stage.ops_raw;
state.tev_stages[i].scales_raw = tev_stage.scales_raw;
}
state.fog_mode = regs.texturing.fog_mode;
state.fog_flip = regs.texturing.fog_flip != 0;
state.combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
regs.texturing.tev_combiner_buffer_input.update_mask_a.Value()
<< 4;
// Fragment lighting
state.lighting.enable = !regs.lighting.disable;
state.lighting.src_num = regs.lighting.max_light_index + 1;
for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
unsigned num = regs.lighting.light_enable.GetNum(light_index);
const auto& light = regs.lighting.light[num];
state.lighting.light[light_index].num = num;
state.lighting.light[light_index].directional = light.config.directional != 0;
state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0;
state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0;
state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0;
state.lighting.light[light_index].dist_atten_enable =
!regs.lighting.IsDistAttenDisabled(num);
state.lighting.light[light_index].spot_atten_enable =
!regs.lighting.IsSpotAttenDisabled(num);
state.lighting.light[light_index].shadow_enable = !regs.lighting.IsShadowDisabled(num);
}
state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0;
state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
// This is a dummy field due to the lack of a corresponding register
state.lighting.lut_sp.enable = true;
state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0;
state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value();
state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp);
state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0;
state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0;
state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0;
state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0;
state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
state.lighting.config = regs.lighting.config0.config;
state.lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha;
state.lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha;
state.lighting.bump_mode = regs.lighting.config0.bump_mode;
state.lighting.bump_selector = regs.lighting.config0.bump_selector;
state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0;
state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0;
state.lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0;
state.lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0;
state.lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0;
state.lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0;
state.lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0;
state.lighting.shadow_selector = regs.lighting.config0.shadow_selector;
state.proctex.enable = regs.texturing.main_config.texture3_enable;
if (state.proctex.enable) {
state.proctex.coord = regs.texturing.main_config.texture3_coordinates;
state.proctex.u_clamp = regs.texturing.proctex.u_clamp;
state.proctex.v_clamp = regs.texturing.proctex.v_clamp;
state.proctex.color_combiner = regs.texturing.proctex.color_combiner;
state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner;
state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha;
state.proctex.noise_enable = regs.texturing.proctex.noise_enable;
state.proctex.u_shift = regs.texturing.proctex.u_shift;
state.proctex.v_shift = regs.texturing.proctex.v_shift;
state.proctex.lut_width = regs.texturing.proctex_lut.width;
state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0;
state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1;
state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2;
state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3;
state.proctex.lod_min = regs.texturing.proctex_lut.lod_min;
state.proctex.lod_max = regs.texturing.proctex_lut.lod_max;
state.proctex.lut_filter = regs.texturing.proctex_lut.filter;
}
state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
FramebufferRegs::FragmentOperationMode::Shadow;
state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0;
return res;
}
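BuildFromRegs exists to make shader caching cheap: everything the generated fragment shader depends on is captured into one hashable struct. A minimal lookup sketch follows; the fragment_shaders map and CompileFS helper are hypothetical illustrations, not part of this commit:
// Hypothetical cache keyed on PicaFSConfig (std::hash specialization is in the header below)
std::unordered_map<PicaFSConfig, vk::ShaderModule> fragment_shaders;
vk::ShaderModule GetFragmentShader(const Pica::Regs& regs) {
    const PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
    auto [it, inserted] = fragment_shaders.try_emplace(config);
    if (inserted) {
        it->second = CompileFS(config); // generate GLSL and compile only on a cache miss
    }
    return it->second;
}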
void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
program_hash = setup.GetProgramCodeHash();
swizzle_hash = setup.GetSwizzleDataHash();
main_offset = regs.main_offset;
sanitize_mul = VideoCore::g_hw_shader_accurate_mul;
num_outputs = 0;
output_map.fill(16);
for (int reg : Common::BitSet<u32>(regs.output_mask)) {
output_map[reg] = num_outputs++;
}
}
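The sentinel 16 marks unused registers (there are at most 16 output registers), and attribute indices are assigned in register order. A tiny worked example with a hypothetical mask:
// Sketch: output_mask = 0b101 enables output registers o0 and o2.
std::array<u32, 16> output_map;
output_map.fill(16);                         // 16 = "register unused" sentinel
u32 num_outputs = 0;
for (int reg : Common::BitSet<u32>(0b101)) {
    output_map[reg] = num_outputs++;         // output_map[0] == 0, output_map[2] == 1
}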
void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) {
vs_output_attributes = Common::BitSet<u32>(regs.vs.output_mask).Count();
gs_output_attributes = vs_output_attributes;
semantic_maps.fill({16, 0});
for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) {
const std::array semantics{
regs.rasterizer.vs_output_attributes[attrib].map_x.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_y.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_z.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_w.Value(),
};
for (u32 comp = 0; comp < 4; ++comp) {
const auto semantic = semantics[comp];
if (static_cast<std::size_t>(semantic) < 24) {
semantic_maps[static_cast<std::size_t>(semantic)] = {attrib, comp};
} else if (semantic != VSOutputAttributes::INVALID) {
LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic);
}
}
}
}
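The net effect is an inverse of the register mapping: instead of asking which semantic an attribute component carries, the generator can ask where a given semantic lives. A hypothetical example (values invented for illustration):
// If vs_output_attributes[1].map_x == TEXCOORD0_U, then after Init():
//   semantic_maps[TEXCOORD0_U] == SemanticMap{.attribute_index = 1, .component_index = 0}
// so the fixed-function geometry shader reads texcoord0.u from attribute 1, component x.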
/// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code)
static bool IsPassThroughTevStage(const TevStageConfig& stage) {
return (stage.color_op == TevStageConfig::Operation::Replace &&
@ -352,7 +109,7 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) {
}
static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) {
const auto& state = config.state;
const auto& state = config;
switch (texture_unit) {
case 0:
// Only unit 0 respects the texturing type
@ -628,23 +385,22 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper
}
/// Writes the if-statement condition used to evaluate alpha testing. Note that the
/// comparison operators below are inverted: the generated condition guards a discard,
/// so it must hold exactly when the alpha test fails.
static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareFunc func) {
using CompareFunc = FramebufferRegs::CompareFunc;
static void AppendAlphaTestCondition(std::string& out, Pica::CompareFunc func) {
switch (func) {
case CompareFunc::Never:
case Pica::CompareFunc::Never:
out += "true";
break;
case CompareFunc::Always:
case Pica::CompareFunc::Always:
out += "false";
break;
case CompareFunc::Equal:
case CompareFunc::NotEqual:
case CompareFunc::LessThan:
case CompareFunc::LessThanOrEqual:
case CompareFunc::GreaterThan:
case CompareFunc::GreaterThanOrEqual: {
case Pica::CompareFunc::Equal:
case Pica::CompareFunc::NotEqual:
case Pica::CompareFunc::LessThan:
case Pica::CompareFunc::LessThanOrEqual:
case Pica::CompareFunc::GreaterThan:
case Pica::CompareFunc::GreaterThanOrEqual: {
static constexpr std::array op{"!=", "==", ">=", ">", "<=", "<"};
const auto index = static_cast<u32>(func) - static_cast<u32>(CompareFunc::Equal);
const auto index = static_cast<u32>(func) - static_cast<u32>(Pica::CompareFunc::Equal);
out += fmt::format("int(last_tex_env_out.a * 255.0) {} alphatest_ref", op[index]);
break;
}
@ -659,7 +415,7 @@ static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareF
/// Writes the code to emulate the specified TEV stage
static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned index) {
const auto stage =
static_cast<const TexturingRegs::TevStageConfig>(config.state.tev_stages[index]);
static_cast<const TexturingRegs::TevStageConfig>(config.tev_stages[index]);
if (!IsPassThroughTevStage(stage)) {
const std::string index_name = std::to_string(index);
@ -716,7 +472,7 @@ static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned
/// Writes the code to emulate fragment lighting
static void WriteLighting(std::string& out, const PicaFSConfig& config) {
const auto& lighting = config.state.lighting;
const auto& lighting = config.lighting;
// Define lighting globals
out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
@ -1119,7 +875,7 @@ float ProcTexLookupLUT(int offset, float coord) {
)";
// Noise utility
if (config.state.proctex.noise_enable) {
if (config.proctex.noise_enable) {
// See swrasterizer/proctex.cpp for more information about these functions
out += R"(
int ProcTexNoiseRand1D(int v) {
@ -1159,16 +915,16 @@ float ProcTexNoiseCoef(vec2 x) {
}
out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n";
out += fmt::format("int lut_width = {} >> level;\n", config.state.proctex.lut_width);
out += fmt::format("int lut_width = {} >> level;\n", config.proctex.lut_width);
// Offsets for level 4-7 seem to be hardcoded
out += fmt::format("int lut_offsets[8] = int[]({}, {}, {}, {}, 0xF0, 0xF8, 0xFC, 0xFE);\n",
config.state.proctex.lut_offset0, config.state.proctex.lut_offset1,
config.state.proctex.lut_offset2, config.state.proctex.lut_offset3);
config.proctex.lut_offset0, config.proctex.lut_offset1,
config.proctex.lut_offset2, config.proctex.lut_offset3);
out += "int lut_offset = lut_offsets[level];\n";
// For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
out += "lut_coord *= float(lut_width - 1);\n";
switch (config.state.proctex.lut_filter) {
switch (config.proctex.lut_filter) {
case ProcTexFilter::Linear:
case ProcTexFilter::LinearMipmapLinear:
case ProcTexFilter::LinearMipmapNearest:
@ -1191,8 +947,8 @@ float ProcTexNoiseCoef(vec2 x) {
out += "}\n";
out += "vec4 ProcTex() {\n";
if (config.state.proctex.coord < 3) {
out += fmt::format("vec2 uv = abs(texcoord{});\n", config.state.proctex.coord);
if (config.proctex.coord < 3) {
out += fmt::format("vec2 uv = abs(texcoord{});\n", config.proctex.coord);
} else {
LOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3");
out += "vec2 uv = abs(texcoord0);\n";
@ -1205,23 +961,23 @@ float ProcTexNoiseCoef(vec2 x) {
out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n";
// Unlike normal texture sampling, the bias is inside the log2
out += fmt::format("float lod = log2(abs(float({}) * proctex_bias) * (duv.x + duv.y));\n",
config.state.proctex.lut_width);
config.proctex.lut_width);
out += "if (proctex_bias == 0.0) lod = 0.0;\n";
out += fmt::format("lod = clamp(lod, {:#}, {:#});\n",
std::max(0.0f, static_cast<float>(config.state.proctex.lod_min)),
std::min(7.0f, static_cast<float>(config.state.proctex.lod_max)));
std::max(0.0f, static_cast<float>(config.proctex.lod_min)),
std::min(7.0f, static_cast<float>(config.proctex.lod_max)));
// Get shift offset before noise generation
out += "float u_shift = ";
AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift,
config.state.proctex.u_clamp);
AppendProcTexShiftOffset(out, "uv.y", config.proctex.u_shift,
config.proctex.u_clamp);
out += ";\n";
out += "float v_shift = ";
AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift,
config.state.proctex.v_clamp);
AppendProcTexShiftOffset(out, "uv.x", config.proctex.v_shift,
config.proctex.v_clamp);
out += ";\n";
// Generate noise
if (config.state.proctex.noise_enable) {
if (config.proctex.noise_enable) {
out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n"
"uv = abs(uv);\n";
}
@ -1231,16 +987,16 @@ float ProcTexNoiseCoef(vec2 x) {
"float v = uv.y + v_shift;\n";
// Clamp
AppendProcTexClamp(out, "u", config.state.proctex.u_clamp);
AppendProcTexClamp(out, "v", config.state.proctex.v_clamp);
AppendProcTexClamp(out, "u", config.proctex.u_clamp);
AppendProcTexClamp(out, "v", config.proctex.v_clamp);
// Combine and map
out += "float lut_coord = ";
AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner,
AppendProcTexCombineAndMap(out, config.proctex.color_combiner,
"proctex_color_map_offset");
out += ";\n";
switch (config.state.proctex.lut_filter) {
switch (config.proctex.lut_filter) {
case ProcTexFilter::Linear:
case ProcTexFilter::Nearest:
out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n";
@ -1258,11 +1014,11 @@ float ProcTexNoiseCoef(vec2 x) {
break;
}
if (config.state.proctex.separate_alpha) {
if (config.proctex.separate_alpha) {
// Note: in separate alpha mode, the alpha channel skips the color LUT lookup stage. It
// uses the output of CombineAndMap directly instead.
out += "float final_alpha = ";
AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner,
AppendProcTexCombineAndMap(out, config.proctex.alpha_combiner,
"proctex_alpha_map_offset");
out += ";\n";
out += "return vec4(final_color.xyz, final_alpha);\n}\n";
@ -1271,8 +1027,8 @@ float ProcTexNoiseCoef(vec2 x) {
}
}
std::string GenerateFragmentShader(const PicaFSConfig& config) {
const auto& state = config.state;
std::string ShaderGenerator::GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) {
const auto& state = config;
std::string out;
out += R"(
@ -1387,7 +1143,7 @@ std::string GenerateFragmentShader(const PicaFSConfig& config) {
vec4 shadowTexture(vec2 uv, float w) {
)";
if (!config.state.shadow_texture_orthographic) {
if (!config.shadow_texture_orthographic) {
out += "uv /= w;";
}
@ -1501,7 +1257,7 @@ vec4 shadowTextureCube(vec2 uv, float w) {
#endif
)";
if (config.state.proctex.enable)
if (config.proctex.enable)
AppendProcTexSampler(out, config);
// We round the interpolated primary color to the nearest 1/255th
@ -1514,7 +1270,7 @@ vec4 secondary_fragment_color = vec4(0.0);
)";
// Do not do any sort of processing if it's obvious we're not going to pass the alpha test
if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) {
if (state.alpha_test_func == Pica::CompareFunc::Never) {
out += "discard; }";
return out;
}
@ -1552,7 +1308,7 @@ vec4 secondary_fragment_color = vec4(0.0);
WriteTevStage(out, config, static_cast<u32>(index));
}
if (state.alpha_test_func != FramebufferRegs::CompareFunc::Always) {
if (state.alpha_test_func != Pica::CompareFunc::Always) {
out += "if (";
AppendAlphaTestCondition(out, state.alpha_test_func);
out += ") discard;\n";
@ -1623,12 +1379,11 @@ do {
return out;
}
std::string GenerateTrivialVertexShader(bool separable_shader) {
std::string ShaderGenerator::GenerateTrivialVertexShader(bool separable_shader) {
std::string out;
out += "#version 450\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n";
out +=
fmt::format("layout(location = {}) in vec4 vert_position;\n"
out += fmt::format("layout(location = {}) in vec4 vert_position;\n"
"layout(location = {}) in vec4 vert_color;\n"
"layout(location = {}) in vec2 vert_texcoord0;\n"
"layout(location = {}) in vec2 vert_texcoord1;\n"
@ -1656,7 +1411,6 @@ void main() {
gl_Position = vert_position;
gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;
//gl_Position.y = -gl_Position.y;
//gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
//gl_ClipDistance[1] = dot(clip_coef, vert_position);
}
@ -1665,205 +1419,15 @@ void main() {
return out;
}
bool InitializeCompiler() {
static bool glslang_initialized = false;
if (glslang_initialized) {
return true;
}
if (!glslang::InitializeProcess()) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler");
return false;
}
std::atexit([]() { glslang::FinalizeProcess(); });
glslang_initialized = true;
return true;
std::string ShaderGenerator::GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config,
bool separable_shader) {
LOG_CRITICAL(Render_Vulkan, "Unimplemented!");
UNREACHABLE();
}
const TBuiltInResource DefaultTBuiltInResource = {
.maxLights = 32,
.maxClipPlanes = 6,
.maxTextureUnits = 32,
.maxTextureCoords = 32,
.maxVertexAttribs = 64,
.maxVertexUniformComponents = 4096,
.maxVaryingFloats = 64,
.maxVertexTextureImageUnits = 32,
.maxCombinedTextureImageUnits = 80,
.maxTextureImageUnits = 32,
.maxFragmentUniformComponents = 4096,
.maxDrawBuffers = 32,
.maxVertexUniformVectors = 128,
.maxVaryingVectors = 8,
.maxFragmentUniformVectors = 16,
.maxVertexOutputVectors = 16,
.maxFragmentInputVectors = 15,
.minProgramTexelOffset = -8,
.maxProgramTexelOffset = 7,
.maxClipDistances = 8,
.maxComputeWorkGroupCountX = 65535,
.maxComputeWorkGroupCountY = 65535,
.maxComputeWorkGroupCountZ = 65535,
.maxComputeWorkGroupSizeX = 1024,
.maxComputeWorkGroupSizeY = 1024,
.maxComputeWorkGroupSizeZ = 64,
.maxComputeUniformComponents = 1024,
.maxComputeTextureImageUnits = 16,
.maxComputeImageUniforms = 8,
.maxComputeAtomicCounters = 8,
.maxComputeAtomicCounterBuffers = 1,
.maxVaryingComponents = 60,
.maxVertexOutputComponents = 64,
.maxGeometryInputComponents = 64,
.maxGeometryOutputComponents = 128,
.maxFragmentInputComponents = 128,
.maxImageUnits = 8,
.maxCombinedImageUnitsAndFragmentOutputs = 8,
.maxCombinedShaderOutputResources = 8,
.maxImageSamples = 0,
.maxVertexImageUniforms = 0,
.maxTessControlImageUniforms = 0,
.maxTessEvaluationImageUniforms = 0,
.maxGeometryImageUniforms = 0,
.maxFragmentImageUniforms = 8,
.maxCombinedImageUniforms = 8,
.maxGeometryTextureImageUnits = 16,
.maxGeometryOutputVertices = 256,
.maxGeometryTotalOutputComponents = 1024,
.maxGeometryUniformComponents = 1024,
.maxGeometryVaryingComponents = 64,
.maxTessControlInputComponents = 128,
.maxTessControlOutputComponents = 128,
.maxTessControlTextureImageUnits = 16,
.maxTessControlUniformComponents = 1024,
.maxTessControlTotalOutputComponents = 4096,
.maxTessEvaluationInputComponents = 128,
.maxTessEvaluationOutputComponents = 128,
.maxTessEvaluationTextureImageUnits = 16,
.maxTessEvaluationUniformComponents = 1024,
.maxTessPatchComponents = 120,
.maxPatchVertices = 32,
.maxTessGenLevel = 64,
.maxViewports = 16,
.maxVertexAtomicCounters = 0,
.maxTessControlAtomicCounters = 0,
.maxTessEvaluationAtomicCounters = 0,
.maxGeometryAtomicCounters = 0,
.maxFragmentAtomicCounters = 8,
.maxCombinedAtomicCounters = 8,
.maxAtomicCounterBindings = 1,
.maxVertexAtomicCounterBuffers = 0,
.maxTessControlAtomicCounterBuffers = 0,
.maxTessEvaluationAtomicCounterBuffers = 0,
.maxGeometryAtomicCounterBuffers = 0,
.maxFragmentAtomicCounterBuffers = 1,
.maxCombinedAtomicCounterBuffers = 1,
.maxAtomicCounterBufferSize = 16384,
.maxTransformFeedbackBuffers = 4,
.maxTransformFeedbackInterleavedComponents = 64,
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances = 8,
.maxSamples = 4,
.maxMeshOutputVerticesNV = 256,
.maxMeshOutputPrimitivesNV = 512,
.maxMeshWorkGroupSizeX_NV = 32,
.maxMeshWorkGroupSizeY_NV = 1,
.maxMeshWorkGroupSizeZ_NV = 1,
.maxTaskWorkGroupSizeX_NV = 32,
.maxTaskWorkGroupSizeY_NV = 1,
.maxTaskWorkGroupSizeZ_NV = 1,
.maxMeshViewCountNV = 4,
.maxDualSourceDrawBuffersEXT = 1,
.limits = TLimits{
.nonInductiveForLoops = 1,
.whileLoops = 1,
.doWhileLoops = 1,
.generalUniformIndexing = 1,
.generalAttributeMatrixVectorIndexing = 1,
.generalVaryingIndexing = 1,
.generalSamplerIndexing = 1,
.generalVariableIndexing = 1,
.generalConstantMatrixVectorIndexing = 1,
}};
vk::ShaderModule CompileShader(const std::string& source, vk::ShaderStageFlagBits vk_stage) {
if (!InitializeCompiler()) {
return VK_NULL_HANDLE;
}
EShLanguage stage;
switch (vk_stage) {
case vk::ShaderStageFlagBits::eVertex:
stage = EShLangVertex;
break;
case vk::ShaderStageFlagBits::eFragment:
stage = EShLangFragment;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown shader stage");
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) {
LOG_CRITICAL(Render_Vulkan, "Unimplemented!");
UNREACHABLE();
}
std::unique_ptr<glslang::TShader> shader = std::make_unique<glslang::TShader>(stage);
std::unique_ptr<glslang::TProgram> program;
glslang::TShader::ForbidIncluder includer;
EProfile profile = ECoreProfile;
EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
int default_version = 450;
const char* pass_source_code = source.data();
int pass_source_code_length = static_cast<int>(source.size());
shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3);
shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);
if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) {
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog());
return VK_NULL_HANDLE;
}
// Even though there's only a single shader, we still need to link it to generate SPV
program = std::make_unique<glslang::TProgram>();
program->addShader(shader.get());
if (!program->link(messages)) {
LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog());
return VK_NULL_HANDLE;
}
glslang::TIntermediate* intermediate = program->getIntermediate(stage);
std::vector<u32> out_code;
spv::SpvBuildLogger logger;
glslang::SpvOptions options;
// Compile the SPIR-V module without optimizations for easier debugging in RenderDoc.
if (true) {
intermediate->addSourceText(pass_source_code, pass_source_code_length);
options.generateDebugInfo = true;
options.disableOptimizer = true;
options.optimizeSize = false;
options.disassemble = false;
options.validate = true;
}
else {
options.disableOptimizer = false;
options.stripDebugInfo = true;
}
glslang::GlslangToSpv(*intermediate, out_code, &logger, &options);
const std::string spv_messages = logger.getAllMessages();
if (!spv_messages.empty()) {
LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
}
vk::ShaderModuleCreateInfo shader_info{{}, out_code.size() * sizeof(u32), out_code.data()};
const vk::Device device = g_vk_instace->GetDevice();
vk::ShaderModule shader_module = device.createShaderModule(shader_info);
return shader_module;
}
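Putting the pieces together, a hedged example of the intended GLSL-to-SPIR-V path (this assumes the generator remains callable as a free function in this form):
// Compile the trivial vertex shader into a SPIR-V module for pipeline creation.
const std::string source = GenerateTrivialVertexShader(true);
vk::ShaderModule module = CompileShader(source, vk::ShaderStageFlagBits::eVertex);
if (!module) {
    LOG_CRITICAL(Render_Vulkan, "Trivial vertex shader failed to compile");
}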
} // namespace Vulkan

View File

@ -4,46 +4,23 @@
#pragma once
#include <array>
#include <cstring>
#include <functional>
#include <optional>
#include <string>
#include <type_traits>
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/shader/shader.h"
#include "video_core/renderer_vulkan/vk_shader_state.h"
#include "video_core/common/shader_gen.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
/**
* Returns the vertex and fragment shader sources used for presentation
* @returns String of shader source code
*/
std::string GetPresentVertexShader();
std::string GetPresentFragmentShader();
class ShaderGenerator : public VideoCore::ShaderGeneratorBase {
public:
ShaderGenerator() = default;
~ShaderGenerator() override = default;
/**
* Generates the GLSL vertex shader program source code that accepts vertices from software shader
* and directly passes them to the fragment shader.
* @param separable_shader generates shader that can be used for separate shader object
* @returns String of the shader source code
*/
std::string GenerateTrivialVertexShader(bool separable_shader);
std::string GenerateTrivialVertexShader(bool separable_shader) override;
/**
* Generates the GLSL fragment shader program source code for the current Pica state
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
* @param separable_shader generates shader that can be used for separate shader object
* @returns String of the shader source code
*/
std::string GenerateFragmentShader(const PicaFSConfig& config);
std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config,
bool separable_shader) override;
/**
* Generates a SPIR-V shader module from the provided GLSL source code
*/
vk::ShaderModule CompileShader(const std::string& source, vk::ShaderStageFlagBits stage);
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) override;
} // namespace Vulkan
std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) override;
};
} // namespace VideoCore

View File

@ -14,10 +14,10 @@
namespace Vulkan {
/* Render vertex attributes */
struct VertexBase {
VertexBase() = default;
VertexBase(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex {
HardwareVertex() = default;
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
position[0] = v.pos.x.ToFloat32();
position[1] = v.pos.y.ToFloat32();
position[2] = v.pos.z.ToFloat32();
@ -56,31 +56,12 @@ struct VertexBase {
glm::vec3 view;
};
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex : public VertexBase {
HardwareVertex() = default;
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) : VertexBase(v, flip_quaternion) {};
static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexBase));
static constexpr std::array<vk::VertexInputAttributeDescription, 8> attribute_desc =
{
vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, position)),
vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, color)),
vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord0)),
vk::VertexInputAttributeDescription(3, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord1)),
vk::VertexInputAttributeDescription(4, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord2)),
vk::VertexInputAttributeDescription(5, 0, vk::Format::eR32Sfloat, offsetof(VertexBase, tex_coord0_w)),
vk::VertexInputAttributeDescription(6, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, normquat)),
vk::VertexInputAttributeDescription(7, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexBase, view)),
};
};
/**
* Vertex structure that the drawn screen rectangles are composed of.
*/
struct ScreenRectVertexBase {
ScreenRectVertexBase() = default;
ScreenRectVertexBase(float x, float y, float u, float v, float s) {
struct ScreenRectVertex {
ScreenRectVertex() = default;
ScreenRectVertex(float x, float y, float u, float v, float s) {
position.x = x;
position.y = y;
tex_coord.x = u;
@ -92,241 +73,4 @@ struct ScreenRectVertexBase {
glm::vec3 tex_coord;
};
struct ScreenRectVertex : public ScreenRectVertexBase {
ScreenRectVertex() = default;
ScreenRectVertex(float x, float y, float u, float v, float s) : ScreenRectVertexBase(x, y, u, v, s) {};
static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(ScreenRectVertexBase));
static constexpr std::array<vk::VertexInputAttributeDescription, 2> attribute_desc =
{
vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32Sfloat, offsetof(ScreenRectVertexBase, position)),
vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32Sfloat, offsetof(ScreenRectVertexBase, tex_coord)),
};
};
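For reference, a sketch of how these static descriptions would feed Vulkan pipeline creation. It assumes the binding_desc/attribute_desc members shown above (which this commit appears to be migrating) and VULKAN_HPP_NO_CONSTRUCTORS as defined in the other files of this commit:
// Hypothetical: wire the vertex layout into a graphics pipeline description.
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
    .vertexBindingDescriptionCount = 1,
    .pVertexBindingDescriptions = &HardwareVertex::binding_desc,
    .vertexAttributeDescriptionCount = static_cast<u32>(HardwareVertex::attribute_desc.size()),
    .pVertexAttributeDescriptions = HardwareVertex::attribute_desc.data(),
};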
enum class ProgramType : u32 { VS, GS, FS };
enum Attributes {
ATTRIBUTE_POSITION,
ATTRIBUTE_COLOR,
ATTRIBUTE_TEXCOORD0,
ATTRIBUTE_TEXCOORD1,
ATTRIBUTE_TEXCOORD2,
ATTRIBUTE_TEXCOORD0_W,
ATTRIBUTE_NORMQUAT,
ATTRIBUTE_VIEW,
};
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
struct TevStageConfigRaw {
u32 sources_raw;
u32 modifiers_raw;
u32 ops_raw;
u32 scales_raw;
explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
Pica::TexturingRegs::TevStageConfig stage;
stage.sources_raw = sources_raw;
stage.modifiers_raw = modifiers_raw;
stage.ops_raw = ops_raw;
stage.const_color = 0;
stage.scales_raw = scales_raw;
return stage;
}
};
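A short sketch of the round trip, assuming a populated PicaFSConfigState named state; note const_color is deliberately reconstructed as 0 and supplied through a uniform at draw time:
// Convert a cached raw stage back into the full register struct for codegen.
const auto stage = static_cast<Pica::TexturingRegs::TevStageConfig>(state.tev_stages[0]);
// stage.const_color == 0 here; the real constant color arrives via a uniform.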
struct PicaFSConfigState {
Pica::FramebufferRegs::CompareFunc alpha_test_func;
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
bool texture2_use_coord1;
std::array<TevStageConfigRaw, 6> tev_stages;
u8 combiner_buffer_input;
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
Pica::TexturingRegs::FogMode fog_mode;
bool fog_flip;
bool alphablend_enable;
Pica::FramebufferRegs::LogicOp logic_op;
struct {
struct {
unsigned num;
bool directional;
bool two_sided_diffuse;
bool dist_atten_enable;
bool spot_atten_enable;
bool geometric_factor_0;
bool geometric_factor_1;
bool shadow_enable;
} light[8];
bool enable;
unsigned src_num;
Pica::LightingRegs::LightingBumpMode bump_mode;
unsigned bump_selector;
bool bump_renorm;
bool clamp_highlights;
Pica::LightingRegs::LightingConfig config;
bool enable_primary_alpha;
bool enable_secondary_alpha;
bool enable_shadow;
bool shadow_primary;
bool shadow_secondary;
bool shadow_invert;
bool shadow_alpha;
unsigned shadow_selector;
struct {
bool enable;
bool abs_input;
Pica::LightingRegs::LightingLutInput type;
float scale;
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
} lighting;
struct {
bool enable;
u32 coord;
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
bool separate_alpha;
bool noise_enable;
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
u32 lut_width;
u32 lut_offset0;
u32 lut_offset1;
u32 lut_offset2;
u32 lut_offset3;
u32 lod_min;
u32 lod_max;
Pica::TexturingRegs::ProcTexFilter lut_filter;
} proctex;
bool shadow_rendering;
bool shadow_texture_orthographic;
};
/**
* This struct contains all state used to generate the GLSL fragment shader that emulates the
* current Pica register configuration. This struct is used as a cache key for generated GLSL shader
* programs. The shader generation functions should retrieve state from this struct only, not by
* directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
* Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
* two separate shaders sharing the same key.
*/
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
/// Construct a PicaFSConfig with the given Pica register configuration.
static PicaFSConfig BuildFromRegs(const Pica::Regs& regs);
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
}
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
}
};
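The two accessors above decode the mask packed in BuildFromRegs: bits 0-3 flag stages whose color output updates the combiner buffer, bits 4-7 flag alpha updates. A hypothetical value makes the layout concrete:
// combiner_buffer_input = 0b0011'0001:
//   color mask (bits 0-3) = 0001 -> only stage 0 updates the buffer color
//   alpha mask (bits 4-7) = 0011 -> stages 0 and 1 update the buffer alpha
PicaFSConfig config{};
config.state.combiner_buffer_input = 0b0011'0001;
// config.TevStageUpdatesCombinerBufferColor(0) == true
// config.TevStageUpdatesCombinerBufferColor(1) == false
// config.TevStageUpdatesCombinerBufferAlpha(1) == true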
/**
* This struct contains common information to identify a GL vertex/geometry shader generated from
* PICA vertex/geometry shader.
*/
struct PicaShaderConfigCommon {
void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
u64 program_hash;
u64 swizzle_hash;
u32 main_offset;
bool sanitize_mul;
u32 num_outputs;
// output_map[output register index] -> output attribute index
std::array<u32, 16> output_map;
};
/**
* This struct contains information to identify a GL vertex shader generated from PICA vertex
* shader.
*/
struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
state.Init(regs, setup);
}
explicit PicaVSConfig(const PicaShaderConfigCommon& conf) {
state = conf;
}
};
struct PicaGSConfigCommonRaw {
void Init(const Pica::Regs& regs);
u32 vs_output_attributes;
u32 gs_output_attributes;
struct SemanticMap {
u32 attribute_index;
u32 component_index;
};
// semantic_maps[semantic name] -> GS output attribute index + component index
std::array<SemanticMap, 24> semantic_maps;
};
/**
* This struct contains information to identify a GL geometry shader generated from PICA no-geometry
* shader pipeline
*/
struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigCommonRaw> {
explicit PicaFixedGSConfig(const Pica::Regs& regs) {
state.Init(regs);
}
};
struct PipelineCacheKey {
vk::Format color, depth_stencil;
vk::PipelineColorBlendAttachmentState blend_config;
vk::LogicOp blend_logic_op;
PicaFSConfig fragment_config;
auto operator <=>(const PipelineCacheKey& other) const = default;
u64 Hash() const {
return Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(PipelineCacheKey));
}
};
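Since Hash() digests the raw bytes of the key, padding inside the struct must stay deterministic; a zero-fill before populating the key is a simple guard. Sketch only: it assumes &lt;cstring&gt; is available, the key stays trivially copyable, and regs stands for the current Pica register state:
PipelineCacheKey key;
std::memset(&key, 0, sizeof(key)); // zero padding bytes so byte-wise hashing is stable
key.color = vk::Format::eB8G8R8A8Unorm;
key.depth_stencil = vk::Format::eD24UnormS8Uint;
key.fragment_config = PicaFSConfig::BuildFromRegs(regs);
const u64 pipeline_hash = key.Hash();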
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::PicaFSConfig> {
std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::PicaVSConfig> {
std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::PicaFixedGSConfig> {
std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::PipelineCacheKey> {
size_t operator()(const Vulkan::PipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View File

@ -7,9 +7,9 @@
#include <array>
#include <bitset>
#include "video_core/regs.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_shader_state.h"
#include "video_core/renderer_vulkan/vk_pipeline_builder.h"
#include "video_core/renderer_vulkan/vk_texture.h"
namespace Vulkan {
@ -69,7 +69,7 @@ public:
bool StencilTestEnabled() const { return stencil_enabled && stencil_writes; }
/// Configure drawing state
void SetVertexBuffer(const Buffer& buffer, vk::DeviceSize offset);
void SetVertexBuffer(const StreamBuffer& buffer, vk::DeviceSize offset);
void SetViewport(vk::Viewport viewport);
void SetScissor(vk::Rect2D scissor);
void SetCullMode(vk::CullModeFlags flags);
@ -100,9 +100,9 @@ public:
void EndRendering();
/// Configure shader resources
void SetUniformBuffer(u32 binding, u32 offset, u32 size, const Buffer& buffer);
void SetUniformBuffer(u32 binding, u32 offset, u32 size, const StreamBuffer& buffer);
void SetTexture(u32 binding, const Texture& texture);
void SetTexelBuffer(u32 binding, u32 offset, u32 size, const Buffer& buffer, u32 view_index);
void SetTexelBuffer(u32 binding, u32 offset, u32 size, const StreamBuffer& buffer, u32 view_index);
void SetPresentTextures(vk::ImageView view0, vk::ImageView view1, vk::ImageView view2);
void SetPresentData(DrawInfo data);
void SetPlaceholderColor(u8 red, u8 green, u8 blue, u8 alpha);

View File

@ -2,60 +2,69 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <array>
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
Swapchain::Swapchain(vk::SurfaceKHR surface_) : surface(surface_) {
Swapchain::Swapchain(Instance& instance, vk::SurfaceKHR surface) :
instance(instance), surface(surface) {
}
Swapchain::~Swapchain() {
auto device = g_vk_instace->GetDevice();
auto instance = g_vk_instace->GetInstance();
device.waitIdle();
// Destroy swapchain resources
vk::Device device = instance.GetDevice();
device.destroySemaphore(render_finished);
device.destroySemaphore(image_available);
device.destroySwapchainKHR(swapchain);
instance.destroySurfaceKHR(surface);
}
bool Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
is_outdated = false;
is_suboptimal = false;
// Fetch information about the provided surface
PopulateSwapchainDetails(surface, width, height);
Configure(width, height);
const std::array indices {
g_vk_instace->GetGraphicsQueueFamilyIndex(),
g_vk_instace->GetPresentQueueFamilyIndex(),
const std::array queue_family_indices = {
instance.GetGraphicsQueueFamilyIndex(),
instance.GetPresentQueueFamilyIndex(),
};
const bool exclusive = queue_family_indices[0] == queue_family_indices[1];
const u32 queue_family_indices_count = exclusive ? 2u : 1u;
const vk::SharingMode sharing_mode = exclusive ? vk::SharingMode::eExclusive :
vk::SharingMode::eConcurrent;
// Now we can actually create the swapchain
vk::SwapchainCreateInfoKHR swapchain_info{{}, surface, details.image_count, details.format.format,
details.format.colorSpace, details.extent, 1, vk::ImageUsageFlagBits::eColorAttachment,
vk::SharingMode::eExclusive, 1, indices.data(), details.transform,
vk::CompositeAlphaFlagBitsKHR::eOpaque, details.present_mode, true, swapchain};
const vk::SwapchainCreateInfoKHR swapchain_info = {
.surface = surface,
.minImageCount = image_count,
.imageFormat = surface_format.format,
.imageColorSpace = surface_format.colorSpace,
.imageExtent = extent,
.imageArrayLayers = 1,
.imageUsage = vk::ImageUsageFlagBits::eColorAttachment,
.imageSharingMode = sharing_mode,
.queueFamilyIndexCount = queue_family_indices_count,
.pQueueFamilyIndices = queue_family_indices.data(),
.preTransform = transform,
.presentMode = present_mode,
.clipped = true,
.oldSwapchain = swapchain
};
// For dedicated present queues, select concurrent sharing mode
if (indices[0] != indices[1]) {
swapchain_info.imageSharingMode = vk::SharingMode::eConcurrent;
swapchain_info.queueFamilyIndexCount = 2;
}
auto device = g_vk_instace->GetDevice();
auto new_swapchain = device.createSwapchainKHR(swapchain_info);
vk::Device device = instance.GetDevice();
vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info);
// If an old swapchain exists, destroy it and move the new one to its place.
if (swapchain) {
device.destroy(swapchain);
if (vk::SwapchainKHR old_swapchain = std::exchange(swapchain, new_swapchain); old_swapchain) {
device.destroySwapchainKHR(old_swapchain);
}
swapchain = new_swapchain;
// Create sync objects if not already created
if (!image_available) {
@ -67,19 +76,17 @@ bool Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
}
// Create framebuffer and image views
swapchain_images.clear();
SetupImages();
return true;
images = device.getSwapchainImagesKHR(swapchain);
}
// Wait for a maximum of 1 second
constexpr u64 ACQUIRE_TIMEOUT = 1000000000;
void Swapchain::AcquireNextImage() {
auto result = g_vk_instace->GetDevice().acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT,
vk::Device device = instance.GetDevice();
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT,
image_available, VK_NULL_HANDLE,
&image_index);
&current_image);
switch (result) {
case vk::Result::eSuccess:
break;
@ -90,15 +97,21 @@ void Swapchain::AcquireNextImage() {
is_outdated = true;
break;
default:
LOG_ERROR(Render_Vulkan, "acquireNextImageKHR returned unknown result");
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result");
break;
}
}
void Swapchain::Present() {
const auto present_queue = g_vk_instace->GetPresentQueue();
const vk::PresentInfoKHR present_info = {
.waitSemaphoreCount = 1,
.pWaitSemaphores = &render_finished,
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &current_image
};
vk::PresentInfoKHR present_info(render_finished, swapchain, image_index);
vk::Queue present_queue = instance.GetPresentQueue();
vk::Result result = present_queue.presentKHR(present_info);
switch (result) {
@ -115,91 +128,68 @@ void Swapchain::Present() {
break;
}
frame_index = (frame_index + 1) % swapchain_images.size();
current_frame = (current_frame + 1) % images.size();
}
void Swapchain::PopulateSwapchainDetails(vk::SurfaceKHR surface, u32 width, u32 height) {
auto gpu = g_vk_instace->GetPhysicalDevice();
void Swapchain::Configure(u32 width, u32 height) {
vk::PhysicalDevice physical = instance.GetPhysicalDevice();
// Choose surface format
auto formats = gpu.getSurfaceFormatsKHR(surface);
details.format = formats[0];
auto formats = physical.getSurfaceFormatsKHR(surface);
surface_format = formats[0];
if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
details.format = { vk::Format::eB8G8R8A8Unorm };
}
else {
for (const auto& format : formats) {
if (format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear &&
format.format == vk::Format::eB8G8R8A8Unorm) {
details.format = format;
break;
}
surface_format = vk::SurfaceFormatKHR{
.format = vk::Format::eB8G8R8A8Unorm
};
} else {
auto iter = std::find_if(formats.begin(), formats.end(), [](vk::SurfaceFormatKHR format) -> bool {
return format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear &&
format.format == vk::Format::eB8G8R8A8Unorm;
});
if (iter == formats.end()) {
LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!");
}
}
// Returns true if the given present mode is supported by the surface
auto modes = gpu.getSurfacePresentModesKHR(surface);
auto ModePresent = [&modes](vk::PresentModeKHR check_mode) {
auto it = std::find_if(modes.begin(), modes.end(), [check_mode](const auto& mode) {
return check_mode == mode;
});
return it != modes.end();
};
auto modes = physical.getSurfacePresentModesKHR(surface);
// FIFO is guaranteed by the Vulkan standard to be available
details.present_mode = vk::PresentModeKHR::eFifo;
present_mode = vk::PresentModeKHR::eFifo;
auto iter = std::find_if(modes.begin(), modes.end(), [](vk::PresentModeKHR mode) {
return vk::PresentModeKHR::eMailbox == mode;
});
// Prefer Mailbox if present for lowest latency
if (ModePresent(vk::PresentModeKHR::eMailbox)) {
details.present_mode = vk::PresentModeKHR::eMailbox;
if (iter != modes.end()) {
present_mode = vk::PresentModeKHR::eMailbox;
}
// Query surface extent
auto capabilities = gpu.getSurfaceCapabilitiesKHR(surface);
details.extent = capabilities.currentExtent;
auto capabilities = physical.getSurfaceCapabilitiesKHR(surface);
extent = capabilities.currentExtent;
if (capabilities.currentExtent.width == std::numeric_limits<u32>::max()) {
details.extent.width = std::clamp(width, capabilities.minImageExtent.width,
extent.width = std::clamp(width, capabilities.minImageExtent.width,
capabilities.maxImageExtent.width);
details.extent.height = std::clamp(height, capabilities.minImageExtent.height,
extent.height = std::clamp(height, capabilities.minImageExtent.height,
capabilities.maxImageExtent.height);
}
// Select the number of swapchain images; request one extra so the host has a spare buffer to work on in the background
details.image_count = capabilities.minImageCount + 1;
image_count = capabilities.minImageCount + 1;
if (capabilities.maxImageCount > 0) {
details.image_count = std::min(details.image_count, capabilities.maxImageCount);
image_count = std::min(image_count, capabilities.maxImageCount);
}
// Prefer identity transform if possible
details.transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
if (!(capabilities.supportedTransforms & details.transform)) {
details.transform = capabilities.currentTransform;
transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
if (!(capabilities.supportedTransforms & transform)) {
transform = capabilities.currentTransform;
}
}
void Swapchain::SetupImages() {
// Get the swap chain images
auto device = g_vk_instace->GetDevice();
auto images = device.getSwapchainImagesKHR(swapchain);
Texture::Info image_info{
.width = details.extent.width,
.height = details.extent.height,
.format = details.format.format,
.type = vk::ImageType::e2D,
.view_type = vk::ImageViewType::e2D,
.usage = vk::ImageUsageFlagBits::eColorAttachment
};
// Create the swapchain buffers containing the image and imageview
swapchain_images.resize(images.size());
for (int i = 0; i < swapchain_images.size(); i++) {
// Wrap swapchain images with Texture
swapchain_images[i].Adopt(image_info, images[i]);
}
}
} // namespace Vulkan
} // namespace VideoCore::Vulkan
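To tie the pieces together, a hypothetical per-frame flow showing how the two semaphores pair with the command scheduler's Submit; names like scheduler, width, height and vsync_enabled are stand-ins:
swapchain.AcquireNextImage();                        // signals image_available when ready
// ... record rendering commands targeting swapchain.GetCurrentImage() ...
scheduler.Submit(false,
                 swapchain.GetAvailableSemaphore(),  // wait until the image is acquired
                 swapchain.GetPresentSemaphore());   // signal render_finished on completion
swapchain.Present();                                 // queues the image, waiting on render_finished
if (swapchain.NeedsRecreation()) {
    swapchain.Create(width, height, vsync_enabled);  // rebuild on resize/outdated surface
}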

View File

@ -4,62 +4,90 @@
#pragma once
#include <string_view>
#include <vector>
#include "core/frontend/emu_window.h"
#include "video_core/renderer_vulkan/vk_texture.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
struct SwapChainDetails {
vk::SurfaceFormatKHR format;
class Instance;
class Swapchain {
public:
Swapchain(Instance& instance, vk::SurfaceKHR surface);
~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
void Create(u32 width, u32 height, bool vsync_enabled);
/// Acquire the next image in the swapchain.
void AcquireNextImage();
/// Present the current image and move to the next one
void Present();
/// Return current swapchain state
inline vk::Extent2D GetExtent() const {
return extent;
}
/// Return the swapchain surface
inline vk::SurfaceKHR GetSurface() const {
return surface;
}
/// Return the swapchain format
inline vk::SurfaceFormatKHR GetSurfaceFormat() const {
return surface_format;
}
/// Return the Vulkan swapchain handle
inline vk::SwapchainKHR GetHandle() const {
return swapchain;
}
/// Return the semaphore that will be signaled when vkAcquireNextImageKHR completes
inline vk::Semaphore GetAvailableSemaphore() const {
return image_available;
}
/// Return the semaphore that rendering signals and presentation waits on
inline vk::Semaphore GetPresentSemaphore() const {
return render_finished;
}
/// Return the current swapchain image
inline vk::Image GetCurrentImage() {
return images[current_image];
}
/// Returns true when the swapchain should be recreated
inline bool NeedsRecreation() const {
return is_suboptimal || is_outdated;
}
private:
void Configure(u32 width, u32 height);
private:
Instance& instance;
vk::SwapchainKHR swapchain = VK_NULL_HANDLE;
vk::SurfaceKHR surface = VK_NULL_HANDLE;
// Swapchain properties
vk::SurfaceFormatKHR surface_format;
vk::PresentModeKHR present_mode;
vk::Extent2D extent;
vk::SurfaceTransformFlagBitsKHR transform;
u32 image_count;
};
class Swapchain {
public:
Swapchain(vk::SurfaceKHR surface);
~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
bool Create(u32 width, u32 height, bool vsync_enabled);
/// Acquire the next image in the swapchain.
void AcquireNextImage();
void Present();
/// Returns true when the swapchain needs to be recreated.
bool NeedsRecreation() const { return IsSubOptimal() || IsOutDated(); }
bool IsOutDated() const { return is_outdated; }
bool IsSubOptimal() const { return is_suboptimal; }
bool IsVSyncEnabled() const { return vsync_enabled; }
u32 GetCurrentImageIndex() const { return image_index; }
/// Get current swapchain state
vk::Extent2D GetSize() const { return details.extent; }
vk::SurfaceKHR GetSurface() const { return surface; }
vk::SurfaceFormatKHR GetSurfaceFormat() const { return details.format; }
vk::SwapchainKHR GetSwapChain() const { return swapchain; }
const vk::Semaphore& GetAvailableSemaphore() const { return image_available; }
const vk::Semaphore& GetRenderSemaphore() const { return render_finished; }
Texture& GetCurrentImage() { return swapchain_images[image_index]; }
private:
void PopulateSwapchainDetails(vk::SurfaceKHR surface, u32 width, u32 height);
void SetupImages();
private:
SwapChainDetails details{};
vk::SurfaceKHR surface;
// Swapchain state
std::vector<vk::Image> images;
vk::Semaphore image_available, render_finished;
bool vsync_enabled{false}, is_outdated{true}, is_suboptimal{true};
vk::SwapchainKHR swapchain{VK_NULL_HANDLE};
std::vector<Texture> swapchain_images;
u32 image_index{0}, frame_index{0};
u32 current_image = 0, current_frame = 0;
bool vsync_enabled = false;
bool is_outdated = true;
bool is_suboptimal = true;
};
} // namespace Vulkan

View File

@ -2,232 +2,185 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_state.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "common/assert.h"
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
TaskScheduler::~TaskScheduler() {
// 16MB should be enough for a single frame
constexpr BufferInfo STAGING_INFO = {
.capacity = 16 * 1024 * 1024,
.usage = BufferUsage::Staging
};
CommandScheduler::CommandScheduler(Instance& instance) : instance(instance) {
}
CommandScheduler::~CommandScheduler() {
// Destroy Vulkan resources
auto device = g_vk_instace->GetDevice();
device.waitIdle();
vk::Device device = instance.GetDevice();
VmaAllocator allocator = instance.GetAllocator();
for (auto& task : tasks) {
task.staging.Destroy();
device.destroyDescriptorPool(task.pool);
for (auto& command : commands) {
device.destroyFence(command.fence);
// Clean up any scheduled resources
for (auto& func : command.cleanups) {
func(device, allocator);
}
}
SyncToGPU();
device.destroyCommandPool(command_pool);
device.destroySemaphore(timeline);
}
std::tuple<u8*, u32> TaskScheduler::RequestStaging(u32 size) {
auto& task = tasks[current_task];
if (size > STAGING_BUFFER_SIZE - task.current_offset) {
// If we run out of space, allocate a new buffer.
// The old one will be safely destroyed when the task finishes
task.staging.Recreate();
task.current_offset = 0;
return std::make_tuple(task.staging.GetHostPointer(), 0);
}
u8* ptr = task.staging.GetHostPointer() + task.current_offset;
std::memset(ptr, 0, size);
task.current_offset += size;
return std::make_tuple(ptr, task.current_offset - size);
}
Buffer& TaskScheduler::GetStaging() {
return tasks[current_task].staging;
}
bool TaskScheduler::Create() {
auto device = g_vk_instace->GetDevice();
// Create command pool
vk::CommandPoolCreateInfo pool_info(vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
g_vk_instace->GetGraphicsQueueFamilyIndex());
command_pool = device.createCommandPool(pool_info);
// Create timeline semaphore for synchronization
vk::SemaphoreTypeCreateInfo timeline_info{vk::SemaphoreType::eTimeline, 0};
vk::SemaphoreCreateInfo semaphore_info{{}, &timeline_info};
timeline = device.createSemaphore(semaphore_info);
Buffer::Info staging_info{
.size = STAGING_BUFFER_SIZE,
.properties = vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent,
.usage = vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst
bool CommandScheduler::Create() {
vk::Device device = instance.GetDevice();
const vk::CommandPoolCreateInfo pool_info = {
.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex()
};
// Should be enough for a single frame
const vk::DescriptorPoolSize pool_size{vk::DescriptorType::eCombinedImageSampler, 64};
vk::DescriptorPoolCreateInfo pool_create_info{{}, 1024, pool_size};
// Create command pool
command_pool = device.createCommandPool(pool_info);
for (auto& task : tasks) {
// Create command buffers
vk::CommandBufferAllocateInfo buffer_info{command_pool, vk::CommandBufferLevel::ePrimary, 2};
auto buffers = device.allocateCommandBuffers(buffer_info);
std::ranges::copy_n(buffers.begin(), 2, task.command_buffers.begin());
vk::CommandBufferAllocateInfo buffer_info = {
.commandPool = command_pool,
.level = vk::CommandBufferLevel::ePrimary,
.commandBufferCount = 2 * SCHEDULER_COMMAND_COUNT
};
// Create staging buffer
task.staging.Create(staging_info);
// Allocate all command buffers
const auto command_buffers = device.allocateCommandBuffers(buffer_info);
// Create descriptor pool
task.pool = device.createDescriptorPool(pool_create_info);
// Initialize command slots
for (std::size_t i = 0; i < commands.size(); i++) {
commands[i] = CommandSlot{
.render_command_buffer = command_buffers[2 * i],
.upload_command_buffer = command_buffers[2 * i + 1],
.fence = device.createFence({}),
.upload_buffer = std::make_unique<Buffer>(instance, *this, STAGING_INFO)
};
}
return true;
}
vk::CommandBuffer TaskScheduler::GetRenderCommandBuffer() const {
const auto& task = tasks[current_task];
return task.command_buffers[1];
}
vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() {
auto& task = tasks[current_task];
if (!task.use_upload_buffer) {
auto& cmdbuffer = task.command_buffers[0];
cmdbuffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
task.use_upload_buffer = true;
}
return task.command_buffers[0];
}
vk::DescriptorPool TaskScheduler::GetDescriptorPool() const {
const auto& task = tasks[current_task];
return task.pool;
}
void TaskScheduler::SyncToGPU(u64 task_index) {
// No need to sync if the GPU already has finished the task
auto tick = GetGPUTick();
if (tasks[task_index].task_id <= tick) {
void CommandScheduler::Synchronize() {
// Don't synchronize the same command twice
CommandSlot& command = commands[current_command];
if (command.fence_counter <= completed_fence_counter) {
return;
}
// Wait for the task to complete
vk::SemaphoreWaitInfo wait_info{{}, timeline, tasks[task_index].task_id};
auto result = g_vk_instace->GetDevice().waitSemaphores(wait_info, UINT64_MAX);
if (result != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Failed waiting for timeline semaphore!");
// Wait for this command buffer to be completed.
vk::Device device = instance.GetDevice();
if (device.waitForFences(command.fence, true, UINT64_MAX) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Waiting for fences failed!");
}
// Clean up resources for command buffers that completed along with the current one
const u64 now_fence_counter = command.fence_counter;
VmaAllocator allocator = instance.GetAllocator();
for (CommandSlot& command : commands) {
if (command.fence_counter < now_fence_counter &&
command.fence_counter > completed_fence_counter) {
for (auto& func: command.cleanups) {
func(device, allocator);
}
command.cleanups.clear();
}
}
completed_fence_counter = now_fence_counter;
}
void TaskScheduler::SyncToGPU() {
SyncToGPU(current_task);
}
u64 TaskScheduler::GetCPUTick() const {
return current_task_id;
}
u64 TaskScheduler::GetGPUTick() const {
auto device = g_vk_instace->GetDevice();
return device.getSemaphoreCounterValue(timeline);
}
void TaskScheduler::Submit(bool wait_completion, bool present, Swapchain* swapchain) {
// End the current task recording.
auto& task = tasks[current_task];
void CommandScheduler::Submit(bool wait_completion,
vk::Semaphore wait_semaphore,
vk::Semaphore signal_semaphore) {
const CommandSlot& command = commands[current_command];
// End command buffers
task.command_buffers[1].end();
if (task.use_upload_buffer) {
task.command_buffers[0].end();
command.render_command_buffer.end();
if (command.use_upload_buffer) {
command.upload_command_buffer.end();
}
const u32 num_signal_semaphores = present ? 2U : 1U;
const std::array signal_values{task.task_id, u64(0)};
std::array signal_semaphores{timeline, vk::Semaphore{}};
const u32 num_wait_semaphores = present ? 2U : 1U;
const std::array wait_values{task.task_id - 1, u64(1)};
std::array wait_semaphores{timeline, vk::Semaphore{}};
// When the task completes, the timeline will increment to the task id
const vk::TimelineSemaphoreSubmitInfoKHR timeline_si{num_wait_semaphores, wait_values.data(),
num_signal_semaphores, signal_values.data()};
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks{
constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks{
vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eColorAttachmentOutput,
};
const u32 cmdbuffer_count = task.use_upload_buffer ? 2u : 1u;
const vk::SubmitInfo submit_info{num_wait_semaphores, wait_semaphores.data(), wait_stage_masks.data(), cmdbuffer_count,
&task.command_buffers[2 - cmdbuffer_count], num_signal_semaphores, signal_semaphores.data(),
&timeline_si};
const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u;
const u32 wait_semaphore_count = wait_semaphore ? 1u : 0u;
const u32 command_buffer_count = command.use_upload_buffer ? 2u : 1u;
const std::array command_buffers = { command.render_command_buffer,
command.upload_command_buffer };
// Wait for new swapchain image
if (present) {
signal_semaphores[1] = swapchain->GetRenderSemaphore();
wait_semaphores[1] = swapchain->GetAvailableSemaphore();
}
// Prepare submit info
const vk::SubmitInfo submit_info = {
.waitSemaphoreCount = wait_semaphore_count,
.pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = command_buffer_count,
.pCommandBuffers = command_buffers.data(),
.signalSemaphoreCount = signal_semaphore_count,
.pSignalSemaphores = &signal_semaphore,
};
// Submit the command buffer
auto queue = g_vk_instace->GetGraphicsQueue();
queue.submit(submit_info);
// Present the image when rendering has finished
if (present) {
swapchain->Present();
}
vk::Queue queue = instance.GetGraphicsQueue();
queue.submit(submit_info, command.fence);
// Block host until the GPU catches up
if (wait_completion) {
SyncToGPU();
Synchronize();
}
// Switch to next cmdbuffer.
BeginTask();
SwitchSlot();
}
void TaskScheduler::Schedule(std::function<void()> func) {
auto& task = tasks[current_task];
task.cleanups.push_back(func);
void CommandScheduler::Schedule(Deleter&& func) {
auto& command = commands[current_command];
command.cleanups.push_back(std::move(func));
}
void TaskScheduler::BeginTask() {
u32 next_task_index = (current_task + 1) % TASK_COUNT;
auto& task = tasks[next_task_index];
auto device = g_vk_instace->GetDevice();
vk::CommandBuffer CommandScheduler::GetUploadCommandBuffer() {
CommandSlot& command = commands[current_command];
if (!command.use_upload_buffer) {
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
// Wait for the GPU to finish with all resources for this task.
SyncToGPU(next_task_index);
// Delete all resources that can be freed now
for (auto& func : task.cleanups) {
func();
command.upload_command_buffer.begin(begin_info);
command.use_upload_buffer = true;
}
device.resetDescriptorPool(task.pool);
task.command_buffers[1].begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
// Move to the next command buffer.
current_task = next_task_index;
task.task_id = ++current_task_id;
task.current_offset = 0;
task.use_upload_buffer = false;
task.cleanups.clear();
auto& state = VulkanState::Get();
state.InitDescriptorSets();
return command.upload_command_buffer;
}
std::unique_ptr<TaskScheduler> g_vk_task_scheduler;
void CommandScheduler::SwitchSlot() {
current_command = (current_command + 1) % SCHEDULER_COMMAND_COUNT;
CommandSlot& command = commands[current_command];
// Wait for the GPU to finish with all resources for this command.
Synchronize();
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
// Move to the next command buffer.
vk::Device device = instance.GetDevice();
device.resetFences(command.fence);
command.render_command_buffer.begin(begin_info);
command.fence_counter = next_fence_counter++;
command.use_upload_buffer = false;
}
} // namespace VideoCore::Vulkan
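For orientation, here is a minimal sketch of how a caller might drive the new CommandScheduler each frame, using only the API visible above; the resource names (old_buffer, old_alloc) are illustrative:
    vk::CommandBuffer cmd = scheduler.GetRenderCommandBuffer();
    // ... record draw commands into cmd ...
    // Defer destruction of a resource the in-flight commands may still reference
    scheduler.Schedule([buffer = old_buffer, alloc = old_alloc](vk::Device, VmaAllocator allocator) {
        vmaDestroyBuffer(allocator, buffer, alloc);
    });
    // Submit the slot; its fence signals when the GPU is done with it
    scheduler.Submit(false);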

View File

@ -4,68 +4,81 @@
#pragma once
#include <memory>
#include <array>
#include "video_core/renderer_vulkan/vk_buffer.h"
#include <functional>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
constexpr u32 TASK_COUNT = 5;
constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
class Swapchain;
using Deleter = std::function<void(vk::Device, VmaAllocator)>;
/// Wrapper class around command buffer execution. Handles an arbitrary
/// number of tasks that can be submitted concurrently. This allows the host
/// to start recording the next frame while the GPU is working on the
/// current one. Larger values can be used with caution, as they can cause
/// frame latency if the CPU is too far ahead of the GPU
class TaskScheduler {
class Buffer;
class Instance;
class CommandScheduler {
public:
TaskScheduler() = default;
~TaskScheduler();
CommandScheduler(Instance& instance);
~CommandScheduler();
/// Create and initialize the work scheduler
bool Create();
/// Retrieve either of the current frame's command buffers
vk::CommandBuffer GetRenderCommandBuffer() const;
/// Block host until the current command completes execution
void Synchronize();
/// Defer operation until the current command completes execution
void Schedule(Deleter&& func);
/// Submits the current command to the graphics queue
void Submit(bool wait_completion = false, vk::Semaphore wait = VK_NULL_HANDLE,
vk::Semaphore signal = VK_NULL_HANDLE);
/// Returns the command buffer used for early upload operations.
/// This is useful for vertex/uniform buffer uploads that happen once per frame
vk::CommandBuffer GetUploadCommandBuffer();
vk::DescriptorPool GetDescriptorPool() const;
/// Access the staging buffer of the current task
std::tuple<u8*, u32> RequestStaging(u32 size);
Buffer& GetStaging();
/// Returns the command buffer used for rendering
inline vk::CommandBuffer GetRenderCommandBuffer() const {
const CommandSlot& command = commands[current_command];
return command.render_command_buffer;
}
/// Query and/or synchronize CPU and GPU progress
u64 GetCPUTick() const;
u64 GetGPUTick() const;
void SyncToGPU();
void SyncToGPU(u64 task_index);
/// Returns the upload buffer of the active command slot
inline Buffer& GetCommandUploadBuffer() {
CommandSlot& command = commands[current_command];
return *command.upload_buffer;
}
void Schedule(std::function<void()> func);
void Submit(bool wait_completion = false, bool present = false, Swapchain* swapchain = nullptr);
void BeginTask();
/// Returns the index of the current command slot
inline u32 GetCurrentSlotIndex() const {
return current_command;
}
private:
struct Task {
/// Activates the next command slot and optionally waits for its completion
void SwitchSlot();
private:
Instance& instance;
u64 next_fence_counter = 1;
u64 completed_fence_counter = 0;
struct CommandSlot {
bool use_upload_buffer = false;
u64 current_offset = 0, task_id = 0;
std::array<vk::CommandBuffer, 2> command_buffers;
std::vector<std::function<void()>> cleanups;
vk::DescriptorPool pool;
Buffer staging;
u64 fence_counter = 0;
vk::CommandBuffer render_command_buffer, upload_command_buffer;
vk::Fence fence = VK_NULL_HANDLE;
std::unique_ptr<Buffer> upload_buffer;
std::vector<Deleter> cleanups;
};
vk::Semaphore timeline;
vk::CommandPool command_pool;
u64 current_task_id = 0;
// Each task contains unique resources
std::array<Task, TASK_COUNT> tasks;
u64 current_task = -1;
vk::CommandPool command_pool = VK_NULL_HANDLE;
std::array<CommandSlot, SCHEDULER_COMMAND_COUNT> commands;
u32 current_command = 0;
};
extern std::unique_ptr<TaskScheduler> g_vk_task_scheduler;
} // namespace VideoCore::Vulkan
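The fence_counter/completed_fence_counter pair implies a synchronization scheme along the following lines; this is a sketch under that assumption, not the actual implementation (which is not part of this hunk):
    void CommandScheduler::Synchronize() {
        const CommandSlot& command = commands[current_command];
        if (command.fence_counter > completed_fence_counter) {
            vk::Device device = instance.GetDevice();
            // Block until the GPU signals the slot's fence, then record progress
            (void)device.waitForFences(command.fence, true, UINT64_MAX);
            completed_fence_counter = command.fence_counter;
        }
    }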

View File

@ -2,288 +2,229 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <fstream>
#include <iostream>
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/pica_to_vulkan.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_texture.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_state.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
static int BytesPerPixel(vk::Format format) {
inline vk::Format ToVkFormat(TextureFormat format) {
switch (format) {
case vk::Format::eD32SfloatS8Uint:
return 5;
case vk::Format::eD32Sfloat:
case vk::Format::eB8G8R8A8Unorm:
case vk::Format::eR8G8B8A8Uint:
case vk::Format::eR8G8B8A8Unorm:
case vk::Format::eD24UnormS8Uint:
return 4;
case vk::Format::eR8G8B8Unorm:
case vk::Format::eR8G8B8Srgb:
return 3;
case vk::Format::eR5G6B5UnormPack16:
case vk::Format::eR5G5B5A1UnormPack16:
case vk::Format::eR4G4B4A4UnormPack16:
case vk::Format::eD16Unorm:
return 2;
case TextureFormat::RGBA8:
return vk::Format::eR8G8B8A8Unorm;
case TextureFormat::RGB8:
return vk::Format::eR8G8B8Unorm;
case TextureFormat::RGB5A1:
return vk::Format::eR5G5B5A1UnormPack16;
case TextureFormat::RGB565:
return vk::Format::eR5G6B5UnormPack16;
case TextureFormat::RGBA4:
return vk::Format::eR4G4B4A4UnormPack16;
case TextureFormat::D16:
return vk::Format::eD16Unorm;
case TextureFormat::D24:
return vk::Format::eX8D24UnormPack32;
case TextureFormat::D24S8:
return vk::Format::eD24UnormS8Uint;
default:
UNREACHABLE();
LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format);
return vk::Format::eUndefined;
}
}
vk::ImageAspectFlags GetImageAspect(vk::Format format) {
vk::ImageAspectFlags flags;
switch (format) {
case vk::Format::eD16UnormS8Uint:
case vk::Format::eD24UnormS8Uint:
case vk::Format::eD32SfloatS8Uint:
flags = vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth;
break;
case vk::Format::eD16Unorm:
case vk::Format::eD32Sfloat:
flags = vk::ImageAspectFlagBits::eDepth;
break;
inline vk::ImageType ToVkImageType(TextureType type) {
switch (type) {
case TextureType::Texture1D:
return vk::ImageType::e1D;
case TextureType::Texture2D:
return vk::ImageType::e2D;
case TextureType::Texture3D:
return vk::ImageType::e3D;
default:
flags = vk::ImageAspectFlagBits::eColor;
LOG_ERROR(Render_Vulkan, "Unknown texture type {}!", type);
return vk::ImageType::e2D;
}
}
return flags;
inline vk::ImageViewType ToVkImageViewType(TextureViewType view_type) {
switch (view_type) {
case TextureViewType::View1D:
return vk::ImageViewType::e1D;
case TextureViewType::View2D:
return vk::ImageViewType::e2D;
case TextureViewType::View3D:
return vk::ImageViewType::e3D;
case TextureViewType::ViewCube:
return vk::ImageViewType::eCube;
case TextureViewType::View1DArray:
return vk::ImageViewType::e1DArray;
case TextureViewType::View2DArray:
return vk::ImageViewType::e2DArray;
case TextureViewType::ViewCubeArray:
return vk::ImageViewType::eCubeArray;
default:
LOG_ERROR(Render_Vulkan, "Unknown texture view type {}!", view_type);
return vk::ImageViewType::e2D;
}
}
Texture::Texture(Instance& instance, CommandScheduler& scheduler) :
instance(instance), scheduler(scheduler) {}
Texture::Texture(Instance& instance, CommandScheduler& scheduler,
const TextureInfo& info) : TextureBase(info),
instance(instance), scheduler(scheduler) {
// Convert the input format to another that supports attachments
advertised_format = ToVkFormat(info.format);
internal_format = instance.GetFormatAlternative(advertised_format);
aspect = GetImageAspect(advertised_format);
vk::Device device = instance.GetDevice();
const vk::ImageCreateInfo image_info = {
.flags = info.view_type == TextureViewType::ViewCube ?
vk::ImageCreateFlagBits::eCubeCompatible :
vk::ImageCreateFlags{},
.imageType = ToVkImageType(info.type),
.format = internal_format,
.extent = {info.width, info.height, 1},
.mipLevels = info.levels,
.arrayLayers = info.view_type == TextureViewType::ViewCube ? 6u : 1u,
.samples = vk::SampleCountFlagBits::e1,
.usage = GetImageUsage(aspect),
};
const VmaAllocationCreateInfo alloc_info = {
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE
};
VkImage unsafe_image = VK_NULL_HANDLE;
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
VmaAllocator allocator = instance.GetAllocator();
// Allocate texture memory
vmaCreateImage(allocator, &unsafe_image_info, &alloc_info, &unsafe_image, &allocation, nullptr);
image = vk::Image{unsafe_image};
const vk::ImageViewCreateInfo view_info = {
.image = image,
.viewType = ToVkImageViewType(info.view_type),
.format = internal_format,
.subresourceRange = {aspect, 0, info.levels, 0, info.view_type == TextureViewType::ViewCube ? 6u : 1u}
};
// Create image view
image_view = device.createImageView(view_info);
}
Texture::Texture(Instance& instance, CommandScheduler& scheduler,
vk::Image image, const TextureInfo& info) : TextureBase(info),
instance(instance), scheduler(scheduler), image(image),
is_texture_owned(false) {
const vk::ImageViewCreateInfo view_info = {
.image = image,
.viewType = ToVkImageViewType(info.view_type),
.format = internal_format,
.subresourceRange = {aspect, 0, info.levels, 0, 1}
};
// Create image view
vk::Device device = instance.GetDevice();
image_view = device.createImageView(view_info);
}
Texture::~Texture() {
Destroy();
}
Texture::Texture(Texture&& other) noexcept {
info = std::exchange(other.info, Info{});
texture = std::exchange(other.texture, VK_NULL_HANDLE);
aspect = std::exchange(other.aspect, vk::ImageAspectFlagBits::eNone);
view = std::exchange(other.view, VK_NULL_HANDLE);
memory = std::exchange(other.memory, VK_NULL_HANDLE);
image_size = std::exchange(other.image_size, 0);
adopted = std::exchange(other.adopted, false);
is_rgb = std::exchange(other.is_rgb, false);
is_d24s8 = std::exchange(other.is_d24s8, false);
}
Texture& Texture::operator=(Texture&& other) noexcept {
Destroy();
info = std::exchange(other.info, Info{});
texture = std::exchange(other.texture, VK_NULL_HANDLE);
aspect = std::exchange(other.aspect, vk::ImageAspectFlagBits::eNone);
view = std::exchange(other.view, VK_NULL_HANDLE);
memory = std::exchange(other.memory, VK_NULL_HANDLE);
image_size = std::exchange(other.image_size, 0);
adopted = std::exchange(other.adopted, false);
is_rgb = std::exchange(other.is_rgb, false);
is_d24s8 = std::exchange(other.is_d24s8, false);
return *this;
}
void Texture::Create(const Info& create_info) {
auto device = g_vk_instace->GetDevice();
info = create_info;
// Emulate RGB8 format with RGBA8
is_rgb = false;
if (info.format == vk::Format::eR8G8B8Unorm) {
is_rgb = true;
info.format = vk::Format::eR8G8B8A8Unorm;
}
is_d24s8 = false;
if (info.format == vk::Format::eD24UnormS8Uint) {
is_d24s8 = true;
info.format = vk::Format::eD32SfloatS8Uint;
}
// Create the texture
image_size = info.width * info.height * BytesPerPixel(info.format);
aspect = GetImageAspect(info.format);
vk::ImageCreateFlags flags{};
if (info.view_type == vk::ImageViewType::eCube) {
flags = vk::ImageCreateFlagBits::eCubeCompatible;
}
vk::ImageCreateInfo image_info {
flags, info.type, info.format,
{ info.width, info.height, 1 }, info.levels, info.layers,
static_cast<vk::SampleCountFlagBits>(info.multisamples),
vk::ImageTiling::eOptimal, info.usage
};
texture = device.createImage(image_info);
// Create texture memory
auto requirements = device.getImageMemoryRequirements(texture);
auto memory_index = Buffer::FindMemoryType(requirements.memoryTypeBits,
vk::MemoryPropertyFlagBits::eDeviceLocal);
vk::MemoryAllocateInfo alloc_info(requirements.size, memory_index);
memory = device.allocateMemory(alloc_info);
device.bindImageMemory(texture, memory, 0);
// Create texture view
vk::ImageViewCreateInfo view_info {
{}, texture, info.view_type, info.format, {},
{aspect, 0, info.levels, 0, info.layers}
};
view = device.createImageView(view_info);
}
void Texture::Create(Texture& other) {
auto info = other.info;
Create(info);
// Copy the buffer contents
auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer();
Transition(cmdbuffer, vk::ImageLayout::eTransferDstOptimal);
auto old_layout = other.GetLayout();
other.Transition(cmdbuffer, vk::ImageLayout::eTransferSrcOptimal);
u32 copy_count = 0;
std::array<vk::ImageCopy, 16> copy_regions;
for (u32 i = 0; i < info.levels; i++) {
copy_regions[copy_count++] = vk::ImageCopy{
vk::ImageSubresourceLayers{aspect, i, 0, 1}, {0},
vk::ImageSubresourceLayers{aspect, i, 0, 1}, {0},
{info.width, info.height, 1}
};
}
cmdbuffer.copyImage(other.GetHandle(), vk::ImageLayout::eTransferSrcOptimal,
texture, vk::ImageLayout::eTransferDstOptimal, copy_count,
copy_regions.data());
Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal);
other.Transition(cmdbuffer, old_layout);
}
void Texture::Adopt(const Info& create_info, vk::Image image) {
info = create_info;
image_size = info.width * info.height * BytesPerPixel(info.format);
aspect = GetImageAspect(info.format);
texture = image;
// Create texture view
vk::ImageViewCreateInfo view_info {
{}, texture, info.view_type, info.format, {},
{aspect, 0, info.levels, 0, info.layers}
};
auto device = g_vk_instace->GetDevice();
view = device.createImageView(view_info);
adopted = true;
}
void Texture::Destroy() {
if (texture && !adopted) {
// Make sure to unbind the texture before destroying it
auto& state = VulkanState::Get();
state.UnbindTexture(*this);
auto deleter = [texture = texture,
view = view,
memory = memory]() {
auto device = g_vk_instace->GetDevice();
if (texture) {
device.destroyImage(texture);
if (image && is_texture_owned) {
auto deleter = [image = image, allocation = allocation,
view = image_view](vk::Device device, VmaAllocator allocator) {
device.destroyImageView(view);
device.freeMemory(memory);
}
vmaDestroyImage(allocator, static_cast<VkImage>(image), allocation);
};
// Schedule deletion of the texture after it's no longer used
// by the GPU
g_vk_task_scheduler->Schedule(deleter);
}
// If the image was adopted (probably from the swapchain) then only
// destroy the view
if (adopted) {
g_vk_task_scheduler->Schedule([view = view](){
auto device = g_vk_instace->GetDevice();
device.destroyImageView(view);
});
// Schedule deletion of the texture after it's no longer used by the GPU
scheduler.Schedule(deleter);
} else if (!is_texture_owned) {
// If the texture does not own the image, destroy the view immediately, as
// synchronization is the caller's responsibility
vk::Device device = instance.GetDevice();
device.destroyImageView(image_view);
}
}
void Texture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout) {
Transition(cmdbuffer, new_layout, 0, info.levels, 0, info.layers);
}
void Texture::Transition(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout,
u32 level, u32 level_count) {
ASSERT(level + level_count <= TEXTURE_MAX_LEVELS);
void Texture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout,
u32 start_level, u32 level_count, u32 start_layer, u32 layer_count) {
if (new_layout == layout) {
// Ensure all miplevels in the range have the same layout
vk::ImageLayout old_layout = layouts[level];
if (old_layout != vk::ImageLayout::eUndefined) {
for (u32 i = 0; i < level_count; i++) {
ASSERT(layouts[level + i] == old_layout);
}
}
// Don't do anything if the image is already in the wanted layout
if (new_layout == old_layout) {
return;
}
struct LayoutInfo {
vk::ImageLayout layout;
vk::AccessFlags access;
vk::PipelineStageFlags stage;
};
// Get optimal transition settings for every image layout. Settings taken from Dolphin
auto layout_info = [](vk::ImageLayout layout) -> LayoutInfo {
LayoutInfo info{ .layout = layout };
auto GetLayoutInfo = [](vk::ImageLayout layout) -> LayoutInfo {
LayoutInfo info;
switch (layout) {
case vk::ImageLayout::eUndefined:
// Layout undefined therefore contents undefined, and we don't care what happens to it.
info.access = vk::AccessFlagBits::eNone;
info.stage = vk::PipelineStageFlagBits::eTopOfPipe;
break;
case vk::ImageLayout::ePreinitialized:
// Image has been pre-initialized by the host, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eHostWrite;
info.stage = vk::PipelineStageFlagBits::eHost;
break;
case vk::ImageLayout::eColorAttachmentOptimal:
// Image was being used as a color attachment, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite;
info.access = vk::AccessFlagBits::eColorAttachmentRead |
vk::AccessFlagBits::eColorAttachmentWrite;
info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput;
break;
case vk::ImageLayout::eDepthStencilAttachmentOptimal:
// Image was being used as a depthstencil attachment, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead | vk::AccessFlagBits::eDepthStencilAttachmentWrite;
info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests | vk::PipelineStageFlagBits::eLateFragmentTests;
info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite;
info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests;
break;
case vk::ImageLayout::ePresentSrcKHR:
info.access = vk::AccessFlagBits::eNone;
info.stage = vk::PipelineStageFlagBits::eBottomOfPipe;
break;
case vk::ImageLayout::eShaderReadOnlyOptimal:
// Image was being used as a shader resource, make sure all reads have finished.
info.access = vk::AccessFlagBits::eShaderRead;
info.stage = vk::PipelineStageFlagBits::eFragmentShader;
break;
case vk::ImageLayout::eTransferSrcOptimal:
// Image was being used as a copy source, ensure all reads have finished.
info.access = vk::AccessFlagBits::eTransferRead;
info.stage = vk::PipelineStageFlagBits::eTransfer;
break;
case vk::ImageLayout::eTransferDstOptimal:
// Image was being used as a copy destination, ensure all writes have finished.
info.access = vk::AccessFlagBits::eTransferWrite;
info.stage = vk::PipelineStageFlagBits::eTransfer;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout);
UNREACHABLE();
@ -292,220 +233,286 @@ void Texture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout
return info;
};
LayoutInfo source = GetLayoutInfo(old_layout);
LayoutInfo dest = GetLayoutInfo(new_layout);
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = source.access,
.dstAccessMask = dest.access,
.oldLayout = old_layout,
.newLayout = new_layout,
.image = image,
.subresourceRange = {aspect, level, level_count, 0, 1}
};
// Submit pipeline barrier
LayoutInfo source = layout_info(layout), dst = layout_info(new_layout);
vk::ImageMemoryBarrier barrier {
source.access, dst.access,
source.layout, dst.layout,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
texture,
vk::ImageSubresourceRange{aspect, start_level, level_count, start_layer, layer_count}
command_buffer.pipelineBarrier(source.stage, dest.stage,
vk::DependencyFlagBits::eByRegion,
{}, {}, barrier);
// Update layouts
SetLayout(new_layout, level, level_count);
}
void Texture::SetLayout(vk::ImageLayout new_layout, u32 level, u32 level_count) {
std::fill_n(layouts.begin() + level, level_count, new_layout);
}
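As an illustration of what per-level layout tracking buys, a caller can now retarget a single mip for a transfer while the other levels stay shader-readable; a hypothetical usage:
    // Transition only mip 2 for the copy; the other levels keep their layout
    texture.Transition(cmd, vk::ImageLayout::eTransferDstOptimal, 2, 1);
    // ... copyBufferToImage into mip 2 ...
    texture.Transition(cmd, vk::ImageLayout::eShaderReadOnlyOptimal, 2, 1);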
void Texture::Upload(Rect2D rectangle, u32 stride, std::span<const u8> data, u32 level) {
const u64 byte_count = data.size();
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
// If the advertised format supports blitting then use GPU-accelerated
// format conversion.
if (internal_format != advertised_format &&
instance.IsFormatSupported(advertised_format,
vk::FormatFeatureFlagBits::eBlitSrc)) {
// Creating a new staging texture for each upload/download is expensive
// but this path is not common. TODO: Profile this
StagingTexture staging{instance, scheduler, info};
const std::array offsets = {
vk::Offset3D{rectangle.x, rectangle.y, 0},
vk::Offset3D{static_cast<s32>(rectangle.x + rectangle.width),
static_cast<s32>(rectangle.y + rectangle.height), 0}
};
cmdbuffer.pipelineBarrier(source.stage, dst.stage, vk::DependencyFlagBits::eByRegion, {}, {}, barrier);
layout = new_layout;
}
void Texture::OverrideImageLayout(vk::ImageLayout new_layout) {
layout = new_layout;
}
void Texture::Upload(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span<u8> pixels) {
u32 request_size = is_rgb ? (pixels.size() / 3) * 4 :
(is_d24s8 ? (pixels.size() / 4) * 8 : pixels.size());
auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(request_size);
if (!buffer) {
LOG_ERROR(Render_Vulkan, "Cannot upload pixels without staging buffer!");
}
// Copy pixels to staging buffer
auto& state = VulkanState::Get();
state.EndRendering();
auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer();
// Automatically convert RGB to RGBA
if (is_rgb) {
auto data = RGBToRGBA(pixels);
std::memcpy(buffer, data.data(), data.size());
}
else if (is_d24s8) {
auto data = D24S8ToD32S8(pixels);
std::memcpy(buffer, data.data(), data.size() * sizeof(data[0]));
}
else {
std::memcpy(buffer, pixels.data(), pixels.size());
}
std::array<vk::BufferImageCopy, 2> copy_regions;
u32 region_count = 1;
copy_regions[0] = vk::BufferImageCopy{
offset, row_length, region.extent.height,
{aspect, level, layer, 1},
{region.offset.x, region.offset.y, 0},
{region.extent.width, region.extent.height, 1}
const vk::ImageBlit image_blit = {
.srcSubresource = {aspect, level, 0, 1},
.srcOffsets = offsets,
.dstSubresource = {aspect, level, 0, 1},
.dstOffsets = offsets
};
if (aspect & vk::ImageAspectFlagBits::eDepth &&
aspect & vk::ImageAspectFlagBits::eStencil) {
// Copying both depth and stencil requires two separate regions
copy_regions[1] = copy_regions[0];
copy_regions[0].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
copy_regions[1].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
// Copy data to staging texture
std::memcpy(staging.GetMappedPtr(), data.data(), byte_count);
staging.Commit(byte_count);
region_count++;
}
Transition(command_buffer, vk::ImageLayout::eTransferDstOptimal, level);
// Transition image to transfer format
Transition(cmdbuffer, vk::ImageLayout::eTransferDstOptimal);
// Blit
command_buffer.blitImage(staging.GetHandle(), vk::ImageLayout::eGeneral,
image, vk::ImageLayout::eTransferDstOptimal,
image_blit, vk::Filter::eNearest);
cmdbuffer.copyBufferToImage(g_vk_task_scheduler->GetStaging().GetBuffer(),
texture, vk::ImageLayout::eTransferDstOptimal, region_count,
copy_regions.data());
// Otherwise use normal staging buffer path with possible CPU conversion
} else {
Buffer& staging = scheduler.GetCommandUploadBuffer();
const u64 staging_offset = staging.GetCurrentOffset();
// Prepare image for shader reads
Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal);
}
// Copy pixels to the staging buffer
auto slice = staging.Map(byte_count);
std::memcpy(slice.data(), data.data(), byte_count);
staging.Commit(byte_count);
void Texture::Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span<u8> memory) {
u32 request_size = is_rgb ? (memory.size() / 3) * 4 :
(is_d24s8 ? (memory.size() / 4) * 8 : memory.size());
auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(request_size);
if (!buffer) {
LOG_ERROR(Render_Vulkan, "Cannot download texture without staging buffer!");
}
// TODO: Handle depth and stencil uploads
ASSERT(aspect == vk::ImageAspectFlagBits::eColor &&
advertised_format == internal_format);
auto& state = VulkanState::Get();
state.EndRendering();
auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer();
std::array<vk::BufferImageCopy, 2> copy_regions;
u32 region_count = 1;
copy_regions[0] = vk::BufferImageCopy{
offset, row_length, region.extent.height,
{aspect, level, layer, 1},
{region.offset.x, region.offset.y, 0},
{region.extent.width, region.extent.height, 1}
const vk::BufferImageCopy copy_region = {
.bufferOffset = staging_offset,
.bufferRowLength = stride,
.bufferImageHeight = rectangle.height,
.imageSubresource = {
.aspectMask = aspect,
.mipLevel = level,
.baseArrayLayer = 0,
.layerCount = 1
},
.imageOffset = {rectangle.x, rectangle.y, 0},
.imageExtent = {rectangle.width, rectangle.height, 1}
};
if (aspect & vk::ImageAspectFlagBits::eDepth &&
aspect & vk::ImageAspectFlagBits::eStencil) {
// Copying both depth and stencil requires two separate regions
copy_regions[1] = copy_regions[0];
copy_regions[0].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
copy_regions[1].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
Transition(command_buffer, vk::ImageLayout::eTransferDstOptimal, level);
region_count++;
// Copy staging buffer to the texture
command_buffer.copyBufferToImage(staging.GetHandle(), image,
vk::ImageLayout::eTransferDstOptimal,
copy_region);
}
// Transition image to transfer format
auto old_layout = GetLayout();
Transition(cmdbuffer, vk::ImageLayout::eTransferSrcOptimal);
Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal);
}
cmdbuffer.copyImageToBuffer(texture, vk::ImageLayout::eTransferSrcOptimal,
g_vk_task_scheduler->GetStaging().GetBuffer(),
region_count, copy_regions.data());
void Texture::Download(Rect2D rectangle, u32 stride, std::span<u8> data, u32 level) {
const u64 byte_count = data.size();
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
// Restore layout
Transition(cmdbuffer, old_layout);
// If the advertised format supports blitting then use GPU-accelerated
// format conversion.
if (internal_format != advertised_format &&
instance.IsFormatSupported(advertised_format,
vk::FormatFeatureFlagBits::eBlitDst)) {
// Creating a new staging texture for each upload/download is expensive
// but this path is not common. TODO: Profile this
StagingTexture staging{instance, scheduler, info};
// Wait for the data to be available
// NOTE: This is really slow and should be reworked
g_vk_task_scheduler->Submit(true);
const std::array offsets = {
vk::Offset3D{rectangle.x, rectangle.y, 0},
vk::Offset3D{static_cast<s32>(rectangle.x + rectangle.width),
static_cast<s32>(rectangle.y + rectangle.height), 0}
};
// Automatically convert RGB to RGBA
if (is_rgb) {
auto data = RGBAToRGB(std::span(buffer, request_size));
std::memcpy(memory.data(), data.data(), memory.size());
}
else if (is_d24s8) {
auto data = D32S8ToD24S8(std::span(buffer, request_size));
std::memcpy(memory.data(), data.data(), memory.size());
}
else {
std::memcpy(memory.data(), buffer, memory.size());
const vk::ImageBlit image_blit = {
.srcSubresource = {aspect, level, 0, 1},
.srcOffsets = offsets,
.dstSubresource = {aspect, level, 0, 1},
.dstOffsets = offsets
};
Transition(command_buffer, vk::ImageLayout::eTransferSrcOptimal, level);
// Blit
command_buffer.blitImage(image, vk::ImageLayout::eTransferSrcOptimal,
staging.GetHandle(), vk::ImageLayout::eGeneral,
image_blit, vk::Filter::eNearest);
// TODO: Async downloads
scheduler.Submit(true);
// Copy data to the destination
staging.Commit(byte_count);
std::memcpy(data.data(), staging.GetMappedPtr(), byte_count);
// Otherwise use normal staging buffer path with possible CPU conversion
} else {
Buffer& staging = scheduler.GetCommandUploadBuffer();
const u64 staging_offset = staging.GetCurrentOffset();
const vk::BufferImageCopy copy_region = {
.bufferOffset = staging_offset,
.bufferRowLength = stride,
.bufferImageHeight = rectangle.height,
.imageSubresource = {
.aspectMask = aspect,
.mipLevel = level,
.baseArrayLayer = 0,
.layerCount = 1
},
.imageOffset = {rectangle.x, rectangle.y, 0},
.imageExtent = {rectangle.width, rectangle.height, 1}
};
Transition(command_buffer, vk::ImageLayout::eTransferSrcOptimal, level);
// Copy pixel data to the staging buffer
command_buffer.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal,
staging.GetHandle(), copy_region);
Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal);
// TODO: Async downloads
scheduler.Submit(true);
// Copy data to the destination
auto memory = staging.Map(byte_count);
std::memcpy(data.data(), memory.data(), byte_count);
}
}
template <typename Out, typename In>
std::span<Out> SpanCast(std::span<In> span) {
return std::span(reinterpret_cast<Out*>(span.data()), span.size_bytes() / sizeof(Out));
StagingTexture::StagingTexture(Instance& instance, CommandScheduler& scheduler,
const TextureInfo& info) :
TextureBase(info), instance(instance), scheduler(scheduler) {
format = ToVkFormat(info.format);
const vk::ImageCreateInfo image_info = {
.flags = info.view_type == TextureViewType::ViewCube ?
vk::ImageCreateFlagBits::eCubeCompatible :
vk::ImageCreateFlags{},
.imageType = ToVkImageType(info.type),
.format = format,
.extent = {info.width, info.height, 1},
.mipLevels = info.levels,
.arrayLayers = info.view_type == TextureViewType::ViewCube ? 6u : 1u,
.samples = vk::SampleCountFlagBits::e1,
.usage = vk::ImageUsageFlagBits::eTransferSrc |
vk::ImageUsageFlagBits::eTransferDst,
};
const VmaAllocationCreateInfo alloc_create_info = {
.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
VMA_ALLOCATION_CREATE_MAPPED_BIT,
.usage = VMA_MEMORY_USAGE_AUTO
};
VkImage unsafe_image = VK_NULL_HANDLE;
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
VmaAllocationInfo alloc_info;
VmaAllocator allocator = instance.GetAllocator();
// Allocate texture memory
vmaCreateImage(allocator, &unsafe_image_info, &alloc_create_info,
&unsafe_image, &allocation, &alloc_info);
image = vk::Image{unsafe_image};
// Map memory
mapped_ptr = alloc_info.pMappedData;
// Transition image to VK_IMAGE_LAYOUT_GENERAL. This layout is convenient
// for staging textures since it allows for well defined host access and
// works with vkCmdBlitImage, thus eliminating the need for layout transitions
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eNone,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eGeneral,
.image = image,
.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, info.levels, 0, 1}
};
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eBottomOfPipe,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion,
{}, {}, barrier);
}
std::vector<u8> Texture::RGBToRGBA(std::span<u8> data) {
ASSERT(data.size() % 3 == 0);
StagingTexture::~StagingTexture() {
if (image) {
auto deleter = [allocation = allocation,
image = image](vk::Device device, VmaAllocator allocator) {
vmaDestroyImage(allocator, static_cast<VkImage>(image), allocation);
};
u32 new_size = (data.size() / 3) * 4;
std::vector<u8> rgba(new_size);
u32 dst_pos = 0;
for (u32 i = 0; i < data.size(); i += 3) {
std::memcpy(rgba.data() + dst_pos, data.data() + i, 3);
rgba[dst_pos + 3] = 255u;
dst_pos += 4;
// Schedule deletion of the texture after it's no longer used by the GPU
scheduler.Schedule(deleter);
}
return rgba;
}
std::vector<u64> Texture::D24S8ToD32S8(std::span<u8> data) {
ASSERT(data.size() % 4 == 0);
std::vector<u64> d32s8;
std::span<u32> d24s8 = SpanCast<u32>(data);
d32s8.reserve(data.size() * 2);
std::ranges::transform(d24s8, std::back_inserter(d32s8), [](u32 comp) -> u64 {
// Convert normalized 24bit depth component to floating point
float fdepth = static_cast<float>(comp & 0xFFFFFF) / 0xFFFFFF;
u64 result = static_cast<u64>(comp) << 8;
// Use std::memcpy to avoid the unsafe casting required to preserve the floating
// point bits
std::memcpy(&result, &fdepth, 4);
return result;
});
return d32s8;
void StagingTexture::Commit(u32 size) {
VmaAllocator allocator = instance.GetAllocator();
vmaFlushAllocation(allocator, allocation, 0, size);
}
std::vector<u8> Texture::RGBAToRGB(std::span<u8> data) {
ASSERT(data.size() % 4 == 0);
Sampler::Sampler(Instance& instance, SamplerInfo info) :
SamplerBase(info), instance(instance) {
u32 new_size = (data.size() / 4) * 3;
std::vector<u8> rgb(new_size);
auto properties = instance.GetPhysicalDevice().getProperties();
const auto filtering = PicaToVK::TextureFilterMode(info.mag_filter,
info.min_filter,
info.mip_filter);
const vk::SamplerCreateInfo sampler_info = {
.magFilter = filtering.mag_filter,
.minFilter = filtering.min_filter,
.mipmapMode = filtering.mip_mode,
.addressModeU = PicaToVK::WrapMode(info.wrap_s),
.addressModeV = PicaToVK::WrapMode(info.wrap_t),
.anisotropyEnable = true,
.maxAnisotropy = properties.limits.maxSamplerAnisotropy,
.compareEnable = false,
.compareOp = vk::CompareOp::eAlways,
.borderColor = vk::BorderColor::eIntOpaqueBlack,
.unnormalizedCoordinates = false
};
u32 dst_pos = 0;
for (u32 i = 0; i < data.size(); i += 4) {
std::memcpy(rgb.data() + dst_pos, data.data() + i, 3);
dst_pos += 3;
}
return rgb;
vk::Device device = instance.GetDevice();
sampler = device.createSampler(sampler_info);
}
std::vector<u32> Texture::D32S8ToD24S8(std::span<u8> data) {
ASSERT(data.size() % 8 == 0);
std::vector<u32> d24s8;
std::span<u64> d32s8 = SpanCast<u64>(data);
d24s8.reserve(data.size() / 2);
std::ranges::transform(d32s8, std::back_inserter(d24s8), [](u64 comp) -> u32 {
// Convert floating point to 24bit normalized depth
float fdepth = 0.f;
u32 depth = comp & 0xFFFFFFFF;
std::memcpy(&fdepth, &depth, 4);
u32 stencil = (comp >> 32) & 0xFF;
u64 result = static_cast<u32>(fdepth * 0xFFFFFF) | (stencil << 24);
return result;
});
return d24s8;
Sampler::~Sampler() {
vk::Device device = instance.GetDevice();
device.destroySampler(sampler);
}
} // namespace VideoCore::Vulkan
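Putting the texture API together, a sketch of a round trip through Upload/Download; the TextureInfo field values are illustrative and assume the aggregate layout used elsewhere in this commit:
    TextureInfo info{};
    info.width = 256;
    info.height = 256;
    info.levels = 1;
    info.format = TextureFormat::RGBA8;
    info.type = TextureType::Texture2D;
    info.view_type = TextureViewType::View2D;
    Texture texture{instance, scheduler, info};
    std::vector<u8> pixels(256 * 256 * 4);
    texture.Upload(Rect2D{0, 0, 256, 256}, 256, pixels);   // stride is the row length in texels
    std::vector<u8> readback(256 * 256 * 4);
    texture.Download(Rect2D{0, 0, 256, 256}, 256, readback);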

View File

@ -4,80 +4,147 @@
#pragma once
#include <memory>
#include <span>
#include <functional>
#include <glm/glm.hpp>
#include "common/math_util.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_surface_params.h"
#include "video_core/common/texture.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
/// Vulkan texture object
class Texture final : public NonCopyable {
// PICA textures have at most 8 mipmap levels
constexpr u32 TEXTURE_MAX_LEVELS = 8;
class Instance;
class CommandScheduler;
/**
* A texture located in GPU memory
*/
class Texture : public VideoCore::TextureBase {
public:
/// Information for the creation of the target texture
struct Info {
u32 width, height;
vk::Format format;
vk::ImageType type;
vk::ImageViewType view_type;
vk::ImageUsageFlags usage;
u32 multisamples = 1;
u32 levels = 1, layers = 1;
};
// Default constructor
Texture(Instance& instance, CommandScheduler& scheduler);
// Constructor for texture creation
Texture(Instance& instance, CommandScheduler& scheduler,
const TextureInfo& info);
// Constructor for not owning textures (swapchain)
Texture(Instance& instance, CommandScheduler& scheduler,
vk::Image image, const TextureInfo& info);
Texture() = default;
~Texture();
/// Enable move operations
Texture(Texture&& other) noexcept;
Texture& operator=(Texture&& other) noexcept;
/// Uploads pixel data to the GPU memory
void Upload(Rect2D rectangle, u32 stride, std::span<const u8> data,
u32 level = 0) override;
/// Create a new Vulkan texture object
void Create(const Info& info);
void Create(Texture& texture);
void Adopt(const Info& info, vk::Image image);
void Destroy();
/// Downloads pixel data from GPU memory
void Download(Rect2D rectangle, u32 stride, std::span<u8> data,
u32 level = 0) override;
/// Query objects
bool IsValid() const { return texture; }
vk::Image GetHandle() const { return texture; }
vk::ImageView GetView() const { return view; }
vk::Format GetFormat() const { return info.format; }
vk::ImageLayout GetLayout() const { return layout; }
u32 GetSamples() const { return info.multisamples; }
u32 GetSize() const { return image_size; }
vk::Rect2D GetArea() const { return {{0, 0},{info.width, info.height}}; }
/// Copies the specified rectangle area to the destination texture
void BlitTo(TextureHandle dest, Rect2D src_rectangle, Rect2D dest_rect,
u32 src_level = 0, u32 dest_level = 0) override;
/// Copies CPU side pixel data to the GPU texture buffer
void Upload(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span<u8> pixels);
void Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span<u8> dst);
/// Overrides the layout of the provided image subresource
void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1);
/// Used to transition the image to an optimal layout during transfers
void OverrideImageLayout(vk::ImageLayout new_layout);
void Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout);
void Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout, u32 start_level, u32 level_count,
u32 start_layer, u32 layer_count);
/// Transitions part of the image to the provided layout
void Transition(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout,
u32 level = 0, u32 level_count = 1);
/// Returns the underlying vulkan image handle
vk::Image GetHandle() const {
return image;
}
/// Returns the Vulkan image view
vk::ImageView GetView() const {
return image_view;
}
/// Returns the internal format backing the texture.
/// It may not match the input pixel format.
vk::Format GetInternalFormat() const {
return internal_format;
}
/// Returns the current image layout
vk::ImageLayout GetLayout(u32 level = 0) const {
return layouts.at(level);
}
/// Returns a rectangle that represents the complete area of the texture
vk::Rect2D GetArea() const {
return {{0, 0},{info.width, info.height}};
}
private:
std::vector<u8> RGBToRGBA(std::span<u8> data);
std::vector<u64> D24S8ToD32S8(std::span<u8> data);
Instance& instance;
CommandScheduler& scheduler;
std::vector<u8> RGBAToRGB(std::span<u8> data);
std::vector<u32> D32S8ToD24S8(std::span<u8> data);
// Vulkan texture handle
vk::Image image = VK_NULL_HANDLE;
vk::ImageView image_view = VK_NULL_HANDLE;
VmaAllocation allocation = nullptr;
bool is_texture_owned = true;
// Texture properties
vk::Format advertised_format = vk::Format::eUndefined;
vk::Format internal_format = vk::Format::eUndefined;
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone;
std::array<vk::ImageLayout, TEXTURE_MAX_LEVELS> layouts;
};
/**
* Staging texture located in CPU memory. Used for intermediate format
* conversions
*/
class StagingTexture : public VideoCore::TextureBase {
public:
StagingTexture(Instance& instance, CommandScheduler& scheduler,
const TextureInfo& info);
~StagingTexture();
/// Flushes any writes made to texture memory
void Commit(u32 size);
/// Returns a pointer to the mapped texture memory
void* GetMappedPtr() {
return mapped_ptr;
}
/// Returns the staging image handle
vk::Image GetHandle() const {
return image;
}
private:
Texture::Info info{};
vk::ImageLayout layout{};
vk::ImageAspectFlags aspect{};
vk::Image texture;
vk::ImageView view;
vk::DeviceMemory memory;
u32 image_size{};
bool adopted{false};
bool is_rgb{false}, is_d24s8{false};
Instance& instance;
CommandScheduler& scheduler;
vk::Image image = VK_NULL_HANDLE;
VmaAllocation allocation = VK_NULL_HANDLE;
vk::Format format = vk::Format::eUndefined;
u32 capacity = 0;
void* mapped_ptr = nullptr;
};
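For reference, the conversion path this class serves (see Upload() in the .cpp above) boils down to the following pattern; a sketch assuming a blit-capable color format:
    StagingTexture staging{instance, scheduler, info};
    std::memcpy(staging.GetMappedPtr(), pixels.data(), pixels.size());
    staging.Commit(static_cast<u32>(pixels.size()));
    // The scheduler's command buffer then blits from the staging image
    // (kept in eGeneral layout) into the destination texture, letting the
    // GPU perform the format conversion.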
/**
* Vulkan sampler object
*/
class Sampler : public VideoCore::SamplerBase {
public:
Sampler(Instance& instance, SamplerInfo info);
~Sampler() override;
/// Returns the underlying vulkan sampler handle
vk::Sampler GetHandle() const {
return sampler;
}
private:
Instance& instance;
vk::Sampler sampler;
};
} // namespace VideoCore::Vulkan

View File

@ -56,9 +56,9 @@ struct OutputVertex {
Common::Vec2<float24> tc0;
Common::Vec2<float24> tc1;
float24 tc0_w;
INSERT_PADDING_WORDS(1);
INSERT_PADDING_WORDS_NOINIT(1);
Common::Vec3<float24> view;
INSERT_PADDING_WORDS(1);
INSERT_PADDING_WORDS_NOINIT(1);
Common::Vec2<float24> tc2;
static void ValidateSemantics(const RasterizerRegs& regs);

View File

@ -164,8 +164,10 @@ static void LogCritical(const char* msg) {
void JitShader::Compile_Assert(bool condition, const char* msg) {
if (!condition) {
ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(ABI_PARAM1, reinterpret_cast<std::size_t>(msg));
CallFarFunction(*this, LogCritical);
ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
}
}
@ -595,11 +597,11 @@ void JitShader::Compile_END(Instruction instr) {
}
void JitShader::Compile_BREAKC(Instruction instr) {
Compile_Assert(looping, "BREAKC must be inside a LOOP");
if (looping) {
Compile_Assert(loop_depth, "BREAKC must be inside a LOOP");
if (loop_depth) {
Compile_EvaluateCondition(instr);
ASSERT(loop_break_label);
jnz(*loop_break_label);
ASSERT(!loop_break_labels.empty());
jnz(loop_break_labels.back(), T_NEAR);
}
}
@ -725,9 +727,11 @@ void JitShader::Compile_IF(Instruction instr) {
void JitShader::Compile_LOOP(Instruction instr) {
Compile_Assert(instr.flow_control.dest_offset >= program_counter,
"Backwards loops not supported");
Compile_Assert(!looping, "Nested loops not supported");
looping = true;
Compile_Assert(loop_depth < 1, "Nested loops may not be supported");
if (loop_depth++) {
const auto loop_save_regs = BuildRegSet({LOOPCOUNT_REG, LOOPINC, LOOPCOUNT});
ABI_PushRegistersAndAdjustStack(*this, loop_save_regs, 0);
}
// This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
// The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
@ -746,16 +750,20 @@ void JitShader::Compile_LOOP(Instruction instr) {
Label l_loop_start;
L(l_loop_start);
loop_break_label = Xbyak::Label();
loop_break_labels.emplace_back(Xbyak::Label());
Compile_Block(instr.flow_control.dest_offset + 1);
add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
sub(LOOPCOUNT, 1); // Decrement loop count by 1
jnz(l_loop_start); // Loop if not zero
L(*loop_break_label);
loop_break_label.reset();
looping = false;
L(loop_break_labels.back());
loop_break_labels.pop_back();
if (--loop_depth) {
const auto loop_save_regs = BuildRegSet({LOOPCOUNT_REG, LOOPINC, LOOPCOUNT});
ABI_PopRegistersAndAdjustStack(*this, loop_save_regs, 0);
}
}
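In effect, the generated code now brackets an inner LOOP with a save/restore of the loop registers so the outer loop's state survives; schematically:
    // outer LOOP: LOOPCOUNT_REG/LOOPINC/LOOPCOUNT hold the outer loop's state
    //   enter inner LOOP (loop_depth was 1): push {LOOPCOUNT_REG, LOOPINC, LOOPCOUNT}
    //     inner loop body runs with the registers holding the inner loop's state
    //   leave inner LOOP (loop_depth back to 1): pop {LOOPCOUNT_REG, LOOPINC, LOOPCOUNT}
    //   outer loop resumes with its original counters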
void JitShader::Compile_JMP(Instruction instr) {
@ -892,7 +900,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
// Reset flow control state
program = (CompiledShader*)getCurr();
program_counter = 0;
looping = false;
loop_depth = 0;
instruction_labels.fill(Xbyak::Label());
// Find all `CALL` instructions and identify return locations

View File

@ -120,15 +120,15 @@ private:
/// Mapping of Pica VS instructions to pointers in the emitted code
std::array<Xbyak::Label, MAX_PROGRAM_CODE_LENGTH> instruction_labels;
/// Label pointing to the end of the current LOOP block. Used by the BREAKC instruction to break
/// out of the loop.
std::optional<Xbyak::Label> loop_break_label;
/// Labels pointing to the end of each nested LOOP block. Used by the BREAKC instruction to
/// break out of a loop.
std::vector<Xbyak::Label> loop_break_labels;
/// Offsets in code where a return needs to be inserted
std::vector<unsigned> return_offsets;
unsigned program_counter = 0; ///< Offset of the next instruction to decode
bool looping = false; ///< True if compiling a loop, used to check for nested loops
u8 loop_depth = 0; ///< Depth of the (nested) loops currently compiled
using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
CompiledShader* program = nullptr;

View File

@ -13,7 +13,7 @@ namespace Clipper {
using Shader::OutputVertex;
void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2);
void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2);
} // namespace Clipper
} // namespace Pica

View File

@ -46,7 +46,6 @@ ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory)
OpenGL::GLES = Settings::values.use_gles;
//g_renderer = std::make_unique<OpenGL::RendererOpenGL>(emu_window);
g_renderer = std::make_unique<Vulkan::RendererVulkan>(emu_window);
ResultStatus result = g_renderer->Init();

View File

@ -6,8 +6,8 @@
#include <atomic>
#include <iostream>
#include <functional>
#include <memory>
#include <functional>
#include "core/frontend/emu_window.h"
namespace Frontend {