Compare commits

...

12 Commits

Author SHA1 Message Date
f44933930d Android 260 2024-02-22 00:57:08 +00:00
e96826419b Merge yuzu-emu#13096 2024-02-22 00:57:08 +00:00
3f232161be Merge yuzu-emu#13075 2024-02-22 00:57:08 +00:00
98eb3df15c Merge yuzu-emu#13000 2024-02-22 00:57:08 +00:00
74ba9dde58 Merge yuzu-emu#12982 2024-02-22 00:57:07 +00:00
f3f63aa1e7 Merge yuzu-emu#12749 2024-02-22 00:57:07 +00:00
1bea1ee418 Merge yuzu-emu#12461 2024-02-22 00:57:07 +00:00
8bbc209950 Merge pull request #13105 from t895/connection-fix
android: Misc controller fixes
2024-02-21 10:43:46 -05:00
9e1a67b950 fs: add missing mutex header for member (#13106) 2024-02-21 16:43:05 +01:00
de5422b1fd android: Connect controllers with supported styles
If you try to connect a controller that was previously configured with a style your game does not support, the controller immediately disconnects. This change ensures that the controller being connected is switched to the first supported style index before it is connected.
2024-02-21 08:37:55 -05:00
45f450fca5 android: Add additional check for hasMapping
Controls can have no mapping if they are either "[empty]" or an empty string. This was causing an issue if you reset the mapping on all controllers and then tried to play a game. The check to determine whether auto mapping was required would fail and leave you with no mapped controllers. This feels a bit like user error, but it smooths things out if you forget, so I see it as necessary.
2024-02-21 08:17:30 -05:00
9a3fd76b25 android: Enable all controller styles on emulation shutdown 2024-02-21 08:13:54 -05:00
73 changed files with 13258 additions and 1666 deletions

View File

@ -1,3 +1,17 @@
| Pull Request | Commit | Title | Author | Merged? |
|----|----|----|----|----|
| [12461](https://github.com/yuzu-emu/yuzu//pull/12461) | [`2831f5dc6`](https://github.com/yuzu-emu/yuzu//pull/12461/files) | Rework Nvdec and VIC to fix out-of-order videos, and speed up decoding. | [Kelebek1](https://github.com/Kelebek1/) | Yes |
| [12749](https://github.com/yuzu-emu/yuzu//pull/12749) | [`aad4b0d6f`](https://github.com/yuzu-emu/yuzu//pull/12749/files) | general: workarounds for SMMU syncing issues | [liamwhite](https://github.com/liamwhite/) | Yes |
| [12982](https://github.com/yuzu-emu/yuzu//pull/12982) | [`ef5027712`](https://github.com/yuzu-emu/yuzu//pull/12982/files) | fs: Add FileSystemAccessor and use cmif serialization | [FearlessTobi](https://github.com/FearlessTobi/) | Yes |
| [13000](https://github.com/yuzu-emu/yuzu//pull/13000) | [`461eaca7e`](https://github.com/yuzu-emu/yuzu//pull/13000/files) | device_memory_manager: skip unregistered interfaces on invalidate | [liamwhite](https://github.com/liamwhite/) | Yes |
| [13075](https://github.com/yuzu-emu/yuzu//pull/13075) | [`f46dc3168`](https://github.com/yuzu-emu/yuzu//pull/13075/files) | shader_recompiler: throw on missing geometry streams in geometry shaders | [liamwhite](https://github.com/liamwhite/) | Yes |
| [13096](https://github.com/yuzu-emu/yuzu//pull/13096) | [`0a8759057`](https://github.com/yuzu-emu/yuzu//pull/13096/files) | texture_cache: use two-pass collection for costly load resources | [liamwhite](https://github.com/liamwhite/) | Yes |
End of merge log. You can find the original README.md below the break.
-----
<!-- <!--
SPDX-FileCopyrightText: 2018 yuzu Emulator Project SPDX-FileCopyrightText: 2018 yuzu Emulator Project
SPDX-License-Identifier: GPL-2.0-or-later SPDX-License-Identifier: GPL-2.0-or-later

View File

@ -314,3 +314,10 @@ endif()
if (NOT TARGET SimpleIni::SimpleIni) if (NOT TARGET SimpleIni::SimpleIni)
add_subdirectory(simpleini) add_subdirectory(simpleini)
endif() endif()
# sse2neon
# Declares an INTERFACE (no sources) target whose only job is to add the `sse2neon`
# subdirectory to the include path of anything that links against it.
# The target is created only when ARCHITECTURE_arm64 is set and no `sse2neon` target
# has been defined already.
if (ARCHITECTURE_arm64 AND NOT TARGET sse2neon)
add_library(sse2neon INTERFACE)
target_include_directories(sse2neon INTERFACE sse2neon)
endif()

9282
externals/sse2neon/sse2neon.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -164,6 +164,7 @@ else()
if (MINGW) if (MINGW)
add_definitions(-DMINGW_HAS_SECURE_API) add_definitions(-DMINGW_HAS_SECURE_API)
add_compile_options("-msse4.1")
if (MINGW_STATIC_BUILD) if (MINGW_STATIC_BUILD)
add_definitions(-DQT_STATICPLUGIN) add_definitions(-DQT_STATICPLUGIN)

View File

@ -64,17 +64,17 @@ data class PlayerInput(
fun hasMapping(): Boolean { fun hasMapping(): Boolean {
var hasMapping = false var hasMapping = false
buttons.forEach { buttons.forEach {
if (it != "[empty]") { if (it != "[empty]" && it.isNotEmpty()) {
hasMapping = true hasMapping = true
} }
} }
analogs.forEach { analogs.forEach {
if (it != "[empty]") { if (it != "[empty]" && it.isNotEmpty()) {
hasMapping = true hasMapping = true
} }
} }
motions.forEach { motions.forEach {
if (it != "[empty]") { if (it != "[empty]" && it.isNotEmpty()) {
hasMapping = true hasMapping = true
} }
} }

View File

@ -292,6 +292,9 @@ void EmulationSession::ShutdownEmulation() {
// Unload user input. // Unload user input.
m_system.HIDCore().UnloadInputDevices(); m_system.HIDCore().UnloadInputDevices();
// Enable all controllers
m_system.HIDCore().SetSupportedStyleTag({Core::HID::NpadStyleSet::All});
// Shutdown the main emulated process // Shutdown the main emulated process
if (m_load_result == Core::SystemResultStatus::Success) { if (m_load_result == Core::SystemResultStatus::Success) {
m_system.DetachDebugger(); m_system.DetachDebugger();

View File

@ -102,8 +102,50 @@ void ApplyControllerConfig(size_t player_index,
} }
} }
std::vector<s32> GetSupportedStyles(int player_index) {
auto& hid_core = EmulationSession::GetInstance().System().HIDCore();
const auto npad_style_set = hid_core.GetSupportedStyleTag();
std::vector<s32> supported_indexes;
if (npad_style_set.fullkey == 1) {
supported_indexes.push_back(static_cast<s32>(Core::HID::NpadStyleIndex::Fullkey));
}
if (npad_style_set.joycon_dual == 1) {
supported_indexes.push_back(static_cast<s32>(Core::HID::NpadStyleIndex::JoyconDual));
}
if (npad_style_set.joycon_left == 1) {
supported_indexes.push_back(static_cast<s32>(Core::HID::NpadStyleIndex::JoyconLeft));
}
if (npad_style_set.joycon_right == 1) {
supported_indexes.push_back(static_cast<s32>(Core::HID::NpadStyleIndex::JoyconRight));
}
if (player_index == 0 && npad_style_set.handheld == 1) {
supported_indexes.push_back(static_cast<s32>(Core::HID::NpadStyleIndex::Handheld));
}
if (npad_style_set.gamecube == 1) {
supported_indexes.push_back(static_cast<s32>(Core::HID::NpadStyleIndex::GameCube));
}
return supported_indexes;
}
void ConnectController(size_t player_index, bool connected) { void ConnectController(size_t player_index, bool connected) {
auto& hid_core = EmulationSession::GetInstance().System().HIDCore(); auto& hid_core = EmulationSession::GetInstance().System().HIDCore();
ApplyControllerConfig(player_index, [&](Core::HID::EmulatedController* controller) {
auto supported_styles = GetSupportedStyles(player_index);
auto controller_style = controller->GetNpadStyleIndex(true);
auto style = std::find(supported_styles.begin(), supported_styles.end(),
static_cast<int>(controller_style));
if (style == supported_styles.end() && !supported_styles.empty()) {
controller->SetNpadStyleIndex(
static_cast<Core::HID::NpadStyleIndex>(supported_styles[0]));
}
});
if (player_index == 0) { if (player_index == 0) {
auto* handheld = hid_core.GetEmulatedController(Core::HID::NpadIdType::Handheld); auto* handheld = hid_core.GetEmulatedController(Core::HID::NpadIdType::Handheld);
auto* player_one = hid_core.GetEmulatedController(Core::HID::NpadIdType::Player1); auto* player_one = hid_core.GetEmulatedController(Core::HID::NpadIdType::Player1);
@ -522,36 +564,10 @@ jint Java_org_yuzu_yuzu_1emu_features_input_NativeInput_getButtonNameImpl(JNIEnv
jintArray Java_org_yuzu_yuzu_1emu_features_input_NativeInput_getSupportedStyleTagsImpl( jintArray Java_org_yuzu_yuzu_1emu_features_input_NativeInput_getSupportedStyleTagsImpl(
JNIEnv* env, jobject j_obj, jint j_player_index) { JNIEnv* env, jobject j_obj, jint j_player_index) {
auto& hid_core = EmulationSession::GetInstance().System().HIDCore(); auto supported_styles = GetSupportedStyles(j_player_index);
const auto npad_style_set = hid_core.GetSupportedStyleTag(); jintArray j_supported_indexes = env->NewIntArray(supported_styles.size());
std::vector<s32> supported_indexes; env->SetIntArrayRegion(j_supported_indexes, 0, supported_styles.size(),
if (npad_style_set.fullkey == 1) { supported_styles.data());
supported_indexes.push_back(static_cast<u32>(Core::HID::NpadStyleIndex::Fullkey));
}
if (npad_style_set.joycon_dual == 1) {
supported_indexes.push_back(static_cast<u32>(Core::HID::NpadStyleIndex::JoyconDual));
}
if (npad_style_set.joycon_left == 1) {
supported_indexes.push_back(static_cast<u32>(Core::HID::NpadStyleIndex::JoyconLeft));
}
if (npad_style_set.joycon_right == 1) {
supported_indexes.push_back(static_cast<u32>(Core::HID::NpadStyleIndex::JoyconRight));
}
if (j_player_index == 0 && npad_style_set.handheld == 1) {
supported_indexes.push_back(static_cast<u32>(Core::HID::NpadStyleIndex::Handheld));
}
if (npad_style_set.gamecube == 1) {
supported_indexes.push_back(static_cast<u32>(Core::HID::NpadStyleIndex::GameCube));
}
jintArray j_supported_indexes = env->NewIntArray(supported_indexes.size());
env->SetIntArrayRegion(j_supported_indexes, 0, supported_indexes.size(),
supported_indexes.data());
return j_supported_indexes; return j_supported_indexes;
} }

View File

@ -59,8 +59,12 @@ add_library(core STATIC
file_sys/fs_path.h file_sys/fs_path.h
file_sys/fs_path_utility.h file_sys/fs_path_utility.h
file_sys/fs_string_util.h file_sys/fs_string_util.h
file_sys/fsa/fs_i_directory.h
file_sys/fsa/fs_i_file.h
file_sys/fsa/fs_i_filesystem.h
file_sys/fsmitm_romfsbuild.cpp file_sys/fsmitm_romfsbuild.cpp
file_sys/fsmitm_romfsbuild.h file_sys/fsmitm_romfsbuild.h
file_sys/fssrv/fssrv_sf_path.h
file_sys/fssystem/fs_i_storage.h file_sys/fssystem/fs_i_storage.h
file_sys/fssystem/fs_types.h file_sys/fssystem/fs_types.h
file_sys/fssystem/fssystem_aes_ctr_counter_extended_storage.cpp file_sys/fssystem/fssystem_aes_ctr_counter_extended_storage.cpp

View File

@ -43,6 +43,8 @@ public:
DeviceMemoryManager(const DeviceMemory& device_memory); DeviceMemoryManager(const DeviceMemory& device_memory);
~DeviceMemoryManager(); ~DeviceMemoryManager();
static constexpr bool HAS_FLUSH_INVALIDATION = true;
void BindInterface(DeviceInterface* device_inter); void BindInterface(DeviceInterface* device_inter);
DAddr Allocate(size_t size); DAddr Allocate(size_t size);

View File

@ -522,13 +522,17 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
auto* memory_device_inter = registered_processes[asid.id]; auto* memory_device_inter = registered_processes[asid.id];
const auto release_pending = [&] { const auto release_pending = [&] {
if (uncache_bytes > 0) { if (uncache_bytes > 0) {
MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, if (memory_device_inter != nullptr) {
uncache_bytes, false); MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
uncache_bytes, false);
}
uncache_bytes = 0; uncache_bytes = 0;
} }
if (cache_bytes > 0) { if (cache_bytes > 0) {
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, if (memory_device_inter != nullptr) {
cache_bytes, true); MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
cache_bytes, true);
}
cache_bytes = 0; cache_bytes = 0;
} }
}; };

View File

@ -23,6 +23,8 @@ enum class OpenDirectoryMode : u64 {
File = (1 << 1), File = (1 << 1),
All = (Directory | File), All = (Directory | File),
NotRequireFileSize = (1ULL << 31),
}; };
DECLARE_ENUM_FLAG_OPERATORS(OpenDirectoryMode) DECLARE_ENUM_FLAG_OPERATORS(OpenDirectoryMode)
@ -36,4 +38,29 @@ enum class CreateOption : u8 {
BigFile = (1 << 0), BigFile = (1 << 0),
}; };
// Fixed-layout attribute record describing name/path length limits of a file system.
// The static_assert after the struct pins sizeof(FileSystemAttribute) to 0xC0, so fields and
// padding entries must not be added, removed or reordered.
struct FileSystemAttribute {
// One-byte "*_defined" fields; there is one for each s32 limit declared further down.
// NOTE(review): presumably non-zero marks the matching limit as valid — confirm with the users
// of this struct.
u8 dir_entry_name_length_max_defined;
u8 file_entry_name_length_max_defined;
u8 dir_path_name_length_max_defined;
u8 file_path_name_length_max_defined;
INSERT_PADDING_BYTES_NOINIT(0x5);
// Same set of "*_defined" fields for the UTF-16 limits.
u8 utf16_dir_entry_name_length_max_defined;
u8 utf16_file_entry_name_length_max_defined;
u8 utf16_dir_path_name_length_max_defined;
u8 utf16_file_path_name_length_max_defined;
INSERT_PADDING_BYTES_NOINIT(0x18);
// Maximum entry-name and path-name lengths for directories and files.
s32 dir_entry_name_length_max;
s32 file_entry_name_length_max;
s32 dir_path_name_length_max;
s32 file_path_name_length_max;
INSERT_PADDING_WORDS_NOINIT(0x5);
// UTF-16 counterparts of the four limits above.
s32 utf16_dir_entry_name_length_max;
s32 utf16_file_entry_name_length_max;
s32 utf16_dir_path_name_length_max;
s32 utf16_file_path_name_length_max;
INSERT_PADDING_WORDS_NOINIT(0x18);
INSERT_PADDING_WORDS_NOINIT(0x1);
};
// Guards the layout: any change to the fields or padding above must keep the total at 0xC0.
static_assert(sizeof(FileSystemAttribute) == 0xC0, "FileSystemAttribute has incorrect size");
} // namespace FileSys } // namespace FileSys

View File

@ -10,7 +10,7 @@ namespace FileSys {
constexpr size_t RequiredAlignment = alignof(u64); constexpr size_t RequiredAlignment = alignof(u64);
void* AllocateUnsafe(size_t size) { inline void* AllocateUnsafe(size_t size) {
// Allocate // Allocate
void* const ptr = ::operator new(size, std::align_val_t{RequiredAlignment}); void* const ptr = ::operator new(size, std::align_val_t{RequiredAlignment});
@ -21,16 +21,16 @@ void* AllocateUnsafe(size_t size) {
return ptr; return ptr;
} }
void DeallocateUnsafe(void* ptr, size_t size) { inline void DeallocateUnsafe(void* ptr, size_t size) {
// Deallocate the pointer // Deallocate the pointer
::operator delete(ptr, std::align_val_t{RequiredAlignment}); ::operator delete(ptr, std::align_val_t{RequiredAlignment});
} }
void* Allocate(size_t size) { inline void* Allocate(size_t size) {
return AllocateUnsafe(size); return AllocateUnsafe(size);
} }
void Deallocate(void* ptr, size_t size) { inline void Deallocate(void* ptr, size_t size) {
// If the pointer is non-null, deallocate it // If the pointer is non-null, deallocate it
if (ptr != nullptr) { if (ptr != nullptr) {
DeallocateUnsafe(ptr, size); DeallocateUnsafe(ptr, size);

View File

@ -381,7 +381,7 @@ public:
// Check that it's possible for us to remove a child // Check that it's possible for us to remove a child
auto* p = m_write_buffer.Get(); auto* p = m_write_buffer.Get();
s32 len = std::strlen(p); s32 len = static_cast<s32>(std::strlen(p));
R_UNLESS(len != 1 || (p[0] != '/' && p[0] != '.'), ResultNotImplemented); R_UNLESS(len != 1 || (p[0] != '/' && p[0] != '.'), ResultNotImplemented);
// Handle a trailing separator // Handle a trailing separator

View File

@ -426,9 +426,10 @@ public:
R_SUCCEED(); R_SUCCEED();
} }
static Result Normalize(char* dst, size_t* out_len, const char* path, size_t max_out_size, static constexpr Result Normalize(char* dst, size_t* out_len, const char* path,
bool is_windows_path, bool is_drive_relative_path, size_t max_out_size, bool is_windows_path,
bool allow_all_characters = false) { bool is_drive_relative_path,
bool allow_all_characters = false) {
// Use StringTraits names for remainder of scope // Use StringTraits names for remainder of scope
using namespace StringTraits; using namespace StringTraits;

View File

@ -19,6 +19,11 @@ constexpr int Strlen(const T* str) {
return length; return length;
} }
// Forward declaration of the int-count overload, which is defined further down in this file.
// It has to be visible here: the call inside the std::size_t overload is a dependent,
// unqualified call whose arguments are pointers to fundamental types, so argument-dependent
// lookup cannot find overloads declared later. Without this declaration the only candidate on a
// conforming compiler is the std::size_t overload itself, which then recurses forever.
template <typename T>
constexpr int Strnlen(const T* str, int count);

// Convenience overload for callers holding a std::size_t count.
// Narrows `count` to int and forwards to the int-count overload.
template <typename T>
constexpr int Strnlen(const T* str, std::size_t count) {
    return Strnlen(str, static_cast<int>(count));
}
template <typename T> template <typename T>
constexpr int Strnlen(const T* str, int count) { constexpr int Strnlen(const T* str, int count) {
ASSERT(str != nullptr); ASSERT(str != nullptr);
@ -32,6 +37,11 @@ constexpr int Strnlen(const T* str, int count) {
return length; return length;
} }
// Forward declaration of the int-count overload, which is defined further down in this file.
// It has to be visible here: the call inside the std::size_t overload is a dependent,
// unqualified call whose arguments are pointers to fundamental types, so argument-dependent
// lookup cannot find overloads declared later. Without this declaration the only candidate on a
// conforming compiler is the std::size_t overload itself, which then recurses forever.
template <typename T>
constexpr int Strncmp(const T* lhs, const T* rhs, int count);

// Convenience overload for callers holding a std::size_t count.
// Narrows `count` to int and forwards to the int-count overload.
template <typename T>
constexpr int Strncmp(const T* lhs, const T* rhs, std::size_t count) {
    return Strncmp(lhs, rhs, static_cast<int>(count));
}
template <typename T> template <typename T>
constexpr int Strncmp(const T* lhs, const T* rhs, int count) { constexpr int Strncmp(const T* lhs, const T* rhs, int count) {
ASSERT(lhs != nullptr); ASSERT(lhs != nullptr);
@ -51,6 +61,11 @@ constexpr int Strncmp(const T* lhs, const T* rhs, int count) {
return l - r; return l - r;
} }
// Forward declaration of the int-count overload, which is defined further down in this file.
// It has to be visible here: the call inside the std::size_t overload is a dependent call whose
// arguments are pointers to fundamental types, so argument-dependent lookup cannot find
// overloads declared later. Without this declaration the only candidate on a conforming
// compiler is the std::size_t overload itself, which then recurses forever.
template <typename T>
static constexpr int Strlcpy(T* dst, const T* src, int count);

// Convenience overload for callers holding a std::size_t count.
// Narrows `count` to int and forwards to the int-count overload.
template <typename T>
static constexpr int Strlcpy(T* dst, const T* src, std::size_t count) {
    return Strlcpy<T>(dst, src, static_cast<int>(count));
}
template <typename T> template <typename T>
static constexpr int Strlcpy(T* dst, const T* src, int count) { static constexpr int Strlcpy(T* dst, const T* src, int count) {
ASSERT(dst != nullptr); ASSERT(dst != nullptr);

View File

@ -0,0 +1,91 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/common_types.h"
#include "core/file_sys/errors.h"
#include "core/file_sys/fs_directory.h"
#include "core/file_sys/fs_file.h"
#include "core/file_sys/fs_filesystem.h"
#include "core/file_sys/savedata_factory.h"
#include "core/file_sys/vfs/vfs.h"
#include "core/hle/result.h"
namespace FileSys::Fsa {
// Directory accessor that captures the entries of a VirtualDir when it is constructed and then
// hands them out sequentially through Read().
class IDirectory {
public:
    // Stores the backend and builds the entry list for the entry kinds selected by `mode`
    // (subdirectories first, then files).
    explicit IDirectory(VirtualDir backend_, OpenDirectoryMode mode)
        : backend(std::move(backend_)) {
        // TODO(DarkLordZach): Verify that this is the correct behavior.
        // The index is built eagerly so that later reads only copy from it.
        if (True(mode & OpenDirectoryMode::Directory)) {
            BuildEntryIndex(backend->GetSubdirectories(), DirectoryEntryType::Directory);
        }
        if (True(mode & OpenDirectoryMode::File)) {
            BuildEntryIndex(backend->GetFiles(), DirectoryEntryType::File);
        }
    }

    virtual ~IDirectory() = default;

    // Copies up to `max_entries` not-yet-returned entries into `out_entries` and reports how
    // many were copied through `out_count`. A request for zero entries succeeds immediately.
    Result Read(s64* out_count, DirectoryEntry* out_entries, s64 max_entries) {
        R_UNLESS(out_count != nullptr, ResultNullptrArgument);

        // Nothing requested: report zero entries without looking at the output buffer.
        if (max_entries == 0) {
            *out_count = 0;
            R_SUCCEED();
        }

        R_UNLESS(out_entries != nullptr, ResultNullptrArgument);
        R_UNLESS(max_entries > 0, ResultInvalidArgument);

        R_RETURN(this->DoRead(out_count, out_entries, max_entries));
    }

    // Reports how many entries have not been returned by Read() yet.
    Result GetEntryCount(s64* out) {
        R_UNLESS(out != nullptr, ResultNullptrArgument);
        R_RETURN(this->DoGetEntryCount(out));
    }

private:
    // Copies the next batch of entries and advances the read cursor.
    Result DoRead(s64* out_count, DirectoryEntry* out_entries, s64 max_entries) {
        const u64 remaining = entries.size() - next_entry_index;
        const u64 batch = std::min(static_cast<u64>(max_entries), remaining);

        const DirectoryEntry* const first = entries.data() + next_entry_index;
        const auto byte_count = static_cast<std::size_t>(batch * sizeof(DirectoryEntry));

        next_entry_index += batch;
        *out_count = static_cast<s64>(batch);

        std::memcpy(out_entries, first, byte_count);
        R_SUCCEED();
    }

    // Number of entries between the read cursor and the end of the index.
    Result DoGetEntryCount(s64* out) {
        *out = static_cast<s64>(entries.size() - next_entry_index);
        R_SUCCEED();
    }

    // TODO: Remove this when VFS is gone
    // Appends one DirectoryEntry per element of `new_data`. Files whose name equals
    // GetSaveDataSizeFileName() are left out; directories are recorded with size 0.
    template <typename T>
    void BuildEntryIndex(const std::vector<T>& new_data, DirectoryEntryType type) {
        const bool is_file = type == DirectoryEntryType::File;

        entries.reserve(entries.size() + new_data.size());
        for (const auto& item : new_data) {
            auto item_name = item->GetName();
            if (is_file && item_name == GetSaveDataSizeFileName()) {
                continue;
            }

            entries.emplace_back(item_name, static_cast<s8>(type),
                                 type == DirectoryEntryType::Directory ? 0 : item->GetSize());
        }
    }

    VirtualDir backend;
    std::vector<DirectoryEntry> entries;
    u64 next_entry_index = 0;
};
} // namespace FileSys::Fsa

View File

@ -0,0 +1,167 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/overflow.h"
#include "core/file_sys/errors.h"
#include "core/file_sys/fs_file.h"
#include "core/file_sys/fs_filesystem.h"
#include "core/file_sys/fs_operate_range.h"
#include "core/file_sys/vfs/vfs.h"
#include "core/file_sys/vfs/vfs_types.h"
#include "core/hle/result.h"
namespace FileSys::Fsa {
// File accessor layered over a VirtualFile backend. The public entry points validate their
// arguments and then delegate to the private Do* implementations.
class IFile {
public:
    explicit IFile(VirtualFile backend_) : backend(std::move(backend_)) {}
    virtual ~IFile() = default;

    // Reads up to `size` bytes starting at `offset` into `buffer`; the number of bytes read is
    // stored in `*out`.
    Result Read(size_t* out, s64 offset, void* buffer, size_t size, const ReadOption& option) {
        // The byte count has to be reported somewhere
        R_UNLESS(out != nullptr, ResultNullptrArgument);

        // A zero-length read succeeds without looking at the buffer
        if (size == 0) {
            *out = 0;
            R_SUCCEED();
        }

        // Validate the destination and the requested range
        R_UNLESS(buffer != nullptr, ResultNullptrArgument);
        R_UNLESS(offset >= 0, ResultOutOfRange);
        const bool range_fits = Common::CanAddWithoutOverflow<s64>(offset, size);
        R_UNLESS(range_fits, ResultOutOfRange);

        // Hand over to the implementation
        R_RETURN(this->DoRead(out, offset, buffer, size, option));
    }

    // Same as the five-argument Read, using ReadOption::None.
    Result Read(size_t* out, s64 offset, void* buffer, size_t size) {
        R_RETURN(this->Read(out, offset, buffer, size, ReadOption::None));
    }

    // Stores the file size in `*out`.
    Result GetSize(s64* out) {
        R_UNLESS(out != nullptr, ResultNullptrArgument);
        R_RETURN(this->DoGetSize(out));
    }

    Result Flush() {
        R_RETURN(this->DoFlush());
    }

    // Writes `size` bytes from `buffer` at `offset`.
    Result Write(s64 offset, const void* buffer, size_t size, const WriteOption& option) {
        // An empty write only has an effect when a flush was requested
        if (size == 0) {
            if (option.HasFlushFlag()) {
                R_TRY(this->Flush());
            }
            R_SUCCEED();
        }

        // Validate the source and the requested range
        R_UNLESS(buffer != nullptr, ResultNullptrArgument);
        R_UNLESS(offset >= 0, ResultOutOfRange);
        const bool range_fits = Common::CanAddWithoutOverflow<s64>(offset, size);
        R_UNLESS(range_fits, ResultOutOfRange);

        R_RETURN(this->DoWrite(offset, buffer, size, option));
    }

    // Resizes the file; negative sizes are rejected.
    Result SetSize(s64 size) {
        R_UNLESS(size >= 0, ResultOutOfRange);
        R_RETURN(this->DoSetSize(size));
    }

    Result OperateRange(void* dst, size_t dst_size, OperationId op_id, s64 offset, s64 size,
                        const void* src, size_t src_size) {
        R_RETURN(this->DoOperateRange(dst, dst_size, op_id, offset, size, src, src_size));
    }

    // Variant without input/output buffers.
    Result OperateRange(OperationId op_id, s64 offset, s64 size) {
        R_RETURN(this->DoOperateRange(nullptr, 0, op_id, offset, size, nullptr, 0));
    }

protected:
    // Checks whether a read would be permitted under `open_mode` and computes how many bytes it
    // would return, without performing it.
    Result DryRead(size_t* out, s64 offset, size_t size, const ReadOption& option,
                   OpenMode open_mode) {
        // Reading requires the Read bit
        const bool readable = static_cast<u32>(open_mode & OpenMode::Read) != 0;
        R_UNLESS(readable, ResultReadNotPermitted);

        // The offset may not lie beyond the end of the file
        s64 current_size = 0;
        R_TRY(this->DoGetSize(&current_size));
        R_UNLESS(offset <= current_size, ResultOutOfRange);

        const s64 available = current_size - offset;
        *out = static_cast<size_t>(std::min(available, static_cast<s64>(size)));
        R_SUCCEED();
    }

    // Checks whether a resize would be permitted under `open_mode`.
    Result DrySetSize(s64 size, OpenMode open_mode) {
        // Resizing requires the Write bit
        const bool writable = static_cast<u32>(open_mode & OpenMode::Write) != 0;
        R_UNLESS(writable, ResultWriteNotPermitted);
        R_SUCCEED();
    }

    // Checks whether a write would be permitted under `open_mode` and reports through
    // `*out_append` whether it would extend the file.
    Result DryWrite(bool* out_append, s64 offset, size_t size, const WriteOption& option,
                    OpenMode open_mode) {
        // Writing requires the Write bit
        const bool writable = static_cast<u32>(open_mode & OpenMode::Write) != 0;
        R_UNLESS(writable, ResultWriteNotPermitted);

        // Fetch the current size
        s64 current_size = 0;
        R_TRY(this->DoGetSize(&current_size));

        // A write that ends past the current size grows the file, which needs AllowAppend
        *out_append = false;
        const s64 write_end = offset + static_cast<s64>(size);
        if (write_end > current_size) {
            const bool may_append = static_cast<u32>(open_mode & OpenMode::AllowAppend) != 0;
            R_UNLESS(may_append, ResultFileExtensionWithoutOpenModeAllowAppend);
            *out_append = true;
        }

        R_SUCCEED();
    }

private:
    Result DoRead(size_t* out, s64 offset, void* buffer, size_t size, const ReadOption& option) {
        *out = backend->Read(static_cast<u8*>(buffer), size, offset);
        R_SUCCEED();
    }

    Result DoGetSize(s64* out) {
        *out = backend->GetSize();
        R_SUCCEED();
    }

    Result DoFlush() {
        // Exists for SDK compatibility -- No need to flush file.
        R_SUCCEED();
    }

    Result DoWrite(s64 offset, const void* buffer, size_t size, const WriteOption& option) {
        const std::size_t bytes_written =
            backend->Write(static_cast<const u8*>(buffer), size, offset);

        // A short write is reported through the assert only; the call still succeeds.
        ASSERT_MSG(bytes_written == size,
                   "Could not write all bytes to file (requested={:016X}, actual={:016X}).", size,
                   bytes_written);

        R_SUCCEED();
    }

    Result DoSetSize(s64 size) {
        backend->Resize(size);
        R_SUCCEED();
    }

    Result DoOperateRange(void* dst, size_t dst_size, OperationId op_id, s64 offset, s64 size,
                          const void* src, size_t src_size) {
        R_THROW(ResultNotImplemented);
    }

    VirtualFile backend;
};
} // namespace FileSys::Fsa

View File

@ -0,0 +1,206 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "core/file_sys/errors.h"
#include "core/file_sys/fs_filesystem.h"
#include "core/file_sys/fs_path.h"
#include "core/file_sys/vfs/vfs_types.h"
#include "core/hle/result.h"
#include "core/hle/service/filesystem/filesystem.h"
namespace FileSys::Fsa {
class IFile;
class IDirectory;
// Identifies the kind of query passed to IFileSystem::QueryEntry.
// The numeric values are spelled out explicitly.
// NOTE(review): presumably they mirror IDs used by the guest-facing interface — confirm before
// renumbering.
enum class QueryId : u32 {
SetConcatenationFileAttribute = 0,
UpdateMac = 1,
IsSignedSystemPartitionOnSdCardValid = 2,
QueryUnpreparedFileInformation = 3,
};
// File system accessor built on a Service::FileSystem::VfsDirectoryServiceWrapper.
// Each public method performs its argument checks (where it has any) and then delegates to the
// private Do* method of the same name. The Do* methods either forward to the wrapper using the
// path's string form or return ResultNotImplemented.
class IFileSystem {
public:
// Wraps the given directory in the service wrapper stored in `backend`.
explicit IFileSystem(VirtualDir backend_) : backend{std::move(backend_)} {}
virtual ~IFileSystem() {}
// Creates a file of `size` bytes; negative sizes are rejected with ResultOutOfRange.
Result CreateFile(const Path& path, s64 size, CreateOption option) {
R_UNLESS(size >= 0, ResultOutOfRange);
R_RETURN(this->DoCreateFile(path, size, static_cast<int>(option)));
}
// Same as above with CreateOption::None.
Result CreateFile(const Path& path, s64 size) {
R_RETURN(this->CreateFile(path, size, CreateOption::None));
}
Result DeleteFile(const Path& path) {
R_RETURN(this->DoDeleteFile(path));
}
Result CreateDirectory(const Path& path) {
R_RETURN(this->DoCreateDirectory(path));
}
Result DeleteDirectory(const Path& path) {
R_RETURN(this->DoDeleteDirectory(path));
}
Result DeleteDirectoryRecursively(const Path& path) {
R_RETURN(this->DoDeleteDirectoryRecursively(path));
}
Result RenameFile(const Path& old_path, const Path& new_path) {
R_RETURN(this->DoRenameFile(old_path, new_path));
}
Result RenameDirectory(const Path& old_path, const Path& new_path) {
R_RETURN(this->DoRenameDirectory(old_path, new_path));
}
Result GetEntryType(DirectoryEntryType* out, const Path& path) {
R_RETURN(this->DoGetEntryType(out, path));
}
// Opens a file. `mode` must contain at least one bit of OpenMode::ReadWrite and no bit outside
// OpenMode::All; otherwise ResultInvalidOpenMode is returned.
Result OpenFile(VirtualFile* out_file, const Path& path, OpenMode mode) {
R_UNLESS(out_file != nullptr, ResultNullptrArgument);
R_UNLESS(static_cast<u32>(mode & OpenMode::ReadWrite) != 0, ResultInvalidOpenMode);
R_UNLESS(static_cast<u32>(mode & ~OpenMode::All) == 0, ResultInvalidOpenMode);
R_RETURN(this->DoOpenFile(out_file, path, mode));
}
// Opens a directory. `mode` must contain at least one bit of OpenDirectoryMode::All and no bit
// other than those in All and NotRequireFileSize; otherwise ResultInvalidOpenMode is returned.
Result OpenDirectory(VirtualDir* out_dir, const Path& path, OpenDirectoryMode mode) {
R_UNLESS(out_dir != nullptr, ResultNullptrArgument);
R_UNLESS(static_cast<u64>(mode & OpenDirectoryMode::All) != 0, ResultInvalidOpenMode);
R_UNLESS(static_cast<u64>(
mode & ~(OpenDirectoryMode::All | OpenDirectoryMode::NotRequireFileSize)) == 0,
ResultInvalidOpenMode);
R_RETURN(this->DoOpenDirectory(out_dir, path, mode));
}
Result Commit() {
R_RETURN(this->DoCommit());
}
Result GetFreeSpaceSize(s64* out, const Path& path) {
R_UNLESS(out != nullptr, ResultNullptrArgument);
R_RETURN(this->DoGetFreeSpaceSize(out, path));
}
Result GetTotalSpaceSize(s64* out, const Path& path) {
R_UNLESS(out != nullptr, ResultNullptrArgument);
R_RETURN(this->DoGetTotalSpaceSize(out, path));
}
Result CleanDirectoryRecursively(const Path& path) {
R_RETURN(this->DoCleanDirectoryRecursively(path));
}
Result GetFileTimeStampRaw(FileTimeStampRaw* out, const Path& path) {
R_UNLESS(out != nullptr, ResultNullptrArgument);
R_RETURN(this->DoGetFileTimeStampRaw(out, path));
}
Result QueryEntry(char* dst, size_t dst_size, const char* src, size_t src_size, QueryId query,
const Path& path) {
R_RETURN(this->DoQueryEntry(dst, dst_size, src, src_size, query, path));
}
// These aren't accessible as commands
Result CommitProvisionally(s64 counter) {
R_RETURN(this->DoCommitProvisionally(counter));
}
Result Rollback() {
R_RETURN(this->DoRollback());
}
Result Flush() {
R_RETURN(this->DoFlush());
}
private:
// `flags` is accepted but not passed on to the backend.
Result DoCreateFile(const Path& path, s64 size, int flags) {
R_RETURN(backend.CreateFile(path.GetString(), size));
}
Result DoDeleteFile(const Path& path) {
R_RETURN(backend.DeleteFile(path.GetString()));
}
Result DoCreateDirectory(const Path& path) {
R_RETURN(backend.CreateDirectory(path.GetString()));
}
Result DoDeleteDirectory(const Path& path) {
R_RETURN(backend.DeleteDirectory(path.GetString()));
}
Result DoDeleteDirectoryRecursively(const Path& path) {
R_RETURN(backend.DeleteDirectoryRecursively(path.GetString()));
}
Result DoRenameFile(const Path& old_path, const Path& new_path) {
R_RETURN(backend.RenameFile(old_path.GetString(), new_path.GetString()));
}
Result DoRenameDirectory(const Path& old_path, const Path& new_path) {
R_RETURN(backend.RenameDirectory(old_path.GetString(), new_path.GetString()));
}
Result DoGetEntryType(DirectoryEntryType* out, const Path& path) {
R_RETURN(backend.GetEntryType(out, path.GetString()));
}
Result DoOpenFile(VirtualFile* out_file, const Path& path, OpenMode mode) {
R_RETURN(backend.OpenFile(out_file, path.GetString(), mode));
}
// `mode` is accepted but not passed on to the backend.
Result DoOpenDirectory(VirtualDir* out_directory, const Path& path, OpenDirectoryMode mode) {
R_RETURN(backend.OpenDirectory(out_directory, path.GetString()));
}
// The operations below have no backend counterpart here and report ResultNotImplemented.
Result DoCommit() {
R_THROW(ResultNotImplemented);
}
Result DoGetFreeSpaceSize(s64* out, const Path& path) {
R_THROW(ResultNotImplemented);
}
Result DoGetTotalSpaceSize(s64* out, const Path& path) {
R_THROW(ResultNotImplemented);
}
Result DoCleanDirectoryRecursively(const Path& path) {
R_RETURN(backend.CleanDirectoryRecursively(path.GetString()));
}
Result DoGetFileTimeStampRaw(FileTimeStampRaw* out, const Path& path) {
R_RETURN(backend.GetFileTimeStampRaw(out, path.GetString()));
}
Result DoQueryEntry(char* dst, size_t dst_size, const char* src, size_t src_size, QueryId query,
const Path& path) {
R_THROW(ResultNotImplemented);
}
// These aren't accessible as commands
Result DoCommitProvisionally(s64 counter) {
R_THROW(ResultNotImplemented);
}
Result DoRollback() {
R_THROW(ResultNotImplemented);
}
Result DoFlush() {
R_THROW(ResultNotImplemented);
}
// Service-side wrapper around the directory passed to the constructor; target of all forwards.
Service::FileSystem::VfsDirectoryServiceWrapper backend;
};
} // namespace FileSys::Fsa

View File

@ -0,0 +1,36 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "core/file_sys/fs_directory.h"
namespace FileSys::Sf {
// Fixed-size character buffer holding a path of at most EntryNameLengthMax characters plus a
// terminating NUL.
struct Path {
    char str[EntryNameLengthMax + 1];

    // Builds a Path from the C string `p`. At most sizeof(Path) - 1 characters are copied; the
    // remainder of the buffer stays zero, so the result is always NUL-terminated.
    static constexpr Path Encode(const char* p) {
        Path encoded{};
        for (size_t pos = 0; pos < sizeof(Path) - 1 && p[pos] != '\x00'; ++pos) {
            encoded.str[pos] = p[pos];
        }
        return encoded;
    }

    // Counts the characters before the first NUL, looking at no more than sizeof(Path) - 1
    // characters.
    static constexpr size_t GetPathLength(const Path& path) {
        size_t pos = 0;
        while (pos < sizeof(path) - 1 && path.str[pos] != '\x00') {
            ++pos;
        }
        return pos;
    }
};
static_assert(std::is_trivially_copyable_v<Path>, "Path must be trivially copyable.");
using FspPath = Path;
} // namespace FileSys::Sf

View File

@ -3,6 +3,7 @@
#pragma once #pragma once
#include <mutex>
#include <optional> #include <optional>
#include "core/crypto/aes_util.h" #include "core/crypto/aes_util.h"

View File

@ -44,15 +44,32 @@ public:
GuestMemory() = delete; GuestMemory() = delete;
explicit GuestMemory(M& memory, u64 addr, std::size_t size, explicit GuestMemory(M& memory, u64 addr, std::size_t size,
Common::ScratchBuffer<T>* backup = nullptr) Common::ScratchBuffer<T>* backup = nullptr)
: m_memory{memory}, m_addr{addr}, m_size{size} { : m_memory{&memory}, m_addr{addr}, m_size{size} {
static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
if constexpr (FLAGS & GuestMemoryFlags::Read) { if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
if (!this->TrySetSpan()) {
if (backup) {
backup->resize_destructive(this->size());
m_data_span = *backup;
m_span_valid = true;
m_is_data_copy = true;
} else {
m_data_copy.resize(this->size());
m_data_span = std::span(m_data_copy);
m_span_valid = true;
m_is_data_copy = true;
}
}
} else if constexpr (FLAGS & GuestMemoryFlags::Read) {
Read(addr, size, backup); Read(addr, size, backup);
} }
} }
~GuestMemory() = default; ~GuestMemory() = default;
GuestMemory(GuestMemory&& rhs) = default;
GuestMemory& operator=(GuestMemory&& rhs) = default;
T* data() noexcept { T* data() noexcept {
return m_data_span.data(); return m_data_span.data();
} }
@ -109,8 +126,8 @@ public:
} }
if (this->TrySetSpan()) { if (this->TrySetSpan()) {
if constexpr (FLAGS & GuestMemoryFlags::Safe) { if constexpr (FLAGS & GuestMemoryFlags::Safe && M::HAS_FLUSH_INVALIDATION) {
m_memory.FlushRegion(m_addr, this->size_bytes()); m_memory->FlushRegion(m_addr, this->size_bytes());
} }
} else { } else {
if (backup) { if (backup) {
@ -123,9 +140,9 @@ public:
m_is_data_copy = true; m_is_data_copy = true;
m_span_valid = true; m_span_valid = true;
if constexpr (FLAGS & GuestMemoryFlags::Safe) { if constexpr (FLAGS & GuestMemoryFlags::Safe) {
m_memory.ReadBlock(m_addr, this->data(), this->size_bytes()); m_memory->ReadBlock(m_addr, this->data(), this->size_bytes());
} else { } else {
m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes()); m_memory->ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
} }
} }
return m_data_span; return m_data_span;
@ -133,18 +150,19 @@ public:
void Write(std::span<T> write_data) noexcept { void Write(std::span<T> write_data) noexcept {
if constexpr (FLAGS & GuestMemoryFlags::Cached) { if constexpr (FLAGS & GuestMemoryFlags::Cached) {
m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes()); m_memory->WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) { } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes()); m_memory->WriteBlock(m_addr, write_data.data(), this->size_bytes());
} else { } else {
m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes()); m_memory->WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
} }
} }
bool TrySetSpan() noexcept { bool TrySetSpan() noexcept {
if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) { if (u8* ptr = m_memory->GetSpan(m_addr, this->size_bytes()); ptr) {
m_data_span = {reinterpret_cast<T*>(ptr), this->size()}; m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
m_span_valid = true; m_span_valid = true;
m_is_data_copy = false;
return true; return true;
} }
return false; return false;
@ -159,7 +177,7 @@ protected:
return m_addr_changed; return m_addr_changed;
} }
M& m_memory; M* m_memory;
u64 m_addr{}; u64 m_addr{};
size_t m_size{}; size_t m_size{};
std::span<T> m_data_span{}; std::span<T> m_data_span{};
@ -175,17 +193,7 @@ public:
GuestMemoryScoped() = delete; GuestMemoryScoped() = delete;
explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size, explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
Common::ScratchBuffer<T>* backup = nullptr) Common::ScratchBuffer<T>* backup = nullptr)
: GuestMemory<M, T, FLAGS>(memory, addr, size, backup) { : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {}
if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
if (!this->TrySetSpan()) {
if (backup) {
this->m_data_span = *backup;
this->m_span_valid = true;
this->m_is_data_copy = true;
}
}
}
}
~GuestMemoryScoped() { ~GuestMemoryScoped() {
if constexpr (FLAGS & GuestMemoryFlags::Write) { if constexpr (FLAGS & GuestMemoryFlags::Write) {
@ -196,15 +204,17 @@ public:
if (this->AddressChanged() || this->IsDataCopy()) { if (this->AddressChanged() || this->IsDataCopy()) {
ASSERT(this->m_span_valid); ASSERT(this->m_span_valid);
if constexpr (FLAGS & GuestMemoryFlags::Cached) { if constexpr (FLAGS & GuestMemoryFlags::Cached) {
this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes()); this->m_memory->WriteBlockCached(this->m_addr, this->data(),
this->size_bytes());
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) { } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes()); this->m_memory->WriteBlock(this->m_addr, this->data(), this->size_bytes());
} else { } else {
this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes()); this->m_memory->WriteBlockUnsafe(this->m_addr, this->data(),
this->size_bytes());
} }
} else if constexpr ((FLAGS & GuestMemoryFlags::Safe) || } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
(FLAGS & GuestMemoryFlags::Cached)) { (FLAGS & GuestMemoryFlags::Cached)) {
this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes()); this->m_memory->InvalidateRegion(this->m_addr, this->size_bytes());
} }
} }
} }

View File

@ -3,82 +3,34 @@
#include "core/file_sys/fs_filesystem.h" #include "core/file_sys/fs_filesystem.h"
#include "core/file_sys/savedata_factory.h" #include "core/file_sys/savedata_factory.h"
#include "core/hle/service/cmif_serialization.h"
#include "core/hle/service/filesystem/fsp/fs_i_directory.h" #include "core/hle/service/filesystem/fsp/fs_i_directory.h"
#include "core/hle/service/ipc_helpers.h"
namespace Service::FileSystem { namespace Service::FileSystem {
template <typename T> IDirectory::IDirectory(Core::System& system_, FileSys::VirtualDir directory_,
static void BuildEntryIndex(std::vector<FileSys::DirectoryEntry>& entries,
const std::vector<T>& new_data, FileSys::DirectoryEntryType type) {
entries.reserve(entries.size() + new_data.size());
for (const auto& new_entry : new_data) {
auto name = new_entry->GetName();
if (type == FileSys::DirectoryEntryType::File &&
name == FileSys::GetSaveDataSizeFileName()) {
continue;
}
entries.emplace_back(name, static_cast<s8>(type),
type == FileSys::DirectoryEntryType::Directory ? 0
: new_entry->GetSize());
}
}
IDirectory::IDirectory(Core::System& system_, FileSys::VirtualDir backend_,
FileSys::OpenDirectoryMode mode) FileSys::OpenDirectoryMode mode)
: ServiceFramework{system_, "IDirectory"}, backend(std::move(backend_)) { : ServiceFramework{system_, "IDirectory"},
backend(std::make_unique<FileSys::Fsa::IDirectory>(directory_, mode)) {
static const FunctionInfo functions[] = { static const FunctionInfo functions[] = {
{0, &IDirectory::Read, "Read"}, {0, D<&IDirectory::Read>, "Read"},
{1, &IDirectory::GetEntryCount, "GetEntryCount"}, {1, D<&IDirectory::GetEntryCount>, "GetEntryCount"},
}; };
RegisterHandlers(functions); RegisterHandlers(functions);
// TODO(DarkLordZach): Verify that this is the correct behavior.
// Build entry index now to save time later.
if (True(mode & FileSys::OpenDirectoryMode::Directory)) {
BuildEntryIndex(entries, backend->GetSubdirectories(),
FileSys::DirectoryEntryType::Directory);
}
if (True(mode & FileSys::OpenDirectoryMode::File)) {
BuildEntryIndex(entries, backend->GetFiles(), FileSys::DirectoryEntryType::File);
}
} }
void IDirectory::Read(HLERequestContext& ctx) { Result IDirectory::Read(
Out<s64> out_count,
const OutArray<FileSys::DirectoryEntry, BufferAttr_HipcMapAlias> out_entries) {
LOG_DEBUG(Service_FS, "called."); LOG_DEBUG(Service_FS, "called.");
// Calculate how many entries we can fit in the output buffer R_RETURN(backend->Read(out_count, out_entries.data(), out_entries.size()));
const u64 count_entries = ctx.GetWriteBufferNumElements<FileSys::DirectoryEntry>();
// Cap at total number of entries.
const u64 actual_entries = std::min(count_entries, entries.size() - next_entry_index);
// Determine data start and end
const auto* begin = reinterpret_cast<u8*>(entries.data() + next_entry_index);
const auto* end = reinterpret_cast<u8*>(entries.data() + next_entry_index + actual_entries);
const auto range_size = static_cast<std::size_t>(std::distance(begin, end));
next_entry_index += actual_entries;
// Write the data to memory
ctx.WriteBuffer(begin, range_size);
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(ResultSuccess);
rb.Push(actual_entries);
} }
void IDirectory::GetEntryCount(HLERequestContext& ctx) { Result IDirectory::GetEntryCount(Out<s64> out_count) {
LOG_DEBUG(Service_FS, "called"); LOG_DEBUG(Service_FS, "called");
u64 count = entries.size() - next_entry_index; R_RETURN(backend->GetEntryCount(out_count));
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(ResultSuccess);
rb.Push(count);
} }
} // namespace Service::FileSystem } // namespace Service::FileSystem

View File

@ -3,7 +3,9 @@
#pragma once #pragma once
#include "core/file_sys/fsa/fs_i_directory.h"
#include "core/file_sys/vfs/vfs.h" #include "core/file_sys/vfs/vfs.h"
#include "core/hle/service/cmif_types.h"
#include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/filesystem/filesystem.h"
#include "core/hle/service/service.h" #include "core/hle/service/service.h"
@ -15,16 +17,15 @@ namespace Service::FileSystem {
class IDirectory final : public ServiceFramework<IDirectory> { class IDirectory final : public ServiceFramework<IDirectory> {
public: public:
explicit IDirectory(Core::System& system_, FileSys::VirtualDir backend_, explicit IDirectory(Core::System& system_, FileSys::VirtualDir directory_,
FileSys::OpenDirectoryMode mode); FileSys::OpenDirectoryMode mode);
private: private:
FileSys::VirtualDir backend; std::unique_ptr<FileSys::Fsa::IDirectory> backend;
std::vector<FileSys::DirectoryEntry> entries;
u64 next_entry_index = 0;
void Read(HLERequestContext& ctx); Result Read(Out<s64> out_count,
void GetEntryCount(HLERequestContext& ctx); const OutArray<FileSys::DirectoryEntry, BufferAttr_HipcMapAlias> out_entries);
Result GetEntryCount(Out<s64> out_count);
}; };
} // namespace Service::FileSystem } // namespace Service::FileSystem

View File

@ -2,126 +2,64 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "core/file_sys/errors.h" #include "core/file_sys/errors.h"
#include "core/hle/service/cmif_serialization.h"
#include "core/hle/service/filesystem/fsp/fs_i_file.h" #include "core/hle/service/filesystem/fsp/fs_i_file.h"
#include "core/hle/service/ipc_helpers.h"
namespace Service::FileSystem { namespace Service::FileSystem {
IFile::IFile(Core::System& system_, FileSys::VirtualFile backend_) IFile::IFile(Core::System& system_, FileSys::VirtualFile file_)
: ServiceFramework{system_, "IFile"}, backend(std::move(backend_)) { : ServiceFramework{system_, "IFile"}, backend{std::make_unique<FileSys::Fsa::IFile>(file_)} {
// clang-format off
static const FunctionInfo functions[] = { static const FunctionInfo functions[] = {
{0, &IFile::Read, "Read"}, {0, D<&IFile::Read>, "Read"},
{1, &IFile::Write, "Write"}, {1, D<&IFile::Write>, "Write"},
{2, &IFile::Flush, "Flush"}, {2, D<&IFile::Flush>, "Flush"},
{3, &IFile::SetSize, "SetSize"}, {3, D<&IFile::SetSize>, "SetSize"},
{4, &IFile::GetSize, "GetSize"}, {4, D<&IFile::GetSize>, "GetSize"},
{5, nullptr, "OperateRange"}, {5, nullptr, "OperateRange"},
{6, nullptr, "OperateRangeWithBuffer"}, {6, nullptr, "OperateRangeWithBuffer"},
}; };
// clang-format on
RegisterHandlers(functions); RegisterHandlers(functions);
} }
void IFile::Read(HLERequestContext& ctx) { Result IFile::Read(
IPC::RequestParser rp{ctx}; FileSys::ReadOption option, Out<s64> out_size, s64 offset,
const u64 option = rp.Pop<u64>(); const OutBuffer<BufferAttr_HipcMapAlias | BufferAttr_HipcMapTransferAllowsNonSecure> out_buffer,
const s64 offset = rp.Pop<s64>(); s64 size) {
const s64 length = rp.Pop<s64>(); LOG_DEBUG(Service_FS, "called, option={}, offset=0x{:X}, length={}", option.value, offset,
size);
LOG_DEBUG(Service_FS, "called, option={}, offset=0x{:X}, length={}", option, offset, length);
// Error checking
if (length < 0) {
LOG_ERROR(Service_FS, "Length is less than 0, length={}", length);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(FileSys::ResultInvalidSize);
return;
}
if (offset < 0) {
LOG_ERROR(Service_FS, "Offset is less than 0, offset={}", offset);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(FileSys::ResultInvalidOffset);
return;
}
// Read the data from the Storage backend // Read the data from the Storage backend
std::vector<u8> output = backend->ReadBytes(length, offset); R_RETURN(
backend->Read(reinterpret_cast<size_t*>(out_size.Get()), offset, out_buffer.data(), size));
// Write the data to memory
ctx.WriteBuffer(output);
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(ResultSuccess);
rb.Push(static_cast<u64>(output.size()));
} }
void IFile::Write(HLERequestContext& ctx) { Result IFile::Write(
IPC::RequestParser rp{ctx}; const InBuffer<BufferAttr_HipcMapAlias | BufferAttr_HipcMapTransferAllowsNonSecure> buffer,
const u64 option = rp.Pop<u64>(); FileSys::WriteOption option, s64 offset, s64 size) {
const s64 offset = rp.Pop<s64>(); LOG_DEBUG(Service_FS, "called, option={}, offset=0x{:X}, length={}", option.value, offset,
const s64 length = rp.Pop<s64>(); size);
LOG_DEBUG(Service_FS, "called, option={}, offset=0x{:X}, length={}", option, offset, length); R_RETURN(backend->Write(offset, buffer.data(), size, option));
// Error checking
if (length < 0) {
LOG_ERROR(Service_FS, "Length is less than 0, length={}", length);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(FileSys::ResultInvalidSize);
return;
}
if (offset < 0) {
LOG_ERROR(Service_FS, "Offset is less than 0, offset={}", offset);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(FileSys::ResultInvalidOffset);
return;
}
const auto data = ctx.ReadBuffer();
ASSERT_MSG(static_cast<s64>(data.size()) <= length,
"Attempting to write more data than requested (requested={:016X}, actual={:016X}).",
length, data.size());
// Write the data to the Storage backend
const auto write_size =
static_cast<std::size_t>(std::distance(data.begin(), data.begin() + length));
const std::size_t written = backend->Write(data.data(), write_size, offset);
ASSERT_MSG(static_cast<s64>(written) == length,
"Could not write all bytes to file (requested={:016X}, actual={:016X}).", length,
written);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
} }
void IFile::Flush(HLERequestContext& ctx) { Result IFile::Flush() {
LOG_DEBUG(Service_FS, "called"); LOG_DEBUG(Service_FS, "called");
// Exists for SDK compatibiltity -- No need to flush file. R_RETURN(backend->Flush());
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
} }
void IFile::SetSize(HLERequestContext& ctx) { Result IFile::SetSize(s64 size) {
IPC::RequestParser rp{ctx};
const u64 size = rp.Pop<u64>();
LOG_DEBUG(Service_FS, "called, size={}", size); LOG_DEBUG(Service_FS, "called, size={}", size);
backend->Resize(size); R_RETURN(backend->SetSize(size));
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
} }
void IFile::GetSize(HLERequestContext& ctx) { Result IFile::GetSize(Out<s64> out_size) {
const u64 size = backend->GetSize(); LOG_DEBUG(Service_FS, "called");
LOG_DEBUG(Service_FS, "called, size={}", size);
IPC::ResponseBuilder rb{ctx, 4}; R_RETURN(backend->GetSize(out_size));
rb.Push(ResultSuccess);
rb.Push<u64>(size);
} }
} // namespace Service::FileSystem } // namespace Service::FileSystem

View File

@ -3,6 +3,8 @@
#pragma once #pragma once
#include "core/file_sys/fsa/fs_i_file.h"
#include "core/hle/service/cmif_types.h"
#include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/filesystem/filesystem.h"
#include "core/hle/service/service.h" #include "core/hle/service/service.h"
@ -10,16 +12,21 @@ namespace Service::FileSystem {
class IFile final : public ServiceFramework<IFile> { class IFile final : public ServiceFramework<IFile> {
public: public:
explicit IFile(Core::System& system_, FileSys::VirtualFile backend_); explicit IFile(Core::System& system_, FileSys::VirtualFile file_);
private: private:
FileSys::VirtualFile backend; std::unique_ptr<FileSys::Fsa::IFile> backend;
void Read(HLERequestContext& ctx); Result Read(FileSys::ReadOption option, Out<s64> out_size, s64 offset,
void Write(HLERequestContext& ctx); const OutBuffer<BufferAttr_HipcMapAlias | BufferAttr_HipcMapTransferAllowsNonSecure>
void Flush(HLERequestContext& ctx); out_buffer,
void SetSize(HLERequestContext& ctx); s64 size);
void GetSize(HLERequestContext& ctx); Result Write(
const InBuffer<BufferAttr_HipcMapAlias | BufferAttr_HipcMapTransferAllowsNonSecure> buffer,
FileSys::WriteOption option, s64 offset, s64 size);
Result Flush();
Result SetSize(s64 size);
Result GetSize(Out<s64> out_size);
}; };
} // namespace Service::FileSystem } // namespace Service::FileSystem

View File

@ -2,261 +2,172 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/string_util.h" #include "common/string_util.h"
#include "core/file_sys/fssrv/fssrv_sf_path.h"
#include "core/hle/service/cmif_serialization.h"
#include "core/hle/service/filesystem/fsp/fs_i_directory.h" #include "core/hle/service/filesystem/fsp/fs_i_directory.h"
#include "core/hle/service/filesystem/fsp/fs_i_file.h" #include "core/hle/service/filesystem/fsp/fs_i_file.h"
#include "core/hle/service/filesystem/fsp/fs_i_filesystem.h" #include "core/hle/service/filesystem/fsp/fs_i_filesystem.h"
#include "core/hle/service/ipc_helpers.h"
namespace Service::FileSystem { namespace Service::FileSystem {
IFileSystem::IFileSystem(Core::System& system_, FileSys::VirtualDir backend_, SizeGetter size_) IFileSystem::IFileSystem(Core::System& system_, FileSys::VirtualDir dir_, SizeGetter size_getter_)
: ServiceFramework{system_, "IFileSystem"}, backend{std::move(backend_)}, size{std::move( : ServiceFramework{system_, "IFileSystem"}, backend{std::make_unique<FileSys::Fsa::IFileSystem>(
size_)} { dir_)},
size_getter{std::move(size_getter_)} {
static const FunctionInfo functions[] = { static const FunctionInfo functions[] = {
{0, &IFileSystem::CreateFile, "CreateFile"}, {0, D<&IFileSystem::CreateFile>, "CreateFile"},
{1, &IFileSystem::DeleteFile, "DeleteFile"}, {1, D<&IFileSystem::DeleteFile>, "DeleteFile"},
{2, &IFileSystem::CreateDirectory, "CreateDirectory"}, {2, D<&IFileSystem::CreateDirectory>, "CreateDirectory"},
{3, &IFileSystem::DeleteDirectory, "DeleteDirectory"}, {3, D<&IFileSystem::DeleteDirectory>, "DeleteDirectory"},
{4, &IFileSystem::DeleteDirectoryRecursively, "DeleteDirectoryRecursively"}, {4, D<&IFileSystem::DeleteDirectoryRecursively>, "DeleteDirectoryRecursively"},
{5, &IFileSystem::RenameFile, "RenameFile"}, {5, D<&IFileSystem::RenameFile>, "RenameFile"},
{6, nullptr, "RenameDirectory"}, {6, nullptr, "RenameDirectory"},
{7, &IFileSystem::GetEntryType, "GetEntryType"}, {7, D<&IFileSystem::GetEntryType>, "GetEntryType"},
{8, &IFileSystem::OpenFile, "OpenFile"}, {8, D<&IFileSystem::OpenFile>, "OpenFile"},
{9, &IFileSystem::OpenDirectory, "OpenDirectory"}, {9, D<&IFileSystem::OpenDirectory>, "OpenDirectory"},
{10, &IFileSystem::Commit, "Commit"}, {10, D<&IFileSystem::Commit>, "Commit"},
{11, &IFileSystem::GetFreeSpaceSize, "GetFreeSpaceSize"}, {11, D<&IFileSystem::GetFreeSpaceSize>, "GetFreeSpaceSize"},
{12, &IFileSystem::GetTotalSpaceSize, "GetTotalSpaceSize"}, {12, D<&IFileSystem::GetTotalSpaceSize>, "GetTotalSpaceSize"},
{13, &IFileSystem::CleanDirectoryRecursively, "CleanDirectoryRecursively"}, {13, D<&IFileSystem::CleanDirectoryRecursively>, "CleanDirectoryRecursively"},
{14, &IFileSystem::GetFileTimeStampRaw, "GetFileTimeStampRaw"}, {14, D<&IFileSystem::GetFileTimeStampRaw>, "GetFileTimeStampRaw"},
{15, nullptr, "QueryEntry"}, {15, nullptr, "QueryEntry"},
{16, &IFileSystem::GetFileSystemAttribute, "GetFileSystemAttribute"}, {16, D<&IFileSystem::GetFileSystemAttribute>, "GetFileSystemAttribute"},
}; };
RegisterHandlers(functions); RegisterHandlers(functions);
} }
void IFileSystem::CreateFile(HLERequestContext& ctx) { Result IFileSystem::CreateFile(const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path,
IPC::RequestParser rp{ctx}; s32 option, s64 size) {
LOG_DEBUG(Service_FS, "called. file={}, option=0x{:X}, size=0x{:08X}", path->str, option, size);
const auto file_buffer = ctx.ReadBuffer(); R_RETURN(backend->CreateFile(FileSys::Path(path->str), size));
const std::string name = Common::StringFromBuffer(file_buffer);
const u64 file_mode = rp.Pop<u64>();
const u32 file_size = rp.Pop<u32>();
LOG_DEBUG(Service_FS, "called. file={}, mode=0x{:X}, size=0x{:08X}", name, file_mode,
file_size);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.CreateFile(name, file_size));
} }
void IFileSystem::DeleteFile(HLERequestContext& ctx) { Result IFileSystem::DeleteFile(const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path) {
const auto file_buffer = ctx.ReadBuffer(); LOG_DEBUG(Service_FS, "called. file={}", path->str);
const std::string name = Common::StringFromBuffer(file_buffer);
LOG_DEBUG(Service_FS, "called. file={}", name); R_RETURN(backend->DeleteFile(FileSys::Path(path->str)));
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.DeleteFile(name));
} }
void IFileSystem::CreateDirectory(HLERequestContext& ctx) { Result IFileSystem::CreateDirectory(
const auto file_buffer = ctx.ReadBuffer(); const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path) {
const std::string name = Common::StringFromBuffer(file_buffer); LOG_DEBUG(Service_FS, "called. directory={}", path->str);
LOG_DEBUG(Service_FS, "called. directory={}", name); R_RETURN(backend->CreateDirectory(FileSys::Path(path->str)));
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.CreateDirectory(name));
} }
void IFileSystem::DeleteDirectory(HLERequestContext& ctx) { Result IFileSystem::DeleteDirectory(
const auto file_buffer = ctx.ReadBuffer(); const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path) {
const std::string name = Common::StringFromBuffer(file_buffer); LOG_DEBUG(Service_FS, "called. directory={}", path->str);
LOG_DEBUG(Service_FS, "called. directory={}", name); R_RETURN(backend->DeleteDirectory(FileSys::Path(path->str)));
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.DeleteDirectory(name));
} }
void IFileSystem::DeleteDirectoryRecursively(HLERequestContext& ctx) { Result IFileSystem::DeleteDirectoryRecursively(
const auto file_buffer = ctx.ReadBuffer(); const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path) {
const std::string name = Common::StringFromBuffer(file_buffer); LOG_DEBUG(Service_FS, "called. directory={}", path->str);
LOG_DEBUG(Service_FS, "called. directory={}", name); R_RETURN(backend->DeleteDirectoryRecursively(FileSys::Path(path->str)));
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.DeleteDirectoryRecursively(name));
} }
void IFileSystem::CleanDirectoryRecursively(HLERequestContext& ctx) { Result IFileSystem::CleanDirectoryRecursively(
const auto file_buffer = ctx.ReadBuffer(); const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path) {
const std::string name = Common::StringFromBuffer(file_buffer); LOG_DEBUG(Service_FS, "called. Directory: {}", path->str);
LOG_DEBUG(Service_FS, "called. Directory: {}", name); R_RETURN(backend->CleanDirectoryRecursively(FileSys::Path(path->str)));
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.CleanDirectoryRecursively(name));
} }
void IFileSystem::RenameFile(HLERequestContext& ctx) { Result IFileSystem::RenameFile(
const std::string src_name = Common::StringFromBuffer(ctx.ReadBuffer(0)); const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> old_path,
const std::string dst_name = Common::StringFromBuffer(ctx.ReadBuffer(1)); const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> new_path) {
LOG_DEBUG(Service_FS, "called. file '{}' to file '{}'", old_path->str, new_path->str);
LOG_DEBUG(Service_FS, "called. file '{}' to file '{}'", src_name, dst_name); R_RETURN(backend->RenameFile(FileSys::Path(old_path->str), FileSys::Path(new_path->str)));
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.RenameFile(src_name, dst_name));
} }
void IFileSystem::OpenFile(HLERequestContext& ctx) { Result IFileSystem::OpenFile(OutInterface<IFile> out_interface,
IPC::RequestParser rp{ctx}; const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path,
u32 mode) {
const auto file_buffer = ctx.ReadBuffer(); LOG_DEBUG(Service_FS, "called. file={}, mode={}", path->str, mode);
const std::string name = Common::StringFromBuffer(file_buffer);
const auto mode = static_cast<FileSys::OpenMode>(rp.Pop<u32>());
LOG_DEBUG(Service_FS, "called. file={}, mode={}", name, mode);
FileSys::VirtualFile vfs_file{}; FileSys::VirtualFile vfs_file{};
auto result = backend.OpenFile(&vfs_file, name, mode); R_TRY(backend->OpenFile(&vfs_file, FileSys::Path(path->str),
if (result != ResultSuccess) { static_cast<FileSys::OpenMode>(mode)));
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(result);
return;
}
auto file = std::make_shared<IFile>(system, vfs_file); *out_interface = std::make_shared<IFile>(system, vfs_file);
R_SUCCEED();
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(ResultSuccess);
rb.PushIpcInterface<IFile>(std::move(file));
} }
void IFileSystem::OpenDirectory(HLERequestContext& ctx) { Result IFileSystem::OpenDirectory(OutInterface<IDirectory> out_interface,
IPC::RequestParser rp{ctx}; const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path,
u32 mode) {
const auto file_buffer = ctx.ReadBuffer(); LOG_DEBUG(Service_FS, "called. directory={}, mode={}", path->str, mode);
const std::string name = Common::StringFromBuffer(file_buffer);
const auto mode = rp.PopRaw<FileSys::OpenDirectoryMode>();
LOG_DEBUG(Service_FS, "called. directory={}, mode={}", name, mode);
FileSys::VirtualDir vfs_dir{}; FileSys::VirtualDir vfs_dir{};
auto result = backend.OpenDirectory(&vfs_dir, name); R_TRY(backend->OpenDirectory(&vfs_dir, FileSys::Path(path->str),
if (result != ResultSuccess) { static_cast<FileSys::OpenDirectoryMode>(mode)));
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(result);
return;
}
auto directory = std::make_shared<IDirectory>(system, vfs_dir, mode); *out_interface = std::make_shared<IDirectory>(system, vfs_dir,
static_cast<FileSys::OpenDirectoryMode>(mode));
IPC::ResponseBuilder rb{ctx, 2, 0, 1}; R_SUCCEED();
rb.Push(ResultSuccess);
rb.PushIpcInterface<IDirectory>(std::move(directory));
} }
void IFileSystem::GetEntryType(HLERequestContext& ctx) { Result IFileSystem::GetEntryType(
const auto file_buffer = ctx.ReadBuffer(); Out<u32> out_type, const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path) {
const std::string name = Common::StringFromBuffer(file_buffer); LOG_DEBUG(Service_FS, "called. file={}", path->str);
LOG_DEBUG(Service_FS, "called. file={}", name);
FileSys::DirectoryEntryType vfs_entry_type{}; FileSys::DirectoryEntryType vfs_entry_type{};
auto result = backend.GetEntryType(&vfs_entry_type, name); R_TRY(backend->GetEntryType(&vfs_entry_type, FileSys::Path(path->str)));
if (result != ResultSuccess) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(result);
return;
}
IPC::ResponseBuilder rb{ctx, 3}; *out_type = static_cast<u32>(vfs_entry_type);
rb.Push(ResultSuccess); R_SUCCEED();
rb.Push<u32>(static_cast<u32>(vfs_entry_type));
} }
void IFileSystem::Commit(HLERequestContext& ctx) { Result IFileSystem::Commit() {
LOG_WARNING(Service_FS, "(STUBBED) called"); LOG_WARNING(Service_FS, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2}; R_SUCCEED();
rb.Push(ResultSuccess);
} }
void IFileSystem::GetFreeSpaceSize(HLERequestContext& ctx) { Result IFileSystem::GetFreeSpaceSize(
Out<s64> out_size, const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path) {
LOG_DEBUG(Service_FS, "called"); LOG_DEBUG(Service_FS, "called");
IPC::ResponseBuilder rb{ctx, 4}; *out_size = size_getter.get_free_size();
rb.Push(ResultSuccess); R_SUCCEED();
rb.Push(size.get_free_size());
} }
void IFileSystem::GetTotalSpaceSize(HLERequestContext& ctx) { Result IFileSystem::GetTotalSpaceSize(
Out<s64> out_size, const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path) {
LOG_DEBUG(Service_FS, "called"); LOG_DEBUG(Service_FS, "called");
IPC::ResponseBuilder rb{ctx, 4}; *out_size = size_getter.get_total_size();
rb.Push(ResultSuccess); R_SUCCEED();
rb.Push(size.get_total_size());
} }
void IFileSystem::GetFileTimeStampRaw(HLERequestContext& ctx) { Result IFileSystem::GetFileTimeStampRaw(
const auto file_buffer = ctx.ReadBuffer(); Out<FileSys::FileTimeStampRaw> out_timestamp,
const std::string name = Common::StringFromBuffer(file_buffer); const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path) {
LOG_WARNING(Service_FS, "(Partial Implementation) called. file={}", path->str);
LOG_WARNING(Service_FS, "(Partial Implementation) called. file={}", name);
FileSys::FileTimeStampRaw vfs_timestamp{}; FileSys::FileTimeStampRaw vfs_timestamp{};
auto result = backend.GetFileTimeStampRaw(&vfs_timestamp, name); R_TRY(backend->GetFileTimeStampRaw(&vfs_timestamp, FileSys::Path(path->str)));
if (result != ResultSuccess) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(result);
return;
}
IPC::ResponseBuilder rb{ctx, 10}; *out_timestamp = vfs_timestamp;
rb.Push(ResultSuccess); R_SUCCEED();
rb.PushRaw(vfs_timestamp);
} }
void IFileSystem::GetFileSystemAttribute(HLERequestContext& ctx) { Result IFileSystem::GetFileSystemAttribute(Out<FileSys::FileSystemAttribute> out_attribute) {
LOG_WARNING(Service_FS, "(STUBBED) called"); LOG_WARNING(Service_FS, "(STUBBED) called");
struct FileSystemAttribute { FileSys::FileSystemAttribute savedata_attribute{};
u8 dir_entry_name_length_max_defined;
u8 file_entry_name_length_max_defined;
u8 dir_path_name_length_max_defined;
u8 file_path_name_length_max_defined;
INSERT_PADDING_BYTES_NOINIT(0x5);
u8 utf16_dir_entry_name_length_max_defined;
u8 utf16_file_entry_name_length_max_defined;
u8 utf16_dir_path_name_length_max_defined;
u8 utf16_file_path_name_length_max_defined;
INSERT_PADDING_BYTES_NOINIT(0x18);
s32 dir_entry_name_length_max;
s32 file_entry_name_length_max;
s32 dir_path_name_length_max;
s32 file_path_name_length_max;
INSERT_PADDING_WORDS_NOINIT(0x5);
s32 utf16_dir_entry_name_length_max;
s32 utf16_file_entry_name_length_max;
s32 utf16_dir_path_name_length_max;
s32 utf16_file_path_name_length_max;
INSERT_PADDING_WORDS_NOINIT(0x18);
INSERT_PADDING_WORDS_NOINIT(0x1);
};
static_assert(sizeof(FileSystemAttribute) == 0xc0, "FileSystemAttribute has incorrect size");
FileSystemAttribute savedata_attribute{};
savedata_attribute.dir_entry_name_length_max_defined = true; savedata_attribute.dir_entry_name_length_max_defined = true;
savedata_attribute.file_entry_name_length_max_defined = true; savedata_attribute.file_entry_name_length_max_defined = true;
savedata_attribute.dir_entry_name_length_max = 0x40; savedata_attribute.dir_entry_name_length_max = 0x40;
savedata_attribute.file_entry_name_length_max = 0x40; savedata_attribute.file_entry_name_length_max = 0x40;
IPC::ResponseBuilder rb{ctx, 50}; *out_attribute = savedata_attribute;
rb.Push(ResultSuccess); R_SUCCEED();
rb.PushRaw(savedata_attribute);
} }
} // namespace Service::FileSystem } // namespace Service::FileSystem

View File

@ -3,36 +3,58 @@
#pragma once #pragma once
#include "common/common_funcs.h"
#include "core/file_sys/fs_filesystem.h"
#include "core/file_sys/fsa/fs_i_filesystem.h"
#include "core/file_sys/vfs/vfs.h" #include "core/file_sys/vfs/vfs.h"
#include "core/hle/service/cmif_types.h"
#include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/filesystem/filesystem.h"
#include "core/hle/service/filesystem/fsp/fsp_util.h" #include "core/hle/service/filesystem/fsp/fsp_util.h"
#include "core/hle/service/service.h" #include "core/hle/service/service.h"
namespace FileSys::Sf {
struct Path;
}
namespace Service::FileSystem { namespace Service::FileSystem {
class IFile;
class IDirectory;
class IFileSystem final : public ServiceFramework<IFileSystem> { class IFileSystem final : public ServiceFramework<IFileSystem> {
public: public:
explicit IFileSystem(Core::System& system_, FileSys::VirtualDir backend_, SizeGetter size_); explicit IFileSystem(Core::System& system_, FileSys::VirtualDir dir_, SizeGetter size_getter_);
void CreateFile(HLERequestContext& ctx); Result CreateFile(const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path, s32 option,
void DeleteFile(HLERequestContext& ctx); s64 size);
void CreateDirectory(HLERequestContext& ctx); Result DeleteFile(const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path);
void DeleteDirectory(HLERequestContext& ctx); Result CreateDirectory(const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path);
void DeleteDirectoryRecursively(HLERequestContext& ctx); Result DeleteDirectory(const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path);
void CleanDirectoryRecursively(HLERequestContext& ctx); Result DeleteDirectoryRecursively(
void RenameFile(HLERequestContext& ctx); const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path);
void OpenFile(HLERequestContext& ctx); Result CleanDirectoryRecursively(
void OpenDirectory(HLERequestContext& ctx); const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path);
void GetEntryType(HLERequestContext& ctx); Result RenameFile(const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> old_path,
void Commit(HLERequestContext& ctx); const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> new_path);
void GetFreeSpaceSize(HLERequestContext& ctx); Result OpenFile(OutInterface<IFile> out_interface,
void GetTotalSpaceSize(HLERequestContext& ctx); const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path, u32 mode);
void GetFileTimeStampRaw(HLERequestContext& ctx); Result OpenDirectory(OutInterface<IDirectory> out_interface,
void GetFileSystemAttribute(HLERequestContext& ctx); const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path,
u32 mode);
Result GetEntryType(Out<u32> out_type,
const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path);
Result Commit();
Result GetFreeSpaceSize(Out<s64> out_size,
const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path);
Result GetTotalSpaceSize(Out<s64> out_size,
const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path);
Result GetFileTimeStampRaw(Out<FileSys::FileTimeStampRaw> out_timestamp,
const InLargeData<FileSys::Sf::Path, BufferAttr_HipcPointer> path);
Result GetFileSystemAttribute(Out<FileSys::FileSystemAttribute> out_attribute);
private: private:
VfsDirectoryServiceWrapper backend; std::unique_ptr<FileSys::Fsa::IFileSystem> backend;
SizeGetter size; SizeGetter size_getter;
}; };
} // namespace Service::FileSystem } // namespace Service::FileSystem

View File

@ -68,10 +68,7 @@ public:
const SyncpointManager& GetSyncpointManager() const; const SyncpointManager& GetSyncpointManager() const;
struct Host1xDeviceFileData { struct Host1xDeviceFileData {
std::unordered_map<DeviceFD, u32> fd_to_id{};
std::deque<u32> syncpts_accumulated{}; std::deque<u32> syncpts_accumulated{};
u32 nvdec_next_id{};
u32 vic_next_id{};
}; };
Host1xDeviceFileData& Host1xDeviceFile(); Host1xDeviceFileData& Host1xDeviceFile();

View File

@ -8,6 +8,7 @@
#include "core/hle/service/nvdrv/core/container.h" #include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/devices/ioctl_serialization.h" #include "core/hle/service/nvdrv/devices/ioctl_serialization.h"
#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h" #include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
#include "video_core/host1x/host1x.h"
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
namespace Service::Nvidia::Devices { namespace Service::Nvidia::Devices {
@ -21,13 +22,8 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
switch (command.group) { switch (command.group) {
case 0x0: case 0x0:
switch (command.cmd) { switch (command.cmd) {
case 0x1: { case 0x1:
auto& host1x_file = core.Host1xDeviceFile();
if (!host1x_file.fd_to_id.contains(fd)) {
host1x_file.fd_to_id[fd] = host1x_file.nvdec_next_id++;
}
return WrapFixedVariable(this, &nvhost_nvdec::Submit, input, output, fd); return WrapFixedVariable(this, &nvhost_nvdec::Submit, input, output, fd);
}
case 0x2: case 0x2:
return WrapFixed(this, &nvhost_nvdec::GetSyncpoint, input, output); return WrapFixed(this, &nvhost_nvdec::GetSyncpoint, input, output);
case 0x3: case 0x3:
@ -72,15 +68,12 @@ void nvhost_nvdec::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
LOG_INFO(Service_NVDRV, "NVDEC video stream started"); LOG_INFO(Service_NVDRV, "NVDEC video stream started");
system.SetNVDECActive(true); system.SetNVDECActive(true);
sessions[fd] = session_id; sessions[fd] = session_id;
host1x.StartDevice(fd, Tegra::Host1x::ChannelType::NvDec, channel_syncpoint);
} }
void nvhost_nvdec::OnClose(DeviceFD fd) { void nvhost_nvdec::OnClose(DeviceFD fd) {
LOG_INFO(Service_NVDRV, "NVDEC video stream ended"); LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
auto& host1x_file = core.Host1xDeviceFile(); host1x.StopDevice(fd, Tegra::Host1x::ChannelType::NvDec);
const auto iter = host1x_file.fd_to_id.find(fd);
if (iter != host1x_file.fd_to_id.end()) {
system.GPU().ClearCdmaInstance(iter->second);
}
system.SetNVDECActive(false); system.SetNVDECActive(false);
auto it = sessions.find(fd); auto it = sessions.find(fd);
if (it != sessions.end()) { if (it != sessions.end()) {

View File

@ -55,8 +55,9 @@ std::size_t WriteVectors(std::span<u8> dst, const std::vector<T>& src, std::size
nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_, nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_,
NvCore::ChannelType channel_type_) NvCore::ChannelType channel_type_)
: nvdevice{system_}, core{core_}, syncpoint_manager{core.GetSyncpointManager()}, : nvdevice{system_}, host1x{system_.Host1x()}, core{core_},
nvmap{core.GetNvMapFile()}, channel_type{channel_type_} { syncpoint_manager{core.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
channel_type{channel_type_} {
auto& syncpts_accumulated = core.Host1xDeviceFile().syncpts_accumulated; auto& syncpts_accumulated = core.Host1xDeviceFile().syncpts_accumulated;
if (syncpts_accumulated.empty()) { if (syncpts_accumulated.empty()) {
channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false); channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false);
@ -95,24 +96,24 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
offset += SliceVectors(data, syncpt_increments, params.syncpoint_count, offset); offset += SliceVectors(data, syncpt_increments, params.syncpoint_count, offset);
offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); offset += SliceVectors(data, fence_thresholds, params.fence_count, offset);
auto& gpu = system.GPU();
auto* session = core.GetSession(sessions[fd]); auto* session = core.GetSession(sessions[fd]);
if (gpu.UseNvdec()) { for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
for (std::size_t i = 0; i < syncpt_increments.size(); i++) { const SyncptIncr& syncpt_incr = syncpt_increments[i];
const SyncptIncr& syncpt_incr = syncpt_increments[i]; fence_thresholds[i] =
fence_thresholds[i] = syncpoint_manager.IncrementSyncpointMaxExt(syncpt_incr.id, syncpt_incr.increments);
syncpoint_manager.IncrementSyncpointMaxExt(syncpt_incr.id, syncpt_incr.increments);
}
} }
for (const auto& cmd_buffer : command_buffers) { for (const auto& cmd_buffer : command_buffers) {
const auto object = nvmap.GetHandle(cmd_buffer.memory_id); const auto object = nvmap.GetHandle(cmd_buffer.memory_id);
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); Core::Memory::CpuGuestMemory<Tegra::ChCommandHeader,
session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), Core::Memory::GuestMemoryFlags::SafeRead>
cmdlist.size() * sizeof(u32)); cmdlist(session->process->GetMemory(), object->address + cmd_buffer.offset,
gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); cmd_buffer.word_count);
host1x.PushEntries(fd, std::move(cmdlist));
} }
// Some games expect command_buffers to be written back // Some games expect command_buffers to be written back
offset = 0; offset = 0;
offset += WriteVectors(data, command_buffers, offset); offset += WriteVectors(data, command_buffers, offset);

View File

@ -119,6 +119,7 @@ protected:
Kernel::KEvent* QueryEvent(u32 event_id) override; Kernel::KEvent* QueryEvent(u32 event_id) override;
Tegra::Host1x::Host1x& host1x;
u32 channel_syncpoint; u32 channel_syncpoint;
s32_le nvmap_fd{}; s32_le nvmap_fd{};
u32_le submit_timeout{}; u32_le submit_timeout{};

View File

@ -7,6 +7,7 @@
#include "core/hle/service/nvdrv/core/container.h" #include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/devices/ioctl_serialization.h" #include "core/hle/service/nvdrv/devices/ioctl_serialization.h"
#include "core/hle/service/nvdrv/devices/nvhost_vic.h" #include "core/hle/service/nvdrv/devices/nvhost_vic.h"
#include "video_core/host1x/host1x.h"
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
namespace Service::Nvidia::Devices { namespace Service::Nvidia::Devices {
@ -21,13 +22,8 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
switch (command.group) { switch (command.group) {
case 0x0: case 0x0:
switch (command.cmd) { switch (command.cmd) {
case 0x1: { case 0x1:
auto& host1x_file = core.Host1xDeviceFile();
if (!host1x_file.fd_to_id.contains(fd)) {
host1x_file.fd_to_id[fd] = host1x_file.vic_next_id++;
}
return WrapFixedVariable(this, &nvhost_vic::Submit, input, output, fd); return WrapFixedVariable(this, &nvhost_vic::Submit, input, output, fd);
}
case 0x2: case 0x2:
return WrapFixed(this, &nvhost_vic::GetSyncpoint, input, output); return WrapFixed(this, &nvhost_vic::GetSyncpoint, input, output);
case 0x3: case 0x3:
@ -70,14 +66,11 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
void nvhost_vic::OnOpen(NvCore::SessionId session_id, DeviceFD fd) { void nvhost_vic::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
sessions[fd] = session_id; sessions[fd] = session_id;
host1x.StartDevice(fd, Tegra::Host1x::ChannelType::VIC, channel_syncpoint);
} }
void nvhost_vic::OnClose(DeviceFD fd) { void nvhost_vic::OnClose(DeviceFD fd) {
auto& host1x_file = core.Host1xDeviceFile(); host1x.StopDevice(fd, Tegra::Host1x::ChannelType::VIC);
const auto iter = host1x_file.fd_to_id.find(fd);
if (iter != host1x_file.fd_to_id.end()) {
system.GPU().ClearCdmaInstance(iter->second);
}
sessions.erase(fd); sessions.erase(fd);
} }

View File

@ -64,6 +64,8 @@ public:
Memory(Memory&&) = default; Memory(Memory&&) = default;
Memory& operator=(Memory&&) = delete; Memory& operator=(Memory&&) = delete;
static constexpr bool HAS_FLUSH_INVALIDATION = false;
/** /**
* Resets the state of the Memory system. * Resets the state of the Memory system.
*/ */

View File

@ -261,7 +261,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
case Stage::Geometry: case Stage::Geometry:
execution_model = spv::ExecutionModel::Geometry; execution_model = spv::ExecutionModel::Geometry;
ctx.AddCapability(spv::Capability::Geometry); ctx.AddCapability(spv::Capability::Geometry);
ctx.AddCapability(spv::Capability::GeometryStreams); if (ctx.profile.support_geometry_streams) {
ctx.AddCapability(spv::Capability::GeometryStreams);
}
switch (ctx.runtime_info.input_topology) { switch (ctx.runtime_info.input_topology) {
case InputTopology::Points: case InputTopology::Points:
ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints);

View File

@ -129,7 +129,9 @@ void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
if (ctx.runtime_info.convert_depth_mode && !ctx.profile.support_native_ndc) { if (ctx.runtime_info.convert_depth_mode && !ctx.profile.support_native_ndc) {
ConvertDepthMode(ctx); ConvertDepthMode(ctx);
} }
if (stream.IsImmediate()) { if (!ctx.profile.support_geometry_streams) {
throw NotImplementedException("Geometry streams");
} else if (stream.IsImmediate()) {
ctx.OpEmitStreamVertex(ctx.Def(stream)); ctx.OpEmitStreamVertex(ctx.Def(stream));
} else { } else {
LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); LOG_WARNING(Shader_SPIRV, "Stream is not immediate");
@ -140,7 +142,9 @@ void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
} }
void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
if (stream.IsImmediate()) { if (!ctx.profile.support_geometry_streams) {
throw NotImplementedException("Geometry streams");
} else if (stream.IsImmediate()) {
ctx.OpEndStreamPrimitive(ctx.Def(stream)); ctx.OpEndStreamPrimitive(ctx.Def(stream));
} else { } else {
LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); LOG_WARNING(Shader_SPIRV, "Stream is not immediate");

View File

@ -44,6 +44,7 @@ struct Profile {
bool support_gl_derivative_control{}; bool support_gl_derivative_control{};
bool support_scaled_attributes{}; bool support_scaled_attributes{};
bool support_multi_viewport{}; bool support_multi_viewport{};
bool support_geometry_streams{};
bool warp_size_potentially_larger_than_guest{}; bool warp_size_potentially_larger_than_guest{};

View File

@ -60,8 +60,8 @@ add_library(video_core STATIC
framebuffer_config.h framebuffer_config.h
fsr.cpp fsr.cpp
fsr.h fsr.h
host1x/codecs/codec.cpp host1x/codecs/decoder.cpp
host1x/codecs/codec.h host1x/codecs/decoder.h
host1x/codecs/h264.cpp host1x/codecs/h264.cpp
host1x/codecs/h264.h host1x/codecs/h264.h
host1x/codecs/vp8.cpp host1x/codecs/vp8.cpp
@ -80,8 +80,6 @@ add_library(video_core STATIC
host1x/nvdec.cpp host1x/nvdec.cpp
host1x/nvdec.h host1x/nvdec.h
host1x/nvdec_common.h host1x/nvdec_common.h
host1x/sync_manager.cpp
host1x/sync_manager.h
host1x/syncpoint_manager.cpp host1x/syncpoint_manager.cpp
host1x/syncpoint_manager.h host1x/syncpoint_manager.h
host1x/vic.cpp host1x/vic.cpp
@ -392,4 +390,8 @@ if (ANDROID AND ARCHITECTURE_arm64)
target_link_libraries(video_core PRIVATE adrenotools) target_link_libraries(video_core PRIVATE adrenotools)
endif() endif()
if (ARCHITECTURE_arm64)
target_link_libraries(video_core PRIVATE sse2neon)
endif()
create_target_directory_groups(video_core) create_target_directory_groups(video_core)

View File

@ -1488,7 +1488,10 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
std::span<const u8> upload_span; std::span<const u8> upload_span;
const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset; const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
if (IsRangeGranular(device_addr, copy.size)) { if (IsRangeGranular(device_addr, copy.size)) {
upload_span = std::span(device_memory.GetPointer<u8>(device_addr), copy.size); auto* const ptr = device_memory.GetPointer<u8>(device_addr);
if (ptr != nullptr) {
upload_span = std::span(ptr, copy.size);
}
} else { } else {
if (immediate_buffer.empty()) { if (immediate_buffer.empty()) {
immediate_buffer = ImmediateBuffer(largest_copy); immediate_buffer = ImmediateBuffer(largest_copy);

View File

@ -2,136 +2,130 @@
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
#include <bit> #include <bit>
#include "common/thread.h"
#include "core/core.h"
#include "video_core/cdma_pusher.h" #include "video_core/cdma_pusher.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/host1x/control.h" #include "video_core/host1x/control.h"
#include "video_core/host1x/host1x.h" #include "video_core/host1x/host1x.h"
#include "video_core/host1x/nvdec.h" #include "video_core/host1x/nvdec.h"
#include "video_core/host1x/nvdec_common.h" #include "video_core/host1x/nvdec_common.h"
#include "video_core/host1x/sync_manager.h"
#include "video_core/host1x/vic.h" #include "video_core/host1x/vic.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
namespace Tegra { namespace Tegra {
CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_)
: host1x{host1x_}, nvdec_processor(std::make_shared<Host1x::Nvdec>(host1x)), CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id)
vic_processor(std::make_unique<Host1x::Vic>(host1x, nvdec_processor)), : host1x{host1x_}, memory_manager{host1x.GMMU()},
host1x_processor(std::make_unique<Host1x::Control>(host1x)), host_processor{std::make_unique<Host1x::Control>(host1x_)}, current_class{
sync_manager(std::make_unique<Host1x::SyncptIncrManager>(host1x)) {} static_cast<ChClassId>(id)} {
thread = std::jthread([this](std::stop_token stop_token) { ProcessEntries(stop_token); });
}
CDmaPusher::~CDmaPusher() = default; CDmaPusher::~CDmaPusher() = default;
void CDmaPusher::ProcessEntries(ChCommandHeaderList&& entries) { void CDmaPusher::ProcessEntries(std::stop_token stop_token) {
for (const auto& value : entries) { Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
if (mask != 0) { ChCommandHeaderList command_list{host1x.System().ApplicationMemory(), 0, 0};
const auto lbs = static_cast<u32>(std::countr_zero(mask)); u32 count{};
mask &= ~(1U << lbs); u32 method_offset{};
ExecuteCommand(offset + lbs, value.raw); u32 mask{};
continue; bool incrementing{};
} else if (count != 0) {
--count; while (!stop_token.stop_requested()) {
ExecuteCommand(offset, value.raw); {
if (incrementing) { std::unique_lock l{command_mutex};
++offset; Common::CondvarWait(command_cv, l, stop_token,
[this]() { return command_lists.size() > 0; });
if (stop_token.stop_requested()) {
return;
} }
continue;
command_list = std::move(command_lists.front());
command_lists.pop_front();
} }
const auto mode = value.submission_mode.Value();
switch (mode) { size_t i = 0;
case ChSubmissionMode::SetClass: { for (const auto value : command_list) {
mask = value.value & 0x3f; i++;
offset = value.method_offset; if (mask != 0) {
current_class = static_cast<ChClassId>((value.value >> 6) & 0x3ff); const auto lbs = static_cast<u32>(std::countr_zero(mask));
break; mask &= ~(1U << lbs);
} ExecuteCommand(method_offset + lbs, value.raw);
case ChSubmissionMode::Incrementing: continue;
case ChSubmissionMode::NonIncrementing: } else if (count != 0) {
count = value.value; --count;
offset = value.method_offset; ExecuteCommand(method_offset, value.raw);
incrementing = mode == ChSubmissionMode::Incrementing; if (incrementing) {
break; ++method_offset;
case ChSubmissionMode::Mask: }
mask = value.value; continue;
offset = value.method_offset; }
break; const auto mode = value.submission_mode.Value();
case ChSubmissionMode::Immediate: { switch (mode) {
const u32 data = value.value & 0xfff; case ChSubmissionMode::SetClass: {
offset = value.method_offset; mask = value.value & 0x3f;
ExecuteCommand(offset, data); method_offset = value.method_offset;
break; current_class = static_cast<ChClassId>((value.value >> 6) & 0x3ff);
} break;
default: }
UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", static_cast<u32>(mode)); case ChSubmissionMode::Incrementing:
break; case ChSubmissionMode::NonIncrementing:
count = value.value;
method_offset = value.method_offset;
incrementing = mode == ChSubmissionMode::Incrementing;
break;
case ChSubmissionMode::Mask:
mask = value.value;
method_offset = value.method_offset;
break;
case ChSubmissionMode::Immediate: {
const u32 data = value.value & 0xfff;
method_offset = value.method_offset;
ExecuteCommand(method_offset, data);
break;
}
default:
LOG_ERROR(HW_GPU, "Bad command at index {} (bytes 0x{:X}), buffer size {}", i - 1,
(i - 1) * sizeof(u32), command_list.size());
UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!",
static_cast<u32>(mode));
break;
}
} }
} }
} }
void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { void CDmaPusher::ExecuteCommand(u32 method, u32 arg) {
switch (current_class) { switch (current_class) {
case ChClassId::NvDec:
ThiStateWrite(nvdec_thi_state, offset, data);
switch (static_cast<ThiMethod>(offset)) {
case ThiMethod::IncSyncpt: {
LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
const auto cond = static_cast<u32>((data >> 8) & 0xFF);
if (cond == 0) {
sync_manager->Increment(syncpoint_id);
} else {
sync_manager->SignalDone(
sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
}
break;
}
case ThiMethod::SetMethod1:
LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
static_cast<u32>(nvdec_thi_state.method_0));
nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data);
break;
default:
break;
}
break;
case ChClassId::GraphicsVic:
ThiStateWrite(vic_thi_state, static_cast<u32>(state_offset), {data});
switch (static_cast<ThiMethod>(state_offset)) {
case ThiMethod::IncSyncpt: {
LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method");
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
const auto cond = static_cast<u32>((data >> 8) & 0xFF);
if (cond == 0) {
sync_manager->Increment(syncpoint_id);
} else {
sync_manager->SignalDone(
sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
}
break;
}
case ThiMethod::SetMethod1:
LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
static_cast<u32>(vic_thi_state.method_0), data);
vic_processor->ProcessMethod(static_cast<Host1x::Vic::Method>(vic_thi_state.method_0),
data);
break;
default:
break;
}
break;
case ChClassId::Control: case ChClassId::Control:
// This device is mainly for syncpoint synchronization LOG_TRACE(Service_NVDRV, "Class {} method 0x{:X} arg 0x{:X}",
LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); static_cast<u32>(current_class), method, arg);
host1x_processor->ProcessMethod(static_cast<Host1x::Control::Method>(offset), data); host_processor->ProcessMethod(static_cast<Host1x::Control::Method>(method), arg);
break; break;
default: default:
UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class)); thi_regs.reg_array[method] = arg;
break; switch (static_cast<ThiMethod>(method)) {
case ThiMethod::IncSyncpt: {
const auto syncpoint_id = static_cast<u32>(arg & 0xFF);
[[maybe_unused]] const auto cond = static_cast<u32>((arg >> 8) & 0xFF);
LOG_TRACE(Service_NVDRV, "Class {} IncSyncpt Method, syncpt {} cond {}",
static_cast<u32>(current_class), syncpoint_id, cond);
auto& syncpoint_manager = host1x.GetSyncpointManager();
syncpoint_manager.IncrementGuest(syncpoint_id);
syncpoint_manager.IncrementHost(syncpoint_id);
break;
}
case ThiMethod::SetMethod1:
LOG_TRACE(Service_NVDRV, "Class {} method 0x{:X} arg 0x{:X}",
static_cast<u32>(current_class), static_cast<u32>(thi_regs.method_0), arg);
ProcessMethod(thi_regs.method_0, arg);
break;
default:
break;
}
} }
} }
void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset, u32 argument) {
u8* const offset_ptr = reinterpret_cast<u8*>(&state) + sizeof(u32) * state_offset;
std::memcpy(offset_ptr, &argument, sizeof(u32));
}
} // namespace Tegra } // namespace Tegra

View File

@ -3,12 +3,18 @@
#pragma once #pragma once
#include <condition_variable>
#include <deque>
#include <memory> #include <memory>
#include <mutex>
#include <thread>
#include <vector> #include <vector>
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/polyfill_thread.h"
#include "core/memory.h"
namespace Tegra { namespace Tegra {
@ -62,23 +68,31 @@ struct ChCommand {
std::vector<u32> arguments; std::vector<u32> arguments;
}; };
using ChCommandHeaderList = std::vector<ChCommandHeader>; using ChCommandHeaderList =
Core::Memory::CpuGuestMemory<Tegra::ChCommandHeader, Core::Memory::GuestMemoryFlags::SafeRead>;
struct ThiRegisters { struct ThiRegisters {
u32_le increment_syncpt{}; static constexpr std::size_t NUM_REGS = 0x20;
INSERT_PADDING_WORDS(1);
u32_le increment_syncpt_error{}; union {
u32_le ctx_switch_incremement_syncpt{}; struct {
INSERT_PADDING_WORDS(4); u32_le increment_syncpt;
u32_le ctx_switch{}; INSERT_PADDING_WORDS_NOINIT(1);
INSERT_PADDING_WORDS(1); u32_le increment_syncpt_error;
u32_le ctx_syncpt_eof{}; u32_le ctx_switch_incremement_syncpt;
INSERT_PADDING_WORDS(5); INSERT_PADDING_WORDS_NOINIT(4);
u32_le method_0{}; u32_le ctx_switch;
u32_le method_1{}; INSERT_PADDING_WORDS_NOINIT(1);
INSERT_PADDING_WORDS(12); u32_le ctx_syncpt_eof;
u32_le int_status{}; INSERT_PADDING_WORDS_NOINIT(5);
u32_le int_mask{}; u32_le method_0;
u32_le method_1;
INSERT_PADDING_WORDS_NOINIT(12);
u32_le int_status;
u32_le int_mask;
};
std::array<u32, NUM_REGS> reg_array;
};
}; };
enum class ThiMethod : u32 { enum class ThiMethod : u32 {
@ -89,32 +103,39 @@ enum class ThiMethod : u32 {
class CDmaPusher { class CDmaPusher {
public: public:
explicit CDmaPusher(Host1x::Host1x& host1x); CDmaPusher() = delete;
~CDmaPusher(); virtual ~CDmaPusher();
/// Process the command entry void PushEntries(ChCommandHeaderList&& entries) {
void ProcessEntries(ChCommandHeaderList&& entries); std::scoped_lock l{command_mutex};
command_lists.push_back(std::move(entries));
command_cv.notify_one();
}
protected:
explicit CDmaPusher(Host1x::Host1x& host1x, s32 id);
virtual void ProcessMethod(u32 method, u32 arg) = 0;
Host1x::Host1x& host1x;
Tegra::MemoryManager& memory_manager;
private: private:
/// Process the command entry
void ProcessEntries(std::stop_token stop_token);
/// Invoke command class devices to execute the command based on the current state /// Invoke command class devices to execute the command based on the current state
void ExecuteCommand(u32 state_offset, u32 data); void ExecuteCommand(u32 state_offset, u32 data);
/// Write arguments value to the ThiRegisters member at the specified offset std::unique_ptr<Host1x::Control> host_processor;
void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument);
Host1x::Host1x& host1x; std::mutex command_mutex;
std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor; std::condition_variable_any command_cv;
std::unique_ptr<Tegra::Host1x::Vic> vic_processor; std::deque<ChCommandHeaderList> command_lists;
std::unique_ptr<Tegra::Host1x::Control> host1x_processor; std::jthread thread;
std::unique_ptr<Host1x::SyncptIncrManager> sync_manager;
ChClassId current_class{};
ThiRegisters vic_thi_state{};
ThiRegisters nvdec_thi_state{};
u32 count{}; ThiRegisters thi_regs{};
u32 offset{}; ChClassId current_class;
u32 mask{};
bool incrementing{};
}; };
} // namespace Tegra } // namespace Tegra

View File

@ -250,30 +250,6 @@ struct GPU::Impl {
gpu_thread.SubmitList(channel, std::move(entries)); gpu_thread.SubmitList(channel, std::move(entries));
} }
/// Push GPU command buffer entries to be processed
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
if (!use_nvdec) {
return;
}
if (!cdma_pushers.contains(id)) {
cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(host1x));
}
// SubmitCommandBuffer would make the nvdec operations async, this is not currently working
// TODO(ameerj): RE proper async nvdec operation
// gpu_thread.SubmitCommandBuffer(std::move(entries));
cdma_pushers[id]->ProcessEntries(std::move(entries));
}
/// Frees the CDMAPusher instance to free up resources
void ClearCdmaInstance(u32 id) {
const auto iter = cdma_pushers.find(id);
if (iter != cdma_pushers.end()) {
cdma_pushers.erase(iter);
}
}
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(DAddr addr, u64 size) { void FlushRegion(DAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size); gpu_thread.FlushRegion(addr, size);
@ -362,7 +338,6 @@ struct GPU::Impl {
Core::System& system; Core::System& system;
Host1x::Host1x& host1x; Host1x::Host1x& host1x;
std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
std::unique_ptr<VideoCore::RendererBase> renderer; std::unique_ptr<VideoCore::RendererBase> renderer;
VideoCore::RasterizerInterface* rasterizer = nullptr; VideoCore::RasterizerInterface* rasterizer = nullptr;
const bool use_nvdec; const bool use_nvdec;
@ -556,14 +531,6 @@ void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
impl->PushGPUEntries(channel, std::move(entries)); impl->PushGPUEntries(channel, std::move(entries));
} }
void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
impl->PushCommandBuffer(id, entries);
}
void GPU::ClearCdmaInstance(u32 id) {
impl->ClearCdmaInstance(id);
}
VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) { VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) {
return impl->OnCPURead(addr, size); return impl->OnCPURead(addr, size);
} }

View File

@ -234,15 +234,6 @@ public:
/// Push GPU command entries to be processed /// Push GPU command entries to be processed
void PushGPUEntries(s32 channel, Tegra::CommandList&& entries); void PushGPUEntries(s32 channel, Tegra::CommandList&& entries);
/// Push GPU command buffer entries to be processed
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
/// Frees the CDMAPusher instance to free up resources
void ClearCdmaInstance(u32 id);
/// Swap buffers (render frame)
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
[[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size); [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size);

View File

@ -12,6 +12,7 @@
#include "video_core/dma_pusher.h" #include "video_core/dma_pusher.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/gpu_thread.h" #include "video_core/gpu_thread.h"
#include "video_core/host1x/host1x.h"
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
namespace VideoCommon::GPUThread { namespace VideoCommon::GPUThread {

View File

@ -1,113 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/settings.h"
#include "video_core/host1x/codecs/codec.h"
#include "video_core/host1x/codecs/h264.h"
#include "video_core/host1x/codecs/vp8.h"
#include "video_core/host1x/codecs/vp9.h"
#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
namespace Tegra {
Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
: host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)),
vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
Codec::~Codec() = default;
void Codec::Initialize() {
initialized = decode_api.Initialize(current_codec);
}
void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
if (current_codec != codec) {
current_codec = codec;
LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
}
}
void Codec::Decode() {
const bool is_first_frame = !initialized;
if (is_first_frame) {
Initialize();
}
if (!initialized) {
return;
}
// Assemble bitstream.
bool vp9_hidden_frame = false;
size_t configuration_size = 0;
const auto packet_data = [&]() {
switch (current_codec) {
case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
return h264_decoder->ComposeFrame(state, &configuration_size, is_first_frame);
case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
return vp8_decoder->ComposeFrame(state);
case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
vp9_decoder->ComposeFrame(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
return vp9_decoder->GetFrameBytes();
default:
ASSERT(false);
return std::span<const u8>{};
}
}();
// Send assembled bitstream to decoder.
if (!decode_api.SendPacket(packet_data, configuration_size)) {
return;
}
// Only receive/store visible frames.
if (vp9_hidden_frame) {
return;
}
// Receive output frames from decoder.
decode_api.ReceiveFrames(frames);
while (frames.size() > 10) {
LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame");
frames.pop();
}
}
std::unique_ptr<FFmpeg::Frame> Codec::GetCurrentFrame() {
// Sometimes VIC will request more frames than have been decoded.
// in this case, return a blank frame and don't overwrite previous data.
if (frames.empty()) {
return {};
}
auto frame = std::move(frames.front());
frames.pop();
return frame;
}
Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
return current_codec;
}
std::string_view Codec::GetCurrentCodecName() const {
switch (current_codec) {
case Host1x::NvdecCommon::VideoCodec::None:
return "None";
case Host1x::NvdecCommon::VideoCodec::H264:
return "H264";
case Host1x::NvdecCommon::VideoCodec::VP8:
return "VP8";
case Host1x::NvdecCommon::VideoCodec::H265:
return "H265";
case Host1x::NvdecCommon::VideoCodec::VP9:
return "VP9";
default:
return "Unknown";
}
}
} // namespace Tegra

View File

@ -1,63 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <optional>
#include <string_view>
#include <queue>
#include "common/common_types.h"
#include "video_core/host1x/ffmpeg/ffmpeg.h"
#include "video_core/host1x/nvdec_common.h"
namespace Tegra {
namespace Decoder {
class H264;
class VP8;
class VP9;
} // namespace Decoder
namespace Host1x {
class Host1x;
} // namespace Host1x
/// Front end for NVDEC video decoding: owns the per-codec bitstream composers and the FFmpeg
/// decode API, and queues decoded frames until they are fetched with GetCurrentFrame().
class Codec {
public:
explicit Codec(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs);
~Codec();
/// Initialize the codec. Nothing is returned; Decode() re-checks the `initialized` flag after
/// calling this and bails out if it is still false.
void Initialize();
/// Sets NVDEC video stream codec
void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec);
/// Call decoders to construct headers, decode AVFrame with ffmpeg
void Decode();
/// Returns the next decoded frame, or a null pointer when no frame is queued
[[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetCurrentFrame();
/// Returns the value of current_codec
[[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const;
/// Return name of the current codec ("Unknown" for values without a dedicated name)
[[nodiscard]] std::string_view GetCurrentCodecName() const;
private:
// Checked by Decode() before any packet is composed.
bool initialized{};
// Selects which of the composers below Decode() uses.
Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
// Receives composed packets and produces decoded frames.
FFmpeg::DecodeApi decode_api;
Host1x::Host1x& host1x;
// NVDEC register block, passed to the per-codec ComposeFrame calls.
const Host1x::NvdecCommon::NvdecRegisters& state;
// Per-codec bitstream composers.
std::unique_ptr<Decoder::H264> h264_decoder;
std::unique_ptr<Decoder::VP8> vp8_decoder;
std::unique_ptr<Decoder::VP9> vp9_decoder;
// Decoded frames awaiting GetCurrentFrame(); Decode() drops the oldest entries once more
// than 10 are queued.
std::queue<std::unique_ptr<FFmpeg::Frame>> frames{};
};
} // namespace Tegra

View File

@ -0,0 +1,71 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/settings.h"
#include "video_core/host1x/codecs/decoder.h"
#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
namespace Tegra {
/// Stores the references and id shared by every codec implementation.
/// `codec` and `initialized` are left for the derived constructor to set.
Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_,
                 const Host1x::NvdecCommon::NvdecRegisters& regs_,
                 Host1x::FrameQueue& frame_queue_)
    : host1x{host1x_},
      memory_manager{host1x_.GMMU()},
      regs{regs_},
      id{id_},
      frame_queue{frame_queue_} {}
// Out-of-line defaulted definition of the virtual destructor declared in decoder.h.
Decoder::~Decoder() = default;
/// Decodes one frame.
///
/// Asks the derived codec to compose a packet, sends it to the decode API, receives the
/// resulting frame and pushes it to the frame queue keyed by this decoder's id and the luma
/// offset(s) of the target surface. Interlaced content is pushed twice (top and bottom luma
/// offsets) with the same frame. Does nothing if the decoder failed to initialize.
void Decoder::Decode() {
    if (!initialized) {
        return;
    }

    const auto packet_data = ComposeFrame();
    // Send assembled bitstream to decoder.
    if (!decode_api.SendPacket(packet_data)) {
        return;
    }

    // Only receive/store visible frames.
    if (vp9_hidden_frame) {
        return;
    }

    // Receive output frames from decoder.
    auto frame = decode_api.ReceiveFrame();

    if (IsInterlaced()) {
        auto [luma_top, luma_bottom, chroma_top, chroma_bottom] = GetInterlacedOffsets();
        // Both fields are served from the same decoded frame.
        auto frame_copy = frame;

        // A failed decode is reported, but the empty frame is still queued below.
        if (!frame.get()) {
            LOG_ERROR(HW_GPU,
                      "Nvdec {} failed to decode interlaced frame for top 0x{:X} bottom 0x{:X}", id,
                      luma_top, luma_bottom);
        }

        if (UsingDecodeOrder()) {
            frame_queue.PushDecodeOrder(id, luma_top, std::move(frame));
            frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy));
        } else {
            frame_queue.PushPresentOrder(id, luma_top, std::move(frame));
            frame_queue.PushPresentOrder(id, luma_bottom, std::move(frame_copy));
        }
    } else {
        auto [luma_offset, chroma_offset] = GetProgressiveOffsets();

        // A failed decode is reported, but the empty frame is still queued below.
        if (!frame.get()) {
            LOG_ERROR(HW_GPU, "Nvdec {} failed to decode progressive frame for luma 0x{:X}", id,
                      luma_offset);
        }

        if (UsingDecodeOrder()) {
            frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame));
        } else {
            frame_queue.PushPresentOrder(id, luma_offset, std::move(frame));
        }
    }
}
} // namespace Tegra

View File

@ -0,0 +1,64 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <mutex>
#include <optional>
#include <string_view>
#include <unordered_map>
#include <queue>
#include "common/common_types.h"
#include "video_core/host1x/ffmpeg/ffmpeg.h"
#include "video_core/host1x/nvdec_common.h"
namespace Tegra {
namespace Host1x {
class Host1x;
class FrameQueue;
} // namespace Host1x
/// Common base for the per-codec NVDEC decoders.
///
/// Decode() drives the shared flow (compose packet, send it to the decode API, receive the
/// frame, push it to the frame queue); derived classes supply the codec-specific packet
/// composition and surface offsets through the protected virtual hooks.
class Decoder {
public:
    virtual ~Decoder();

    /// Call decoders to construct headers, decode AVFrame with ffmpeg
    void Decode();

    /// Forwards decode_api.UsingDecodeOrder(); Decode() uses it to choose between
    /// PushDecodeOrder and PushPresentOrder on the frame queue.
    bool UsingDecodeOrder() const {
        return decode_api.UsingDecodeOrder();
    }

    /// Returns the value of current_codec
    [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const {
        return codec;
    }

    /// Return name of the current codec
    [[nodiscard]] virtual std::string_view GetCurrentCodecName() const = 0;

protected:
    explicit Decoder(Host1x::Host1x& host1x, s32 id,
                     const Host1x::NvdecCommon::NvdecRegisters& regs,
                     Host1x::FrameQueue& frame_queue);

    /// Builds the bitstream packet that Decode() sends to the decode API.
    virtual std::span<const u8> ComposeFrame() = 0;
    /// Returns the {luma, chroma} offsets of the target surface for a progressive frame.
    virtual std::tuple<u64, u64> GetProgressiveOffsets() = 0;
    /// Returns the {luma_top, luma_bottom, chroma_top, chroma_bottom} offsets for an
    /// interlaced frame.
    virtual std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() = 0;
    /// Selects whether Decode() queues the frame via the interlaced or the progressive path.
    virtual bool IsInterlaced() = 0;

    Host1x::Host1x& host1x;
    Tegra::MemoryManager& memory_manager;
    const Host1x::NvdecCommon::NvdecRegisters& regs;
    s32 id;
    Host1x::FrameQueue& frame_queue;
    // Value-initialized so GetCurrentCodec() never reads an indeterminate value, even if a
    // derived constructor has not assigned the real codec yet.
    Host1x::NvdecCommon::VideoCodec codec{};
    FFmpeg::DecodeApi decode_api;
    // Decode() returns immediately while this is false.
    bool initialized{};
    // When set, Decode() still sends the packet but does not receive or queue a frame.
    bool vp9_hidden_frame{};
};
} // namespace Tegra

View File

@ -1,5 +1,5 @@
// SPDX-FileCopyrightText: Ryujinx Team and Contributors // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: GPL-2.0-or-later
#include <array> #include <array>
#include <bit> #include <bit>
@ -10,7 +10,7 @@
#include "video_core/host1x/host1x.h" #include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
namespace Tegra::Decoder { namespace Tegra::Decoders {
namespace { namespace {
// ZigZag LUTs from libavcodec. // ZigZag LUTs from libavcodec.
constexpr std::array<u8, 64> zig_zag_direct{ constexpr std::array<u8, 64> zig_zag_direct{
@ -25,23 +25,56 @@ constexpr std::array<u8, 16> zig_zag_scan{
}; };
} // Anonymous namespace } // Anonymous namespace
H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {} H264::H264(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
Host1x::FrameQueue& frame_queue_)
: Decoder{host1x_, id_, regs_, frame_queue_} {
codec = Host1x::NvdecCommon::VideoCodec::H264;
initialized = decode_api.Initialize(codec);
}
H264::~H264() = default; H264::~H264() = default;
std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, std::tuple<u64, u64> H264::GetProgressiveOffsets() {
size_t* out_configuration_size, bool is_first_frame) { auto pic_idx{current_context.h264_parameter_set.curr_pic_idx};
H264DecoderContext context; auto luma{regs.surface_luma_offsets[pic_idx].Address() +
host1x.GMMU().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); current_context.h264_parameter_set.luma_frame_offset.Address()};
auto chroma{regs.surface_chroma_offsets[pic_idx].Address() +
current_context.h264_parameter_set.chroma_frame_offset.Address()};
return {luma, chroma};
}
const s64 frame_number = context.h264_parameter_set.frame_number.Value(); std::tuple<u64, u64, u64, u64> H264::GetInterlacedOffsets() {
auto pic_idx{current_context.h264_parameter_set.curr_pic_idx};
auto luma_top{regs.surface_luma_offsets[pic_idx].Address() +
current_context.h264_parameter_set.luma_top_offset.Address()};
auto luma_bottom{regs.surface_luma_offsets[pic_idx].Address() +
current_context.h264_parameter_set.luma_bot_offset.Address()};
auto chroma_top{regs.surface_chroma_offsets[pic_idx].Address() +
current_context.h264_parameter_set.chroma_top_offset.Address()};
auto chroma_bottom{regs.surface_chroma_offsets[pic_idx].Address() +
current_context.h264_parameter_set.chroma_bot_offset.Address()};
return {luma_top, luma_bottom, chroma_top, chroma_bottom};
}
bool H264::IsInterlaced() {
return current_context.h264_parameter_set.luma_top_offset.Address() != 0 ||
current_context.h264_parameter_set.luma_bot_offset.Address() != 0;
}
std::span<const u8> H264::ComposeFrame() {
memory_manager.ReadBlock(regs.picture_info_offset.Address(), &current_context,
sizeof(H264DecoderContext));
const s64 frame_number = current_context.h264_parameter_set.frame_number.Value();
if (!is_first_frame && frame_number != 0) { if (!is_first_frame && frame_number != 0) {
frame.resize_destructive(context.stream_len); frame_scratch.resize_destructive(current_context.stream_len);
host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data(),
*out_configuration_size = 0; frame_scratch.size());
return frame; return frame_scratch;
} }
is_first_frame = false;
// Encode header // Encode header
H264BitWriter writer{}; H264BitWriter writer{};
writer.WriteU(1, 24); writer.WriteU(1, 24);
@ -53,7 +86,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
writer.WriteU(31, 8); writer.WriteU(31, 8);
writer.WriteUe(0); writer.WriteUe(0);
const u32 chroma_format_idc = const u32 chroma_format_idc =
static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value()); static_cast<u32>(current_context.h264_parameter_set.chroma_format_idc.Value());
writer.WriteUe(chroma_format_idc); writer.WriteUe(chroma_format_idc);
if (chroma_format_idc == 3) { if (chroma_format_idc == 3) {
writer.WriteBit(false); writer.WriteBit(false);
@ -61,42 +94,44 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
writer.WriteUe(0); writer.WriteUe(0);
writer.WriteUe(0); writer.WriteUe(0);
writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag writer.WriteBit(current_context.qpprime_y_zero_transform_bypass_flag.Value() != 0);
writer.WriteBit(false); // Scaling matrix present flag writer.WriteBit(false); // Scaling matrix present flag
writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value())); writer.WriteUe(
static_cast<u32>(current_context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
const auto order_cnt_type = const auto order_cnt_type =
static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value()); static_cast<u32>(current_context.h264_parameter_set.pic_order_cnt_type.Value());
writer.WriteUe(order_cnt_type); writer.WriteUe(order_cnt_type);
if (order_cnt_type == 0) { if (order_cnt_type == 0) {
writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4); writer.WriteUe(current_context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
} else if (order_cnt_type == 1) { } else if (order_cnt_type == 1) {
writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); writer.WriteBit(current_context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
writer.WriteSe(0); writer.WriteSe(0);
writer.WriteSe(0); writer.WriteSe(0);
writer.WriteUe(0); writer.WriteUe(0);
} }
const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / const s32 pic_height = current_context.h264_parameter_set.frame_height_in_mbs /
(context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); (current_context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
// TODO (ameerj): Where do we get this number, it seems to be particular for each stream u32 max_num_ref_frames =
const auto nvdec_decoding = Settings::values.nvdec_emulation.GetValue(); std::max(std::max(current_context.h264_parameter_set.num_refidx_l0_default_active,
const bool uses_gpu_decoding = nvdec_decoding == Settings::NvdecEmulation::Gpu; current_context.h264_parameter_set.num_refidx_l1_default_active) +
const u32 max_num_ref_frames = uses_gpu_decoding ? 6u : 16u; 1,
4);
writer.WriteUe(max_num_ref_frames); writer.WriteUe(max_num_ref_frames);
writer.WriteBit(false); writer.WriteBit(false);
writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); writer.WriteUe(current_context.h264_parameter_set.pic_width_in_mbs - 1);
writer.WriteUe(pic_height - 1); writer.WriteUe(pic_height - 1);
writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); writer.WriteBit(current_context.h264_parameter_set.frame_mbs_only_flag != 0);
if (!context.h264_parameter_set.frame_mbs_only_flag) { if (!current_context.h264_parameter_set.frame_mbs_only_flag) {
writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0); writer.WriteBit(current_context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
} }
writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0); writer.WriteBit(current_context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
writer.WriteBit(false); // Frame cropping flag writer.WriteBit(false); // Frame cropping flag
writer.WriteBit(false); // VUI parameter present flag writer.WriteBit(false); // VUI parameter present flag
@ -111,57 +146,59 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
writer.WriteUe(0); writer.WriteUe(0);
writer.WriteUe(0); writer.WriteUe(0);
writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); writer.WriteBit(current_context.h264_parameter_set.entropy_coding_mode_flag != 0);
writer.WriteBit(context.h264_parameter_set.pic_order_present_flag != 0); writer.WriteBit(current_context.h264_parameter_set.pic_order_present_flag != 0);
writer.WriteUe(0); writer.WriteUe(0);
writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); writer.WriteUe(current_context.h264_parameter_set.num_refidx_l0_default_active);
writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); writer.WriteUe(current_context.h264_parameter_set.num_refidx_l1_default_active);
writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0); writer.WriteBit(current_context.h264_parameter_set.flags.weighted_pred.Value() != 0);
writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2); writer.WriteU(static_cast<s32>(current_context.h264_parameter_set.weighted_bipred_idc.Value()),
s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value()); 2);
s32 pic_init_qp =
static_cast<s32>(current_context.h264_parameter_set.pic_init_qp_minus26.Value());
writer.WriteSe(pic_init_qp); writer.WriteSe(pic_init_qp);
writer.WriteSe(0); writer.WriteSe(0);
s32 chroma_qp_index_offset = s32 chroma_qp_index_offset =
static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value()); static_cast<s32>(current_context.h264_parameter_set.chroma_qp_index_offset.Value());
writer.WriteSe(chroma_qp_index_offset); writer.WriteSe(chroma_qp_index_offset);
writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0); writer.WriteBit(current_context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0); writer.WriteBit(current_context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0); writer.WriteBit(current_context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); writer.WriteBit(current_context.h264_parameter_set.transform_8x8_mode_flag != 0);
writer.WriteBit(true); // pic_scaling_matrix_present_flag writer.WriteBit(true); // pic_scaling_matrix_present_flag
for (s32 index = 0; index < 6; index++) { for (s32 index = 0; index < 6; index++) {
writer.WriteBit(true); writer.WriteBit(true);
std::span<const u8> matrix{context.weight_scale}; std::span<const u8> matrix{current_context.weight_scale_4x4};
writer.WriteScalingList(scan, matrix, index * 16, 16); writer.WriteScalingList(scan_scratch, matrix, index * 16, 16);
} }
if (context.h264_parameter_set.transform_8x8_mode_flag) { if (current_context.h264_parameter_set.transform_8x8_mode_flag) {
for (s32 index = 0; index < 2; index++) { for (s32 index = 0; index < 2; index++) {
writer.WriteBit(true); writer.WriteBit(true);
std::span<const u8> matrix{context.weight_scale_8x8}; std::span<const u8> matrix{current_context.weight_scale_8x8};
writer.WriteScalingList(scan, matrix, index * 64, 64); writer.WriteScalingList(scan_scratch, matrix, index * 64, 64);
} }
} }
s32 chroma_qp_index_offset2 = s32 chroma_qp_index_offset2 =
static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value()); static_cast<s32>(current_context.h264_parameter_set.second_chroma_qp_index_offset.Value());
writer.WriteSe(chroma_qp_index_offset2); writer.WriteSe(chroma_qp_index_offset2);
writer.End(); writer.End();
const auto& encoded_header = writer.GetByteArray(); const auto& encoded_header = writer.GetByteArray();
frame.resize(encoded_header.size() + context.stream_len); frame_scratch.resize(encoded_header.size() + current_context.stream_len);
std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); std::memcpy(frame_scratch.data(), encoded_header.data(), encoded_header.size());
*out_configuration_size = encoded_header.size(); memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(),
host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data() + encoded_header.size(), frame_scratch.data() + encoded_header.size(),
context.stream_len); current_context.stream_len);
return frame; return frame_scratch;
} }
H264BitWriter::H264BitWriter() = default; H264BitWriter::H264BitWriter() = default;
@ -278,4 +315,4 @@ void H264BitWriter::Flush() {
buffer = 0; buffer = 0;
buffer_pos = 0; buffer_pos = 0;
} }
} // namespace Tegra::Decoder } // namespace Tegra::Decoders

View File

@ -1,5 +1,5 @@
// SPDX-FileCopyrightText: Ryujinx Team and Contributors // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
@ -10,6 +10,7 @@
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/scratch_buffer.h" #include "common/scratch_buffer.h"
#include "video_core/host1x/codecs/decoder.h"
#include "video_core/host1x/nvdec_common.h" #include "video_core/host1x/nvdec_common.h"
namespace Tegra { namespace Tegra {
@ -18,7 +19,7 @@ namespace Host1x {
class Host1x; class Host1x;
} // namespace Host1x } // namespace Host1x
namespace Decoder { namespace Decoders {
class H264BitWriter { class H264BitWriter {
public: public:
@ -60,123 +61,213 @@ private:
std::vector<u8> byte_array; std::vector<u8> byte_array;
}; };
class H264 { struct Offset {
public: constexpr u32 Address() const noexcept {
explicit H264(Host1x::Host1x& host1x); return offset << 8;
~H264(); }
/// Compose the H264 frame for FFmpeg decoding
[[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
size_t* out_configuration_size,
bool is_first_frame = false);
private: private:
Common::ScratchBuffer<u8> frame; u32 offset;
Common::ScratchBuffer<u8> scan; };
Host1x::Host1x& host1x; static_assert(std::is_trivial_v<Offset>, "Offset must be trivial");
static_assert(sizeof(Offset) == 0x4, "Offset has the wrong size!");
struct H264ParameterSet { struct H264ParameterSet {
s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00 s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
s32 delta_pic_order_always_zero_flag; ///< 0x04 s32 delta_pic_order_always_zero_flag; ///< 0x04
s32 frame_mbs_only_flag; ///< 0x08 s32 frame_mbs_only_flag; ///< 0x08
u32 pic_width_in_mbs; ///< 0x0C u32 pic_width_in_mbs; ///< 0x0C
u32 frame_height_in_map_units; ///< 0x10 u32 frame_height_in_mbs; ///< 0x10
union { ///< 0x14 union { ///< 0x14
BitField<0, 2, u32> tile_format; BitField<0, 2, u32> tile_format;
BitField<2, 3, u32> gob_height; BitField<2, 3, u32> gob_height;
}; BitField<5, 27, u32> reserved_surface_format;
u32 entropy_coding_mode_flag; ///< 0x18
s32 pic_order_present_flag; ///< 0x1C
s32 num_refidx_l0_default_active; ///< 0x20
s32 num_refidx_l1_default_active; ///< 0x24
s32 deblocking_filter_control_present_flag; ///< 0x28
s32 redundant_pic_cnt_present_flag; ///< 0x2C
u32 transform_8x8_mode_flag; ///< 0x30
u32 pitch_luma; ///< 0x34
u32 pitch_chroma; ///< 0x38
u32 luma_top_offset; ///< 0x3C
u32 luma_bot_offset; ///< 0x40
u32 luma_frame_offset; ///< 0x44
u32 chroma_top_offset; ///< 0x48
u32 chroma_bot_offset; ///< 0x4C
u32 chroma_frame_offset; ///< 0x50
u32 hist_buffer_size; ///< 0x54
union { ///< 0x58
union {
BitField<0, 1, u64> mbaff_frame;
BitField<1, 1, u64> direct_8x8_inference;
BitField<2, 1, u64> weighted_pred;
BitField<3, 1, u64> constrained_intra_pred;
BitField<4, 1, u64> ref_pic;
BitField<5, 1, u64> field_pic;
BitField<6, 1, u64> bottom_field;
BitField<7, 1, u64> second_field;
} flags;
BitField<8, 4, u64> log2_max_frame_num_minus4;
BitField<12, 2, u64> chroma_format_idc;
BitField<14, 2, u64> pic_order_cnt_type;
BitField<16, 6, s64> pic_init_qp_minus26;
BitField<22, 5, s64> chroma_qp_index_offset;
BitField<27, 5, s64> second_chroma_qp_index_offset;
BitField<32, 2, u64> weighted_bipred_idc;
BitField<34, 7, u64> curr_pic_idx;
BitField<41, 5, u64> curr_col_idx;
BitField<46, 16, u64> frame_number;
BitField<62, 1, u64> frame_surfaces;
BitField<63, 1, u64> output_memory_layout;
};
}; };
static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size"); u32 entropy_coding_mode_flag; ///< 0x18
s32 pic_order_present_flag; ///< 0x1C
struct H264DecoderContext { s32 num_refidx_l0_default_active; ///< 0x20
INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000 s32 num_refidx_l1_default_active; ///< 0x24
u32 stream_len; ///< 0x0048 s32 deblocking_filter_control_present_flag; ///< 0x28
INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C s32 redundant_pic_cnt_present_flag; ///< 0x2C
H264ParameterSet h264_parameter_set; ///< 0x0058 u32 transform_8x8_mode_flag; ///< 0x30
INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8 u32 pitch_luma; ///< 0x34
std::array<u8, 0x60> weight_scale; ///< 0x01C0 u32 pitch_chroma; ///< 0x38
std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220 Offset luma_top_offset; ///< 0x3C
Offset luma_bot_offset; ///< 0x40
Offset luma_frame_offset; ///< 0x44
Offset chroma_top_offset; ///< 0x48
Offset chroma_bot_offset; ///< 0x4C
Offset chroma_frame_offset; ///< 0x50
u32 hist_buffer_size; ///< 0x54
union { ///< 0x58
union {
BitField<0, 1, u64> mbaff_frame;
BitField<1, 1, u64> direct_8x8_inference;
BitField<2, 1, u64> weighted_pred;
BitField<3, 1, u64> constrained_intra_pred;
BitField<4, 1, u64> ref_pic;
BitField<5, 1, u64> field_pic;
BitField<6, 1, u64> bottom_field;
BitField<7, 1, u64> second_field;
} flags;
BitField<8, 4, u64> log2_max_frame_num_minus4;
BitField<12, 2, u64> chroma_format_idc;
BitField<14, 2, u64> pic_order_cnt_type;
BitField<16, 6, s64> pic_init_qp_minus26;
BitField<22, 5, s64> chroma_qp_index_offset;
BitField<27, 5, s64> second_chroma_qp_index_offset;
BitField<32, 2, u64> weighted_bipred_idc;
BitField<34, 7, u64> curr_pic_idx;
BitField<41, 5, u64> curr_col_idx;
BitField<46, 16, u64> frame_number;
BitField<62, 1, u64> frame_surfaces;
BitField<63, 1, u64> output_memory_layout;
}; };
static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size"); };
static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
#define ASSERT_POSITION(field_name, position) \ #define ASSERT_POSITION(field_name, position) \
static_assert(offsetof(H264ParameterSet, field_name) == position, \ static_assert(offsetof(H264ParameterSet, field_name) == position, \
"Field " #field_name " has invalid position") "Field " #field_name " has invalid position")
ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00); ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04); ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
ASSERT_POSITION(frame_mbs_only_flag, 0x08); ASSERT_POSITION(frame_mbs_only_flag, 0x08);
ASSERT_POSITION(pic_width_in_mbs, 0x0C); ASSERT_POSITION(pic_width_in_mbs, 0x0C);
ASSERT_POSITION(frame_height_in_map_units, 0x10); ASSERT_POSITION(frame_height_in_mbs, 0x10);
ASSERT_POSITION(tile_format, 0x14); ASSERT_POSITION(tile_format, 0x14);
ASSERT_POSITION(entropy_coding_mode_flag, 0x18); ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
ASSERT_POSITION(pic_order_present_flag, 0x1C); ASSERT_POSITION(pic_order_present_flag, 0x1C);
ASSERT_POSITION(num_refidx_l0_default_active, 0x20); ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
ASSERT_POSITION(num_refidx_l1_default_active, 0x24); ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28); ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C); ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
ASSERT_POSITION(transform_8x8_mode_flag, 0x30); ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
ASSERT_POSITION(pitch_luma, 0x34); ASSERT_POSITION(pitch_luma, 0x34);
ASSERT_POSITION(pitch_chroma, 0x38); ASSERT_POSITION(pitch_chroma, 0x38);
ASSERT_POSITION(luma_top_offset, 0x3C); ASSERT_POSITION(luma_top_offset, 0x3C);
ASSERT_POSITION(luma_bot_offset, 0x40); ASSERT_POSITION(luma_bot_offset, 0x40);
ASSERT_POSITION(luma_frame_offset, 0x44); ASSERT_POSITION(luma_frame_offset, 0x44);
ASSERT_POSITION(chroma_top_offset, 0x48); ASSERT_POSITION(chroma_top_offset, 0x48);
ASSERT_POSITION(chroma_bot_offset, 0x4C); ASSERT_POSITION(chroma_bot_offset, 0x4C);
ASSERT_POSITION(chroma_frame_offset, 0x50); ASSERT_POSITION(chroma_frame_offset, 0x50);
ASSERT_POSITION(hist_buffer_size, 0x54); ASSERT_POSITION(hist_buffer_size, 0x54);
ASSERT_POSITION(flags, 0x58); ASSERT_POSITION(flags, 0x58);
#undef ASSERT_POSITION #undef ASSERT_POSITION
struct DpbEntry {
union {
BitField<0, 7, u32> index;
BitField<7, 5, u32> col_idx;
BitField<12, 2, u32> state;
BitField<14, 1, u32> is_long_term;
BitField<15, 1, u32> non_existing;
BitField<16, 1, u32> is_field;
BitField<17, 4, u32> top_field_marking;
BitField<21, 4, u32> bottom_field_marking;
BitField<25, 1, u32> output_memory_layout;
BitField<26, 6, u32> reserved;
} flags;
std::array<u32, 2> field_order_cnt;
u32 frame_idx;
};
static_assert(sizeof(DpbEntry) == 0x10, "DpbEntry has the wrong size!");
struct DisplayParam {
union {
BitField<0, 1, u32> enable_tf_output;
BitField<1, 1, u32> vc1_map_y_flag;
BitField<2, 3, u32> map_y_value;
BitField<5, 1, u32> vc1_map_uv_flag;
BitField<6, 3, u32> map_uv_value;
BitField<9, 8, u32> out_stride;
BitField<17, 3, u32> tiling_format;
BitField<20, 1, u32> output_structure; // 0=frame, 1=field
BitField<21, 11, u32> reserved0;
};
std::array<s32, 2> output_top;
std::array<s32, 2> output_bottom;
union {
BitField<0, 1, u32> enable_histogram;
BitField<1, 12, u32> histogram_start_x;
BitField<13, 12, u32> histogram_start_y;
BitField<25, 7, u32> reserved1;
};
union {
BitField<0, 12, u32> histogram_end_x;
BitField<12, 12, u32> histogram_end_y;
BitField<24, 8, u32> reserved2;
};
};
static_assert(sizeof(DisplayParam) == 0x1C, "DisplayParam has the wrong size!");
struct H264DecoderContext {
INSERT_PADDING_WORDS_NOINIT(13); ///< 0x0000
std::array<u8, 16> eos; ///< 0x0034
u8 explicit_eos_present_flag; ///< 0x0044
u8 hint_dump_en; ///< 0x0045
INSERT_PADDING_BYTES_NOINIT(2); ///< 0x0046
u32 stream_len; ///< 0x0048
u32 slice_count; ///< 0x004C
u32 mbhist_buffer_size; ///< 0x0050
u32 gptimer_timeout_value; ///< 0x0054
H264ParameterSet h264_parameter_set; ///< 0x0058
std::array<s32, 2> curr_field_order_cnt; ///< 0x00B8
std::array<DpbEntry, 16> dpb; ///< 0x00C0
std::array<u8, 0x60> weight_scale_4x4; ///< 0x01C0
std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
std::array<u8, 2> num_inter_view_refs_lX; ///< 0x02A0
std::array<u8, 14> reserved2; ///< 0x02A2
std::array<std::array<s8, 16>, 2> inter_view_refidx_lX; ///< 0x02B0
union { ///< 0x02D0
BitField<0, 1, u32> lossless_ipred8x8_filter_enable;
BitField<1, 1, u32> qpprime_y_zero_transform_bypass_flag;
BitField<2, 30, u32> reserved3;
};
DisplayParam display_param; ///< 0x02D4
std::array<u32, 3> reserved4; ///< 0x02F0
};
static_assert(sizeof(H264DecoderContext) == 0x2FC, "H264DecoderContext is an invalid size");
#define ASSERT_POSITION(field_name, position) \ #define ASSERT_POSITION(field_name, position) \
static_assert(offsetof(H264DecoderContext, field_name) == position, \ static_assert(offsetof(H264DecoderContext, field_name) == position, \
"Field " #field_name " has invalid position") "Field " #field_name " has invalid position")
ASSERT_POSITION(stream_len, 0x48); ASSERT_POSITION(stream_len, 0x48);
ASSERT_POSITION(h264_parameter_set, 0x58); ASSERT_POSITION(h264_parameter_set, 0x58);
ASSERT_POSITION(weight_scale, 0x1C0); ASSERT_POSITION(dpb, 0xC0);
ASSERT_POSITION(weight_scale_4x4, 0x1C0);
#undef ASSERT_POSITION #undef ASSERT_POSITION
class H264 final : public Decoder {
public:
explicit H264(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
Host1x::FrameQueue& frame_queue);
~H264() override;
H264(const H264&) = delete;
H264& operator=(const H264&) = delete;
H264(H264&&) = delete;
H264& operator=(H264&&) = delete;
/// Compose the H264 frame for FFmpeg decoding
[[nodiscard]] std::span<const u8> ComposeFrame() override;
std::tuple<u64, u64> GetProgressiveOffsets() override;
std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() override;
bool IsInterlaced() override;
std::string_view GetCurrentCodecName() const override {
return "H264";
}
private:
bool is_first_frame{true};
Common::ScratchBuffer<u8> frame_scratch;
Common::ScratchBuffer<u8> scan_scratch;
H264DecoderContext current_context{};
}; };
} // namespace Decoder } // namespace Decoders
} // namespace Tegra } // namespace Tegra

View File

@ -7,47 +7,70 @@
#include "video_core/host1x/host1x.h" #include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
namespace Tegra::Decoder { namespace Tegra::Decoders {
VP8::VP8(Host1x::Host1x& host1x_) : host1x{host1x_} {} VP8::VP8(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
Host1x::FrameQueue& frame_queue_)
: Decoder{host1x_, id_, regs_, frame_queue_} {
codec = Host1x::NvdecCommon::VideoCodec::VP8;
initialized = decode_api.Initialize(codec);
}
VP8::~VP8() = default; VP8::~VP8() = default;
std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { std::tuple<u64, u64> VP8::GetProgressiveOffsets() {
VP8PictureInfo info; auto luma{regs.surface_luma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
host1x.GMMU().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); auto chroma{regs.surface_chroma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
return {luma, chroma};
}
const bool is_key_frame = info.key_frame == 1u; std::tuple<u64, u64, u64, u64> VP8::GetInterlacedOffsets() {
const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size); auto luma_top{regs.surface_luma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
auto luma_bottom{
regs.surface_luma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
auto chroma_top{
regs.surface_chroma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
auto chroma_bottom{
regs.surface_chroma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
return {luma_top, luma_bottom, chroma_top, chroma_bottom};
}
std::span<const u8> VP8::ComposeFrame() {
memory_manager.ReadBlock(regs.picture_info_offset.Address(), &current_context,
sizeof(VP8PictureInfo));
const bool is_key_frame = current_context.key_frame == 1u;
const auto bitstream_size = static_cast<size_t>(current_context.vld_buffer_size);
const size_t header_size = is_key_frame ? 10u : 3u; const size_t header_size = is_key_frame ? 10u : 3u;
frame.resize(header_size + bitstream_size); frame_scratch.resize(header_size + bitstream_size);
// Based on page 30 of the VP8 specification. // Based on page 30 of the VP8 specification.
// https://datatracker.ietf.org/doc/rfc6386/ // https://datatracker.ietf.org/doc/rfc6386/
frame[0] = is_key_frame ? 0u : 1u; // 1-bit frame type (0: keyframe, 1: interframes). frame_scratch[0] = is_key_frame ? 0u : 1u; // 1-bit frame type (0: keyframe, 1: interframes).
frame[0] |= static_cast<u8>((info.version & 7u) << 1u); // 3-bit version number frame_scratch[0] |=
frame[0] |= static_cast<u8>(1u << 4u); // 1-bit show_frame flag static_cast<u8>((current_context.version & 7u) << 1u); // 3-bit version number
frame_scratch[0] |= static_cast<u8>(1u << 4u); // 1-bit show_frame flag
// The next 19-bits are the first partition size // The next 19-bits are the first partition size
frame[0] |= static_cast<u8>((info.first_part_size & 7u) << 5u); frame_scratch[0] |= static_cast<u8>((current_context.first_part_size & 7u) << 5u);
frame[1] = static_cast<u8>((info.first_part_size & 0x7f8u) >> 3u); frame_scratch[1] = static_cast<u8>((current_context.first_part_size & 0x7f8u) >> 3u);
frame[2] = static_cast<u8>((info.first_part_size & 0x7f800u) >> 11u); frame_scratch[2] = static_cast<u8>((current_context.first_part_size & 0x7f800u) >> 11u);
if (is_key_frame) { if (is_key_frame) {
frame[3] = 0x9du; frame_scratch[3] = 0x9du;
frame[4] = 0x01u; frame_scratch[4] = 0x01u;
frame[5] = 0x2au; frame_scratch[5] = 0x2au;
// TODO(ameerj): Horizontal/Vertical Scale // TODO(ameerj): Horizontal/Vertical Scale
// 16 bits: (2 bits Horizontal Scale << 14) | Width (14 bits) // 16 bits: (2 bits Horizontal Scale << 14) | Width (14 bits)
frame[6] = static_cast<u8>(info.frame_width & 0xff); frame_scratch[6] = static_cast<u8>(current_context.frame_width & 0xff);
frame[7] = static_cast<u8>(((info.frame_width >> 8) & 0x3f)); frame_scratch[7] = static_cast<u8>(((current_context.frame_width >> 8) & 0x3f));
// 16 bits:(2 bits Vertical Scale << 14) | Height (14 bits) // 16 bits:(2 bits Vertical Scale << 14) | Height (14 bits)
frame[8] = static_cast<u8>(info.frame_height & 0xff); frame_scratch[8] = static_cast<u8>(current_context.frame_height & 0xff);
frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f)); frame_scratch[9] = static_cast<u8>(((current_context.frame_height >> 8) & 0x3f));
} }
const u64 bitstream_offset = state.frame_bitstream_offset; const u64 bitstream_offset = regs.frame_bitstream_offset.Address();
host1x.GMMU().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size); memory_manager.ReadBlock(bitstream_offset, frame_scratch.data() + header_size, bitstream_size);
return frame; return frame_scratch;
} }
} // namespace Tegra::Decoder } // namespace Tegra::Decoders

View File

@ -9,6 +9,7 @@
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/scratch_buffer.h" #include "common/scratch_buffer.h"
#include "video_core/host1x/codecs/decoder.h"
#include "video_core/host1x/nvdec_common.h" #include "video_core/host1x/nvdec_common.h"
namespace Tegra { namespace Tegra {
@ -17,20 +18,41 @@ namespace Host1x {
class Host1x; class Host1x;
} // namespace Host1x } // namespace Host1x
namespace Decoder { namespace Decoders {
enum class Vp8SurfaceIndex : u32 {
Last = 0,
Golden = 1,
AltRef = 2,
Current = 3,
};
class VP8 { class VP8 final : public Decoder {
public: public:
explicit VP8(Host1x::Host1x& host1x); explicit VP8(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
~VP8(); Host1x::FrameQueue& frame_queue);
~VP8() override;
/// Compose the VP8 frame for FFmpeg decoding VP8(const VP8&) = delete;
[[nodiscard]] std::span<const u8> ComposeFrame( VP8& operator=(const VP8&) = delete;
const Host1x::NvdecCommon::NvdecRegisters& state);
VP8(VP8&&) = delete;
VP8& operator=(VP8&&) = delete;
[[nodiscard]] std::span<const u8> ComposeFrame() override;
std::tuple<u64, u64> GetProgressiveOffsets() override;
std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() override;
bool IsInterlaced() override {
return false;
}
std::string_view GetCurrentCodecName() const override {
return "VP8";
}
private: private:
Common::ScratchBuffer<u8> frame; Common::ScratchBuffer<u8> frame_scratch;
Host1x::Host1x& host1x;
struct VP8PictureInfo { struct VP8PictureInfo {
INSERT_PADDING_WORDS_NOINIT(14); INSERT_PADDING_WORDS_NOINIT(14);
@ -73,7 +95,9 @@ private:
INSERT_PADDING_WORDS_NOINIT(3); INSERT_PADDING_WORDS_NOINIT(3);
}; };
static_assert(sizeof(VP8PictureInfo) == 0xc0, "PictureInfo is an invalid size"); static_assert(sizeof(VP8PictureInfo) == 0xc0, "PictureInfo is an invalid size");
VP8PictureInfo current_context{};
}; };
} // namespace Decoder } // namespace Decoders
} // namespace Tegra } // namespace Tegra

View File

@ -4,12 +4,13 @@
#include <algorithm> // for std::copy #include <algorithm> // for std::copy
#include <numeric> #include <numeric>
#include "common/alignment.h"
#include "common/assert.h" #include "common/assert.h"
#include "video_core/host1x/codecs/vp9.h" #include "video_core/host1x/codecs/vp9.h"
#include "video_core/host1x/host1x.h" #include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
namespace Tegra::Decoder { namespace Tegra::Decoders {
namespace { namespace {
constexpr u32 diff_update_probability = 252; constexpr u32 diff_update_probability = 252;
constexpr u32 frame_sync_code = 0x498342; constexpr u32 frame_sync_code = 0x498342;
@ -237,7 +238,12 @@ constexpr std::array<u8, 254> map_lut{
} }
} // Anonymous namespace } // Anonymous namespace
VP9::VP9(Host1x::Host1x& host1x_) : host1x{host1x_} {} VP9::VP9(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
Host1x::FrameQueue& frame_queue_)
: Decoder{host1x_, id_, regs_, frame_queue_} {
codec = Host1x::NvdecCommon::VideoCodec::VP9;
initialized = decode_api.Initialize(codec);
}
VP9::~VP9() = default; VP9::~VP9() = default;
@ -356,35 +362,113 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
} }
} }
Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) { void VP9::WriteSegmentation(VpxBitStreamWriter& writer) {
PictureInfo picture_info; bool enabled = current_picture_info.segmentation.enabled != 0;
host1x.GMMU().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); writer.WriteBit(enabled);
Vp9PictureInfo vp9_info = picture_info.Convert(); if (!enabled) {
return;
}
InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); auto update_map = current_picture_info.segmentation.update_map != 0;
writer.WriteBit(update_map);
if (update_map) {
EntropyProbs entropy_probs{};
memory_manager.ReadBlock(regs.vp9_prob_tab_buffer_offset.Address(), &entropy_probs,
sizeof(entropy_probs));
auto WriteProb = [&](u8 prob) {
bool coded = prob != 255;
writer.WriteBit(coded);
if (coded) {
writer.WriteU(prob, 8);
}
};
for (size_t i = 0; i < entropy_probs.mb_segment_tree_probs.size(); i++) {
WriteProb(entropy_probs.mb_segment_tree_probs[i]);
}
auto temporal_update = current_picture_info.segmentation.temporal_update != 0;
writer.WriteBit(temporal_update);
if (temporal_update) {
for (s32 i = 0; i < 3; i++) {
WriteProb(entropy_probs.segment_pred_probs[i]);
}
}
}
if (last_segmentation == current_picture_info.segmentation) {
writer.WriteBit(false);
return;
}
last_segmentation = current_picture_info.segmentation;
writer.WriteBit(true);
writer.WriteBit(current_picture_info.segmentation.abs_delta != 0);
constexpr s32 MAX_SEGMENTS = 8;
constexpr std::array SegmentationFeatureBits = {8, 6, 2, 0};
for (s32 i = 0; i < MAX_SEGMENTS; i++) {
auto q_enabled = current_picture_info.segmentation.feature_enabled[i][0] != 0;
writer.WriteBit(q_enabled);
if (q_enabled) {
writer.WriteS(current_picture_info.segmentation.feature_data[i][0],
SegmentationFeatureBits[0]);
}
auto lf_enabled = current_picture_info.segmentation.feature_enabled[i][1] != 0;
writer.WriteBit(lf_enabled);
if (lf_enabled) {
writer.WriteS(current_picture_info.segmentation.feature_data[i][1],
SegmentationFeatureBits[1]);
}
auto ref_enabled = current_picture_info.segmentation.feature_enabled[i][2] != 0;
writer.WriteBit(ref_enabled);
if (ref_enabled) {
writer.WriteU(current_picture_info.segmentation.feature_data[i][2],
SegmentationFeatureBits[2]);
}
auto skip_enabled = current_picture_info.segmentation.feature_enabled[i][3] != 0;
writer.WriteBit(skip_enabled);
}
}
Vp9PictureInfo VP9::GetVp9PictureInfo() {
memory_manager.ReadBlock(regs.picture_info_offset.Address(), &current_picture_info,
sizeof(PictureInfo));
Vp9PictureInfo vp9_info = current_picture_info.Convert();
InsertEntropy(regs.vp9_prob_tab_buffer_offset.Address(), vp9_info.entropy);
// surface_luma_offset[0:3] contains the address of the reference frame offsets in the following // surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
// order: last, golden, altref, current. // order: last, golden, altref, current.
std::copy(state.surface_luma_offset.begin(), state.surface_luma_offset.begin() + 4, for (size_t i = 0; i < 4; i++) {
vp9_info.frame_offsets.begin()); vp9_info.frame_offsets[i] = regs.surface_luma_offsets[i].Address();
}
return vp9_info; return vp9_info;
} }
void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
EntropyProbs entropy; EntropyProbs entropy;
host1x.GMMU().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); memory_manager.ReadBlock(offset, &entropy, sizeof(EntropyProbs));
entropy.Convert(dst); entropy.Convert(dst);
} }
Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { Vp9FrameContainer VP9::GetCurrentFrame() {
Vp9FrameContainer current_frame{}; Vp9FrameContainer current_frame{};
{ {
// gpu.SyncGuestHost(); epic, why? // gpu.SyncGuestHost(); epic, why?
current_frame.info = GetVp9PictureInfo(state); current_frame.info = GetVp9PictureInfo();
current_frame.bit_stream.resize(current_frame.info.bitstream_size); current_frame.bit_stream.resize(current_frame.info.bitstream_size);
host1x.GMMU().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(), memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(),
current_frame.info.bitstream_size); current_frame.bit_stream.data(),
current_frame.info.bitstream_size);
} }
if (!next_frame.bit_stream.empty()) { if (!next_frame.bit_stream.empty()) {
Vp9FrameContainer temp{ Vp9FrameContainer temp{
@ -742,8 +826,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q); uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q); uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
ASSERT(!current_frame_info.segment_enabled); WriteSegmentation(uncomp_writer);
uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width); const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);
const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width); const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width);
@ -770,10 +853,29 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
return uncomp_writer; return uncomp_writer;
} }
void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { std::tuple<u64, u64> VP9::GetProgressiveOffsets() {
auto luma{regs.surface_luma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
auto chroma{regs.surface_chroma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
return {luma, chroma};
}
std::tuple<u64, u64, u64, u64> VP9::GetInterlacedOffsets() {
auto luma_top{regs.surface_luma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
auto luma_bottom{
regs.surface_luma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
auto chroma_top{
regs.surface_chroma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
auto chroma_bottom{
regs.surface_chroma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
return {luma_top, luma_bottom, chroma_top, chroma_bottom};
}
std::span<const u8> VP9::ComposeFrame() {
vp9_hidden_frame = false;
std::vector<u8> bitstream; std::vector<u8> bitstream;
{ {
Vp9FrameContainer curr_frame = GetCurrentFrame(state); Vp9FrameContainer curr_frame = GetCurrentFrame();
current_frame_info = curr_frame.info; current_frame_info = curr_frame.info;
bitstream = std::move(curr_frame.bit_stream); bitstream = std::move(curr_frame.bit_stream);
} }
@ -786,12 +888,16 @@ void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray(); std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray();
// Write headers and frame to buffer // Write headers and frame to buffer
frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size()); frame_scratch.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size());
std::copy(uncompressed_header.begin(), uncompressed_header.end(), frame.begin()); std::copy(uncompressed_header.begin(), uncompressed_header.end(), frame_scratch.begin());
std::copy(compressed_header.begin(), compressed_header.end(), std::copy(compressed_header.begin(), compressed_header.end(),
frame.begin() + uncompressed_header.size()); frame_scratch.begin() + uncompressed_header.size());
std::copy(bitstream.begin(), bitstream.end(), std::copy(bitstream.begin(), bitstream.end(),
frame.begin() + uncompressed_header.size() + compressed_header.size()); frame_scratch.begin() + uncompressed_header.size() + compressed_header.size());
vp9_hidden_frame = WasFrameHidden();
return GetFrameBytes();
} }
VpxRangeEncoder::VpxRangeEncoder() { VpxRangeEncoder::VpxRangeEncoder() {
@ -944,4 +1050,4 @@ const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const {
return byte_array; return byte_array;
} }
} // namespace Tegra::Decoder } // namespace Tegra::Decoders

View File

@ -10,6 +10,7 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/scratch_buffer.h" #include "common/scratch_buffer.h"
#include "common/stream.h" #include "common/stream.h"
#include "video_core/host1x/codecs/decoder.h"
#include "video_core/host1x/codecs/vp9_types.h" #include "video_core/host1x/codecs/vp9_types.h"
#include "video_core/host1x/nvdec_common.h" #include "video_core/host1x/nvdec_common.h"
@ -19,7 +20,7 @@ namespace Host1x {
class Host1x; class Host1x;
} // namespace Host1x } // namespace Host1x
namespace Decoder { namespace Decoders {
/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the /// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
/// VP9 header bitstreams. /// VP9 header bitstreams.
@ -110,21 +111,32 @@ private:
std::vector<u8> byte_array; std::vector<u8> byte_array;
}; };
class VP9 { class VP9 final : public Decoder {
public: public:
explicit VP9(Host1x::Host1x& host1x); explicit VP9(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
~VP9(); Host1x::FrameQueue& frame_queue);
~VP9() override;
VP9(const VP9&) = delete; VP9(const VP9&) = delete;
VP9& operator=(const VP9&) = delete; VP9& operator=(const VP9&) = delete;
VP9(VP9&&) = default; VP9(VP9&&) = delete;
VP9& operator=(VP9&&) = delete; VP9& operator=(VP9&&) = delete;
/// Composes the VP9 frame from the GPU state information. [[nodiscard]] std::span<const u8> ComposeFrame() override;
/// Based on the official VP9 spec documentation
void ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state);
std::tuple<u64, u64> GetProgressiveOffsets() override;
std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() override;
bool IsInterlaced() override {
return false;
}
std::string_view GetCurrentCodecName() const override {
return "VP9";
}
private:
/// Returns true if the most recent frame was a hidden frame. /// Returns true if the most recent frame was a hidden frame.
[[nodiscard]] bool WasFrameHidden() const { [[nodiscard]] bool WasFrameHidden() const {
return !current_frame_info.show_frame; return !current_frame_info.show_frame;
@ -132,10 +144,9 @@ public:
/// Returns a const span to the composed frame data. /// Returns a const span to the composed frame data.
[[nodiscard]] std::span<const u8> GetFrameBytes() const { [[nodiscard]] std::span<const u8> GetFrameBytes() const {
return frame; return frame_scratch;
} }
private:
/// Generates compressed header probability updates in the bitstream writer /// Generates compressed header probability updates in the bitstream writer
template <typename T, std::size_t N> template <typename T, std::size_t N>
void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
@ -167,23 +178,22 @@ private:
/// Write motion vector probability updates. 6.3.17 in the spec /// Write motion vector probability updates. 6.3.17 in the spec
void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
void WriteSegmentation(VpxBitStreamWriter& writer);
/// Returns VP9 information from NVDEC provided offset and size /// Returns VP9 information from NVDEC provided offset and size
[[nodiscard]] Vp9PictureInfo GetVp9PictureInfo( [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo();
const Host1x::NvdecCommon::NvdecRegisters& state);
/// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
void InsertEntropy(u64 offset, Vp9EntropyProbs& dst); void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
/// Returns frame to be decoded after buffering /// Returns frame to be decoded after buffering
[[nodiscard]] Vp9FrameContainer GetCurrentFrame( [[nodiscard]] Vp9FrameContainer GetCurrentFrame();
const Host1x::NvdecCommon::NvdecRegisters& state);
/// Use NVDEC providied information to compose the headers for the current frame /// Use NVDEC providied information to compose the headers for the current frame
[[nodiscard]] std::vector<u8> ComposeCompressedHeader(); [[nodiscard]] std::vector<u8> ComposeCompressedHeader();
[[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader(); [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
Host1x::Host1x& host1x; Common::ScratchBuffer<u8> frame_scratch;
Common::ScratchBuffer<u8> frame;
std::array<s8, 4> loop_filter_ref_deltas{}; std::array<s8, 4> loop_filter_ref_deltas{};
std::array<s8, 2> loop_filter_mode_deltas{}; std::array<s8, 2> loop_filter_mode_deltas{};
@ -192,9 +202,11 @@ private:
std::array<Vp9EntropyProbs, 4> frame_ctxs{}; std::array<Vp9EntropyProbs, 4> frame_ctxs{};
bool swap_ref_indices{}; bool swap_ref_indices{};
Segmentation last_segmentation{};
PictureInfo current_picture_info{};
Vp9PictureInfo current_frame_info{}; Vp9PictureInfo current_frame_info{};
Vp9EntropyProbs prev_frame_probs{}; Vp9EntropyProbs prev_frame_probs{};
}; };
} // namespace Decoder } // namespace Decoders
} // namespace Tegra } // namespace Tegra

View File

@ -11,7 +11,14 @@
namespace Tegra { namespace Tegra {
namespace Decoder { namespace Decoders {
enum class Vp9SurfaceIndex : u32 {
Last = 0,
Golden = 1,
AltRef = 2,
Current = 3,
};
struct Vp9FrameDimensions { struct Vp9FrameDimensions {
s16 width; s16 width;
s16 height; s16 height;
@ -48,11 +55,13 @@ enum class TxMode {
}; };
struct Segmentation { struct Segmentation {
constexpr bool operator==(const Segmentation& rhs) const = default;
u8 enabled; u8 enabled;
u8 update_map; u8 update_map;
u8 temporal_update; u8 temporal_update;
u8 abs_delta; u8 abs_delta;
std::array<u32, 8> feature_mask; std::array<std::array<u8, 4>, 8> feature_enabled;
std::array<std::array<s16, 4>, 8> feature_data; std::array<std::array<s16, 4>, 8> feature_data;
}; };
static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
@ -190,7 +199,17 @@ struct PictureInfo {
static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
struct EntropyProbs { struct EntropyProbs {
INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000 std::array<u8, 10 * 10 * 8> kf_bmode_prob; ///< 0x0000
std::array<u8, 10 * 10 * 1> kf_bmode_probB; ///< 0x0320
std::array<u8, 3> ref_pred_probs; ///< 0x0384
std::array<u8, 7> mb_segment_tree_probs; ///< 0x0387
std::array<u8, 3> segment_pred_probs; ///< 0x038E
std::array<u8, 4> ref_scores; ///< 0x0391
std::array<u8, 2> prob_comppred; ///< 0x0395
INSERT_PADDING_BYTES_NOINIT(9); ///< 0x0397
std::array<u8, 10 * 8> kf_uv_mode_prob; ///< 0x03A0
std::array<u8, 10 * 1> kf_uv_mode_probB; ///< 0x03F0
INSERT_PADDING_BYTES_NOINIT(6); ///< 0x03FA
std::array<u8, 28> inter_mode_prob; ///< 0x0400 std::array<u8, 28> inter_mode_prob; ///< 0x0400
std::array<u8, 4> intra_inter_prob; ///< 0x041C std::array<u8, 4> intra_inter_prob; ///< 0x041C
INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420 INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420
@ -302,5 +321,5 @@ ASSERT_POSITION(class_0_fr, 0x560);
ASSERT_POSITION(coef_probs, 0x5A0); ASSERT_POSITION(coef_probs, 0x5A0);
#undef ASSERT_POSITION #undef ASSERT_POSITION
}; // namespace Decoder }; // namespace Decoders
}; // namespace Tegra }; // namespace Tegra

View File

@ -27,6 +27,7 @@ void Control::ProcessMethod(Method method, u32 argument) {
} }
void Control::Execute(u32 data) { void Control::Execute(u32 data) {
LOG_TRACE(Service_NVDRV, "Control wait syncpt {} value {}", data, syncpoint_value);
host1x.GetSyncpointManager().WaitHost(data, syncpoint_value); host1x.GetSyncpointManager().WaitHost(data, syncpoint_value);
} }

View File

@ -6,9 +6,7 @@
#include "common/common_types.h" #include "common/common_types.h"
namespace Tegra { namespace Tegra::Host1x {
namespace Host1x {
class Host1x; class Host1x;
class Nvdec; class Nvdec;
@ -31,10 +29,8 @@ private:
/// For Host1x, execute is waiting on a syncpoint previously written into the state /// For Host1x, execute is waiting on a syncpoint previously written into the state
void Execute(u32 data); void Execute(u32 data);
u32 syncpoint_value{};
Host1x& host1x; Host1x& host1x;
u32 syncpoint_value{};
}; };
} // namespace Host1x } // namespace Tegra::Host1x
} // namespace Tegra

View File

@ -5,7 +5,9 @@
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "common/settings.h" #include "common/settings.h"
#include "core/memory.h"
#include "video_core/host1x/ffmpeg/ffmpeg.h" #include "video_core/host1x/ffmpeg/ffmpeg.h"
#include "video_core/memory_manager.h"
extern "C" { extern "C" {
#ifdef LIBVA_FOUND #ifdef LIBVA_FOUND
@ -149,6 +151,7 @@ bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context,
} }
} }
LOG_INFO(HW_GPU, "Hardware decoding is disabled due to implementation issues, using CPU.");
return false; return false;
} }
@ -183,8 +186,8 @@ bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
return true; return true;
} }
DecoderContext::DecoderContext(const Decoder& decoder) { DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} {
m_codec_context = avcodec_alloc_context3(decoder.GetCodec()); m_codec_context = avcodec_alloc_context3(m_decoder.GetCodec());
av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0); av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0);
m_codec_context->thread_count = 0; m_codec_context->thread_count = 0;
m_codec_context->thread_type &= ~FF_THREAD_FRAME; m_codec_context->thread_type &= ~FF_THREAD_FRAME;
@ -216,6 +219,25 @@ bool DecoderContext::OpenContext(const Decoder& decoder) {
} }
bool DecoderContext::SendPacket(const Packet& packet) { bool DecoderContext::SendPacket(const Packet& packet) {
m_temp_frame = std::make_shared<Frame>();
m_got_frame = 0;
// Android can randomly crash when calling decode directly, so skip.
// TODO update ffmpeg and hope that fixes it.
#ifndef ANDROID
if (!m_codec_context->hw_device_ctx && m_codec_context->codec_id == AV_CODEC_ID_H264) {
m_decode_order = true;
auto* codec{ffcodec(m_decoder.GetCodec())};
if (const int ret = codec->cb.decode(m_codec_context, m_temp_frame->GetFrame(),
&m_got_frame, packet.GetPacket());
ret < 0) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", AVError(ret));
return false;
}
return true;
}
#endif
if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) { if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret)); LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret));
return false; return false;
@ -224,139 +246,73 @@ bool DecoderContext::SendPacket(const Packet& packet) {
return true; return true;
} }
std::unique_ptr<Frame> DecoderContext::ReceiveFrame(bool* out_is_interlaced) { std::shared_ptr<Frame> DecoderContext::ReceiveFrame() {
auto dst_frame = std::make_unique<Frame>(); // Android can randomly crash when calling decode directly, so skip.
// TODO update ffmpeg and hope that fixes it.
#ifndef ANDROID
if (!m_codec_context->hw_device_ctx && m_codec_context->codec_id == AV_CODEC_ID_H264) {
m_decode_order = true;
auto* codec{ffcodec(m_decoder.GetCodec())};
int ret{0};
const auto ReceiveImpl = [&](AVFrame* frame) { if (m_got_frame == 0) {
if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) { Packet packet{{}};
LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret)); auto* pkt = packet.GetPacket();
return false; pkt->data = nullptr;
pkt->size = 0;
ret = codec->cb.decode(m_codec_context, m_temp_frame->GetFrame(), &m_got_frame, pkt);
m_codec_context->has_b_frames = 0;
} }
*out_is_interlaced = if (m_got_frame == 0 || ret < 0) {
#if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59 LOG_ERROR(Service_NVDRV, "Failed to receive a frame! error {}", ret);
(frame->flags & AV_FRAME_FLAG_INTERLACED) != 0; return {};
#else }
frame->interlaced_frame != 0; } else
#endif #endif
return true; {
};
if (m_codec_context->hw_device_ctx) { const auto ReceiveImpl = [&](AVFrame* frame) {
// If we have a hardware context, make a separate frame here to receive the if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) {
// hardware result before sending it to the output. LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
Frame intermediate_frame; return false;
}
if (!ReceiveImpl(intermediate_frame.GetFrame())) { return true;
return {}; };
}
dst_frame->SetFormat(PreferredGpuFormat); if (m_codec_context->hw_device_ctx) {
if (const int ret = // If we have a hardware context, make a separate frame here to receive the
av_hwframe_transfer_data(dst_frame->GetFrame(), intermediate_frame.GetFrame(), 0); // hardware result before sending it to the output.
ret < 0) { Frame intermediate_frame;
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
return {}; if (!ReceiveImpl(intermediate_frame.GetFrame())) {
} return {};
} else { }
// Otherwise, decode the frame as normal.
if (!ReceiveImpl(dst_frame->GetFrame())) { m_temp_frame->SetFormat(PreferredGpuFormat);
return {}; if (const int ret = av_hwframe_transfer_data(m_temp_frame->GetFrame(),
intermediate_frame.GetFrame(), 0);
ret < 0) {
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
return {};
}
} else {
// Otherwise, decode the frame as normal.
if (!ReceiveImpl(m_temp_frame->GetFrame())) {
return {};
}
} }
} }
return dst_frame; #if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59
} m_temp_frame->GetFrame()->interlaced_frame =
(m_temp_frame->GetFrame()->flags & AV_FRAME_FLAG_INTERLACED) != 0;
DeinterlaceFilter::DeinterlaceFilter(const Frame& frame) { #endif
const AVFilter* buffer_src = avfilter_get_by_name("buffer"); return std::move(m_temp_frame);
const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
AVFilterInOut* inputs = avfilter_inout_alloc();
AVFilterInOut* outputs = avfilter_inout_alloc();
SCOPE_EXIT {
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
};
// Don't know how to get the accurate time_base but it doesn't matter for yadif filter
// so just use 1/1 to make buffer filter happy
std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame.GetWidth(),
frame.GetHeight(), static_cast<int>(frame.GetPixelFormat()));
m_filter_graph = avfilter_graph_alloc();
int ret = avfilter_graph_create_filter(&m_source_context, buffer_src, "in", args.c_str(),
nullptr, m_filter_graph);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_create_filter source error: {}", AVError(ret));
return;
}
ret = avfilter_graph_create_filter(&m_sink_context, buffer_sink, "out", nullptr, nullptr,
m_filter_graph);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_create_filter sink error: {}", AVError(ret));
return;
}
inputs->name = av_strdup("out");
inputs->filter_ctx = m_sink_context;
inputs->pad_idx = 0;
inputs->next = nullptr;
outputs->name = av_strdup("in");
outputs->filter_ctx = m_source_context;
outputs->pad_idx = 0;
outputs->next = nullptr;
const char* description = "yadif=1:-1:0";
ret = avfilter_graph_parse_ptr(m_filter_graph, description, &inputs, &outputs, nullptr);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_parse_ptr error: {}", AVError(ret));
return;
}
ret = avfilter_graph_config(m_filter_graph, nullptr);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_config error: {}", AVError(ret));
return;
}
m_initialized = true;
}
bool DeinterlaceFilter::AddSourceFrame(const Frame& frame) {
if (const int ret = av_buffersrc_add_frame_flags(m_source_context, frame.GetFrame(),
AV_BUFFERSRC_FLAG_KEEP_REF);
ret < 0) {
LOG_ERROR(HW_GPU, "av_buffersrc_add_frame_flags error: {}", AVError(ret));
return false;
}
return true;
}
std::unique_ptr<Frame> DeinterlaceFilter::DrainSinkFrame() {
auto dst_frame = std::make_unique<Frame>();
const int ret = av_buffersink_get_frame(m_sink_context, dst_frame->GetFrame());
if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) {
return {};
}
if (ret < 0) {
LOG_ERROR(HW_GPU, "av_buffersink_get_frame error: {}", AVError(ret));
return {};
}
return dst_frame;
}
DeinterlaceFilter::~DeinterlaceFilter() {
avfilter_graph_free(&m_filter_graph);
} }
void DecodeApi::Reset() { void DecodeApi::Reset() {
m_deinterlace_filter.reset();
m_hardware_context.reset(); m_hardware_context.reset();
m_decoder_context.reset(); m_decoder_context.reset();
m_decoder.reset(); m_decoder.reset();
@ -382,43 +338,14 @@ bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
return true; return true;
} }
bool DecodeApi::SendPacket(std::span<const u8> packet_data, size_t configuration_size) { bool DecodeApi::SendPacket(std::span<const u8> packet_data) {
FFmpeg::Packet packet(packet_data); FFmpeg::Packet packet(packet_data);
return m_decoder_context->SendPacket(packet); return m_decoder_context->SendPacket(packet);
} }
void DecodeApi::ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue) { std::shared_ptr<Frame> DecodeApi::ReceiveFrame() {
// Receive raw frame from decoder. // Receive raw frame from decoder.
bool is_interlaced; return m_decoder_context->ReceiveFrame();
auto frame = m_decoder_context->ReceiveFrame(&is_interlaced);
if (!frame) {
return;
}
if (!is_interlaced) {
// If the frame is not interlaced, we can pend it now.
frame_queue.push(std::move(frame));
} else {
// Create the deinterlacer if needed.
if (!m_deinterlace_filter) {
m_deinterlace_filter.emplace(*frame);
}
// Add the frame we just received.
if (!m_deinterlace_filter->AddSourceFrame(*frame)) {
return;
}
// Pend output fields.
while (true) {
auto filter_frame = m_deinterlace_filter->DrainSinkFrame();
if (!filter_frame) {
break;
}
frame_queue.push(std::move(filter_frame));
}
}
} }
} // namespace FFmpeg } // namespace FFmpeg

View File

@ -20,17 +20,20 @@ extern "C" {
#endif #endif
#include <libavcodec/avcodec.h> #include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h> #include <libavutil/opt.h>
#ifndef ANDROID
#include <libavcodec/codec_internal.h>
#endif
#if defined(__GNUC__) || defined(__clang__) #if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
#endif #endif
} }
namespace Tegra {
class MemoryManager;
}
namespace FFmpeg { namespace FFmpeg {
class Packet; class Packet;
@ -90,6 +93,10 @@ public:
return m_frame->data[plane]; return m_frame->data[plane];
} }
/// Read-only counterpart of GetPlane(): returns the data pointer the wrapped AVFrame holds for
/// the given plane index.
const u8* GetPlane(int plane) const {
    const u8* const plane_data = m_frame->data[plane];
    return plane_data;
}
u8** GetPlanes() const { u8** GetPlanes() const {
return m_frame->data; return m_frame->data;
} }
@ -98,6 +105,14 @@ public:
m_frame->format = format; m_frame->format = format;
} }
/// True when the wrapped AVFrame has a non-zero interlaced_frame field.
bool IsInterlaced() const {
    const bool progressive = m_frame->interlaced_frame == 0;
    return !progressive;
}
/// True when the wrapped AVFrame carries a hardware frames context (hw_frames_ctx is set).
bool IsHardwareDecoded() const {
    const bool has_hw_frames_ctx = m_frame->hw_frames_ctx != nullptr;
    return has_hw_frames_ctx;
}
AVFrame* GetFrame() const { AVFrame* GetFrame() const {
return m_frame; return m_frame;
} }
@ -160,33 +175,22 @@ public:
void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt); void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt);
bool OpenContext(const Decoder& decoder); bool OpenContext(const Decoder& decoder);
bool SendPacket(const Packet& packet); bool SendPacket(const Packet& packet);
std::unique_ptr<Frame> ReceiveFrame(bool* out_is_interlaced); std::shared_ptr<Frame> ReceiveFrame();
AVCodecContext* GetCodecContext() const { AVCodecContext* GetCodecContext() const {
return m_codec_context; return m_codec_context;
} }
// Returns the m_decode_order flag.
// NOTE(review): where the flag is set is not visible here; presumably it is true when frames are
// handed out in decode order rather than presentation order - confirm against the .cpp.
bool UsingDecodeOrder() const {
return m_decode_order;
}
private: private:
const Decoder& m_decoder;
AVCodecContext* m_codec_context{}; AVCodecContext* m_codec_context{};
}; s32 m_got_frame{};
std::shared_ptr<Frame> m_temp_frame{};
// Wraps an AVFilterGraph. bool m_decode_order{};
class DeinterlaceFilter {
public:
YUZU_NON_COPYABLE(DeinterlaceFilter);
YUZU_NON_MOVEABLE(DeinterlaceFilter);
explicit DeinterlaceFilter(const Frame& frame);
~DeinterlaceFilter();
bool AddSourceFrame(const Frame& frame);
std::unique_ptr<Frame> DrainSinkFrame();
private:
AVFilterGraph* m_filter_graph{};
AVFilterContext* m_source_context{};
AVFilterContext* m_sink_context{};
bool m_initialized{};
}; };
class DecodeApi { class DecodeApi {
@ -200,14 +204,17 @@ public:
bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec); bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec);
void Reset(); void Reset();
bool SendPacket(std::span<const u8> packet_data, size_t configuration_size); bool UsingDecodeOrder() const {
void ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue); return m_decoder_context->UsingDecodeOrder();
}
bool SendPacket(std::span<const u8> packet_data);
std::shared_ptr<Frame> ReceiveFrame();
private: private:
std::optional<FFmpeg::Decoder> m_decoder; std::optional<FFmpeg::Decoder> m_decoder;
std::optional<FFmpeg::DecoderContext> m_decoder_context; std::optional<FFmpeg::DecoderContext> m_decoder_context;
std::optional<FFmpeg::HardwareContext> m_hardware_context; std::optional<FFmpeg::HardwareContext> m_hardware_context;
std::optional<FFmpeg::DeinterlaceFilter> m_deinterlace_filter;
}; };
} // namespace FFmpeg } // namespace FFmpeg

View File

@ -3,10 +3,10 @@
#include "core/core.h" #include "core/core.h"
#include "video_core/host1x/host1x.h" #include "video_core/host1x/host1x.h"
#include "video_core/host1x/nvdec.h"
#include "video_core/host1x/vic.h"
namespace Tegra { namespace Tegra::Host1x {
namespace Host1x {
Host1x::Host1x(Core::System& system_) Host1x::Host1x(Core::System& system_)
: system{system_}, syncpoint_manager{}, : system{system_}, syncpoint_manager{},
@ -15,6 +15,22 @@ Host1x::Host1x(Core::System& system_)
Host1x::~Host1x() = default; Host1x::~Host1x() = default;
} // namespace Host1x void Host1x::StartDevice(s32 fd, ChannelType type, u32 syncpt) {
switch (type) {
case ChannelType::NvDec:
devices[fd] = std::make_unique<Tegra::Host1x::Nvdec>(*this, fd, syncpt, frame_queue);
break;
case ChannelType::VIC:
devices[fd] = std::make_unique<Tegra::Host1x::Vic>(*this, fd, syncpt, frame_queue);
break;
default:
LOG_ERROR(HW_GPU, "Unimplemented host1x device {}", static_cast<u32>(type));
break;
}
}
} // namespace Tegra void Host1x::StopDevice(s32 fd, ChannelType type) {
devices.erase(fd);
}
} // namespace Tegra::Host1x

View File

@ -3,9 +3,14 @@
#pragma once #pragma once
#include <unordered_map>
#include <unordered_set>
#include <queue>
#include "common/common_types.h" #include "common/common_types.h"
#include "common/address_space.h" #include "common/address_space.h"
#include "video_core/cdma_pusher.h"
#include "video_core/host1x/gpu_device_memory_manager.h" #include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/host1x/syncpoint_manager.h" #include "video_core/host1x/syncpoint_manager.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
@ -14,15 +19,137 @@ namespace Core {
class System; class System;
} // namespace Core } // namespace Core
namespace Tegra { namespace FFmpeg {
class Frame;
} // namespace FFmpeg
namespace Host1x { namespace Tegra::Host1x {
class Nvdec;
class FrameQueue {
public:
void Open(s32 fd) {
std::scoped_lock l{m_mutex};
m_presentation_order.insert({fd, {}});
m_decode_order.insert({fd, {}});
}
void Close(s32 fd) {
std::scoped_lock l{m_mutex};
m_presentation_order.erase(fd);
m_decode_order.erase(fd);
}
s32 VicFindNvdecFdFromOffset(u64 search_offset) {
std::scoped_lock l{m_mutex};
// Vic does not know which nvdec is producing frames for it, so search all the fds here for
// the given offset.
for (auto& map : m_presentation_order) {
for (auto& [offset, frame] : map.second) {
if (offset == search_offset) {
return map.first;
}
}
}
for (auto& map : m_decode_order) {
for (auto& [offset, frame] : map.second) {
if (offset == search_offset) {
return map.first;
}
}
}
return -1;
}
void PushPresentOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
std::scoped_lock l{m_mutex};
auto map = m_presentation_order.find(fd);
if (map == m_presentation_order.end()) {
return;
}
map->second.emplace_back(offset, std::move(frame));
}
void PushDecodeOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
std::scoped_lock l{m_mutex};
auto map = m_decode_order.find(fd);
if (map == m_decode_order.end()) {
return;
}
map->second.insert_or_assign(offset, std::move(frame));
}
std::shared_ptr<FFmpeg::Frame> GetFrame(s32 fd, u64 offset) {
if (fd == -1) {
return {};
}
std::scoped_lock l{m_mutex};
auto present_map = m_presentation_order.find(fd);
if (present_map != m_presentation_order.end() && present_map->second.size() > 0) {
return GetPresentOrderLocked(fd);
}
auto decode_map = m_decode_order.find(fd);
if (decode_map != m_decode_order.end() && decode_map->second.size() > 0) {
return GetDecodeOrderLocked(fd, offset);
}
return {};
}
private:
std::shared_ptr<FFmpeg::Frame> GetPresentOrderLocked(s32 fd) {
auto map = m_presentation_order.find(fd);
if (map == m_presentation_order.end() || map->second.size() == 0) {
return {};
}
auto frame = std::move(map->second.front().second);
map->second.pop_front();
return frame;
}
std::shared_ptr<FFmpeg::Frame> GetDecodeOrderLocked(s32 fd, u64 offset) {
auto map = m_decode_order.find(fd);
if (map == m_decode_order.end() || map->second.size() == 0) {
return {};
}
auto it = map->second.find(offset);
if (it == map->second.end()) {
return {};
}
return std::move(map->second.extract(it).mapped());
}
using FramePtr = std::shared_ptr<FFmpeg::Frame>;
std::mutex m_mutex{};
std::unordered_map<s32, std::deque<std::pair<u64, FramePtr>>> m_presentation_order;
std::unordered_map<s32, std::unordered_map<u64, FramePtr>> m_decode_order;
};
// Kind of host1x channel a device fd belongs to.
// Host1x::StartDevice only creates devices for NvDec and VIC; any other value is logged as an
// unimplemented host1x device.
enum class ChannelType : u32 {
MsEnc = 0,
VIC = 1,
GPU = 2,
NvDec = 3,
Display = 4,
NvJpg = 5,
TSec = 6,
Max = 7,
};
class Host1x { class Host1x {
public: public:
explicit Host1x(Core::System& system); explicit Host1x(Core::System& system);
~Host1x(); ~Host1x();
// Returns the Core::System reference this Host1x was constructed with.
Core::System& System() {
return system;
}
SyncpointManager& GetSyncpointManager() { SyncpointManager& GetSyncpointManager() {
return syncpoint_manager; return syncpoint_manager;
} }
@ -55,14 +182,25 @@ public:
return *allocator; return *allocator;
} }
void StartDevice(s32 fd, ChannelType type, u32 syncpt);
void StopDevice(s32 fd, ChannelType type);
/// Hands a command list to the device registered under fd. The list is silently dropped when
/// no device has been started for that fd.
void PushEntries(s32 fd, ChCommandHeaderList&& entries) {
    if (const auto device = devices.find(fd); device != devices.end()) {
        device->second->PushEntries(std::move(entries));
    }
}
private: private:
Core::System& system; Core::System& system;
SyncpointManager syncpoint_manager; SyncpointManager syncpoint_manager;
Tegra::MaxwellDeviceMemoryManager memory_manager; Tegra::MaxwellDeviceMemoryManager memory_manager;
Tegra::MemoryManager gmmu_manager; Tegra::MemoryManager gmmu_manager;
std::unique_ptr<Common::FlatAllocator<u32, 0, 32>> allocator; std::unique_ptr<Common::FlatAllocator<u32, 0, 32>> allocator;
FrameQueue frame_queue;
std::unordered_map<s32, std::unique_ptr<CDmaPusher>> devices;
}; };
} // namespace Host1x } // namespace Tegra::Host1x
} // namespace Tegra

View File

@ -2,6 +2,12 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h" #include "common/assert.h"
#include "common/polyfill_thread.h"
#include "common/settings.h"
#include "video_core/host1x/codecs/h264.h"
#include "video_core/host1x/codecs/vp8.h"
#include "video_core/host1x/codecs/vp9.h"
#include "video_core/host1x/host1x.h" #include "video_core/host1x/host1x.h"
#include "video_core/host1x/nvdec.h" #include "video_core/host1x/nvdec.h"
@ -10,37 +16,69 @@ namespace Tegra::Host1x {
#define NVDEC_REG_INDEX(field_name) \ #define NVDEC_REG_INDEX(field_name) \
(offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
Nvdec::Nvdec(Host1x& host1x_) Nvdec::Nvdec(Host1x& host1x_, s32 id_, u32 syncpt, FrameQueue& frame_queue_)
: host1x(host1x_), state{}, codec(std::make_unique<Codec>(host1x, state)) {} : CDmaPusher{host1x_, id_}, id{id_}, syncpoint{syncpt}, frame_queue{frame_queue_} {
LOG_INFO(HW_GPU, "Created nvdec {}", id);
frame_queue.Open(id);
}
Nvdec::~Nvdec() = default; Nvdec::~Nvdec() {
LOG_INFO(HW_GPU, "Destroying nvdec {}", id);
}
void Nvdec::ProcessMethod(u32 method, u32 argument) { void Nvdec::ProcessMethod(u32 method, u32 argument) {
state.reg_array[method] = static_cast<u64>(argument) << 8; regs.reg_array[method] = argument;
switch (method) { switch (method) {
case NVDEC_REG_INDEX(set_codec_id): case NVDEC_REG_INDEX(set_codec_id):
codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); CreateDecoder(static_cast<NvdecCommon::VideoCodec>(argument));
break; break;
case NVDEC_REG_INDEX(execute): case NVDEC_REG_INDEX(execute): {
if (wait_needed) {
std::this_thread::sleep_for(std::chrono::milliseconds(32));
wait_needed = false;
}
Execute(); Execute();
break; } break;
} }
} }
std::unique_ptr<FFmpeg::Frame> Nvdec::GetFrame() { void Nvdec::CreateDecoder(NvdecCommon::VideoCodec codec) {
return codec->GetCurrentFrame(); if (decoder.get()) {
return;
}
switch (codec) {
case NvdecCommon::VideoCodec::H264:
decoder = std::make_unique<Decoders::H264>(host1x, regs, id, frame_queue);
break;
case NvdecCommon::VideoCodec::VP8:
decoder = std::make_unique<Decoders::VP8>(host1x, regs, id, frame_queue);
break;
case NvdecCommon::VideoCodec::VP9:
decoder = std::make_unique<Decoders::VP9>(host1x, regs, id, frame_queue);
break;
default:
UNIMPLEMENTED_MSG("Codec {}", decoder->GetCurrentCodecName());
break;
}
LOG_INFO(HW_GPU, "Created decoder {} for id {}", decoder->GetCurrentCodecName(), id);
} }
void Nvdec::Execute() { void Nvdec::Execute() {
switch (codec->GetCurrentCodec()) { if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Off) [[unlikely]] {
// Signalling syncpts too fast can cause games to get stuck as they don't expect a <1ms
// execution time. Sleep for half of a 60 fps frame just in case.
std::this_thread::sleep_for(std::chrono::milliseconds(8));
return;
}
switch (decoder->GetCurrentCodec()) {
case NvdecCommon::VideoCodec::H264: case NvdecCommon::VideoCodec::H264:
case NvdecCommon::VideoCodec::VP8: case NvdecCommon::VideoCodec::VP8:
case NvdecCommon::VideoCodec::VP9: case NvdecCommon::VideoCodec::VP9:
codec->Decode(); decoder->Decode();
break; break;
default: default:
UNIMPLEMENTED_MSG("Codec {}", codec->GetCurrentCodecName()); UNIMPLEMENTED_MSG("Codec {}", decoder->GetCurrentCodecName());
break; break;
} }
} }

View File

@ -5,33 +5,47 @@
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/host1x/codecs/codec.h" #include "video_core/cdma_pusher.h"
#include "video_core/host1x/codecs/decoder.h"
namespace Tegra { namespace Tegra {
namespace Host1x { namespace Host1x {
class Host1x; class Host1x;
class FrameQueue;
class Nvdec { class Nvdec final : public CDmaPusher {
public: public:
explicit Nvdec(Host1x& host1x); explicit Nvdec(Host1x& host1x, s32 id, u32 syncpt, FrameQueue& frame_queue_);
~Nvdec(); ~Nvdec();
/// Writes the method into the state, Invoke Execute() if encountered /// Writes the method into the state, Invoke Execute() if encountered
void ProcessMethod(u32 method, u32 argument); void ProcessMethod(u32 method, u32 arg) override;
/// Return most recently decoded frame u32 GetSyncpoint() const {
[[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetFrame(); return syncpoint;
}
void SetWait() {
wait_needed = true;
}
private: private:
/// Create the decoder when the codec id is set
void CreateDecoder(NvdecCommon::VideoCodec codec);
/// Invoke codec to decode a frame /// Invoke codec to decode a frame
void Execute(); void Execute();
Host1x& host1x; s32 id;
NvdecCommon::NvdecRegisters state; u32 syncpoint;
std::unique_ptr<Codec> codec; FrameQueue& frame_queue;
NvdecCommon::NvdecRegisters regs{};
std::unique_ptr<Decoder> decoder;
bool wait_needed{false};
}; };
} // namespace Host1x } // namespace Host1x

View File

@ -17,6 +17,17 @@ enum class VideoCodec : u64 {
VP9 = 0x9, VP9 = 0x9,
}; };
// Raw 64-bit register value for the *_offset registers of NvdecRegisters.
// The register is stored exactly as written; Address() applies the left shift by 8 bits on read.
struct Offset {
constexpr u64 Address() const noexcept {
return offset << 8;
}
private:
u64 offset;
};
// Offset replaces plain u64 register fields, so it has to stay trivial and exactly 8 bytes wide.
static_assert(std::is_trivial_v<Offset>, "Offset must be trivial");
static_assert(sizeof(Offset) == 0x8, "Offset has the wrong size!");
// NVDEC should use a 32-bit address space, but is mapped to 64-bit, // NVDEC should use a 32-bit address space, but is mapped to 64-bit,
// doubling the sizes here is compensating for that. // doubling the sizes here is compensating for that.
struct NvdecRegisters { struct NvdecRegisters {
@ -38,29 +49,40 @@ struct NvdecRegisters {
BitField<17, 1, u64> all_intra_frame; BitField<17, 1, u64> all_intra_frame;
}; };
} control_params; } control_params;
u64 picture_info_offset; ///< 0x0808 Offset picture_info_offset; ///< 0x0808
u64 frame_bitstream_offset; ///< 0x0810 Offset frame_bitstream_offset; ///< 0x0810
u64 frame_number; ///< 0x0818 u64 frame_number; ///< 0x0818
u64 h264_slice_data_offsets; ///< 0x0820 Offset h264_slice_data_offsets; ///< 0x0820
u64 h264_mv_dump_offset; ///< 0x0828 Offset h264_mv_dump_offset; ///< 0x0828
INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830 INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830
u64 frame_stats_offset; ///< 0x0848 Offset frame_stats_offset; ///< 0x0848
u64 h264_last_surface_luma_offset; ///< 0x0850 Offset h264_last_surface_luma_offset; ///< 0x0850
u64 h264_last_surface_chroma_offset; ///< 0x0858 Offset h264_last_surface_chroma_offset; ///< 0x0858
std::array<u64, 17> surface_luma_offset; ///< 0x0860 std::array<Offset, 17> surface_luma_offsets; ///< 0x0860
std::array<u64, 17> surface_chroma_offset; ///< 0x08E8 std::array<Offset, 17> surface_chroma_offsets; ///< 0x08E8
INSERT_PADDING_WORDS_NOINIT(68); ///< 0x0970 Offset pic_scratch_buf_offset; ///< 0x0970
u64 vp8_prob_data_offset; ///< 0x0A80 Offset external_mvbuffer_offset; ///< 0x0978
u64 vp8_header_partition_buf_offset; ///< 0x0A88 INSERT_PADDING_WORDS_NOINIT(32); ///< 0x0980
INSERT_PADDING_WORDS_NOINIT(60); ///< 0x0A90 Offset h264_mbhist_buffer_offset; ///< 0x0A00
u64 vp9_entropy_probs_offset; ///< 0x0B80 INSERT_PADDING_WORDS_NOINIT(30); ///< 0x0A08
u64 vp9_backward_updates_offset; ///< 0x0B88 Offset vp8_prob_data_offset; ///< 0x0A80
u64 vp9_last_frame_segmap_offset; ///< 0x0B90 Offset vp8_header_partition_buf_offset; ///< 0x0A88
u64 vp9_curr_frame_segmap_offset; ///< 0x0B98 INSERT_PADDING_WORDS_NOINIT(28); ///< 0x0A90
INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0 Offset hvec_scalist_list_offset; ///< 0x0B00
u64 vp9_last_frame_mvs_offset; ///< 0x0BA8 Offset hvec_tile_sizes_offset; ///< 0x0B08
u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0 Offset hvec_filter_buffer_offset; ///< 0x0B10
INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8 Offset hvec_sao_buffer_offset; ///< 0x0B18
Offset hvec_slice_info_buffer_offset; ///< 0x0B20
Offset hvec_slice_group_index_offset; ///< 0x0B28
INSERT_PADDING_WORDS_NOINIT(20); ///< 0x0B30
Offset vp9_prob_tab_buffer_offset; ///< 0x0B80
Offset vp9_ctx_counter_buffer_offset; ///< 0x0B88
Offset vp9_segment_read_buffer_offset; ///< 0x0B90
Offset vp9_segment_write_buffer_offset; ///< 0x0B98
Offset vp9_tile_size_buffer_offset; ///< 0x0BA0
Offset vp9_col_mvwrite_buffer_offset; ///< 0x0BA8
Offset vp9_col_mvread_buffer_offset; ///< 0x0BB0
Offset vp9_filter_buffer_offset; ///< 0x0BB8
}; };
std::array<u64, NUM_REGS> reg_array; std::array<u64, NUM_REGS> reg_array;
}; };
@ -81,16 +103,16 @@ ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104);
ASSERT_REG_POSITION(frame_stats_offset, 0x109); ASSERT_REG_POSITION(frame_stats_offset, 0x109);
ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A); ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B); ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
ASSERT_REG_POSITION(surface_luma_offset, 0x10C); ASSERT_REG_POSITION(surface_luma_offsets, 0x10C);
ASSERT_REG_POSITION(surface_chroma_offset, 0x11D); ASSERT_REG_POSITION(surface_chroma_offsets, 0x11D);
ASSERT_REG_POSITION(vp8_prob_data_offset, 0x150); ASSERT_REG_POSITION(vp8_prob_data_offset, 0x150);
ASSERT_REG_POSITION(vp8_header_partition_buf_offset, 0x151); ASSERT_REG_POSITION(vp8_header_partition_buf_offset, 0x151);
ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170); ASSERT_REG_POSITION(vp9_prob_tab_buffer_offset, 0x170);
ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171); ASSERT_REG_POSITION(vp9_ctx_counter_buffer_offset, 0x171);
ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172); ASSERT_REG_POSITION(vp9_segment_read_buffer_offset, 0x172);
ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173); ASSERT_REG_POSITION(vp9_segment_write_buffer_offset, 0x173);
ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175); ASSERT_REG_POSITION(vp9_col_mvwrite_buffer_offset, 0x175);
ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); ASSERT_REG_POSITION(vp9_col_mvread_buffer_offset, 0x176);
#undef ASSERT_REG_POSITION #undef ASSERT_REG_POSITION

View File

@ -1,50 +0,0 @@
// SPDX-FileCopyrightText: Ryujinx Team and Contributors
// SPDX-License-Identifier: MIT
#include <algorithm>
#include "sync_manager.h"
#include "video_core/host1x/host1x.h"
#include "video_core/host1x/syncpoint_manager.h"
namespace Tegra {
namespace Host1x {
// Keeps a reference to the Host1x whose syncpoint manager IncrementAllDone() drives.
SyncptIncrManager::SyncptIncrManager(Host1x& host1x_) : host1x(host1x_) {}
// Nothing to release beyond what the members clean up themselves.
SyncptIncrManager::~SyncptIncrManager() = default;
/// Queues an increment of syncpoint `id` that is already marked complete, then flushes every
/// completed increment at the front of the queue.
void SyncptIncrManager::Increment(u32 id) {
    constexpr u32 no_handle = 0;
    constexpr u32 no_class = 0;
    constexpr bool already_complete = true;
    increments.emplace_back(no_handle, no_class, id, already_complete);
    IncrementAllDone();
}
/// Queues a not-yet-complete increment of syncpoint `id` and returns the handle that
/// SignalDone() later uses to mark it complete. Handles are handed out sequentially.
u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
    const u32 handle = current_id;
    ++current_id;
    increments.emplace_back(handle, class_id, id);
    return handle;
}
/// Marks the first queued increment whose id equals `handle` as complete (if there is one) and
/// then flushes every completed increment at the front of the queue.
void SyncptIncrManager::SignalDone(u32 handle) {
    for (SyncptIncr& pending : increments) {
        if (pending.id == handle) {
            pending.complete = true;
            break;
        }
    }
    IncrementAllDone();
}
/// Walks the queue from the front and, for every leading entry that is complete, increments its
/// syncpoint on both the guest and the host side. Stops at the first incomplete entry and erases
/// everything that was processed.
void SyncptIncrManager::IncrementAllDone() {
    std::size_t flushed = 0;
    // Index-based on purpose: size and elements are re-read on every step.
    while (flushed < increments.size() && increments[flushed].complete) {
        auto& syncpoints = host1x.GetSyncpointManager();
        syncpoints.IncrementGuest(increments[flushed].syncpt_id);
        syncpoints.IncrementHost(increments[flushed].syncpt_id);
        ++flushed;
    }
    increments.erase(increments.begin(), increments.begin() + flushed);
}
} // namespace Host1x
} // namespace Tegra

View File

@ -1,53 +0,0 @@
// SPDX-FileCopyrightText: Ryujinx Team and Contributors
// SPDX-License-Identifier: MIT
#pragma once
#include <mutex>
#include <vector>
#include "common/common_types.h"
namespace Tegra {
namespace Host1x {
class Host1x;
// One queued syncpoint increment tracked by SyncptIncrManager.
struct SyncptIncr {
// Handle matched by SyncptIncrManager::SignalDone().
u32 id;
// Class id passed to IncrementWhenDone(); stored but not read by the manager code shown here.
u32 class_id;
// Syncpoint incremented once this entry reaches the front of the queue and is complete.
u32 syncpt_id;
// Set when the increment may be applied.
bool complete;
// `done` defaults to false, i.e. the increment waits for SignalDone().
SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false)
: id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
};
// Applies syncpoint increments in the order they were queued: an increment is only applied once
// it and every increment queued before it are complete.
class SyncptIncrManager {
public:
explicit SyncptIncrManager(Host1x& host1x);
~SyncptIncrManager();
/// Queue an already-complete increment for syncpoint id, then run IncrementAllDone()
void Increment(u32 id);
/// Queue a pending increment and return a handle to complete it later with SignalDone()
u32 IncrementWhenDone(u32 class_id, u32 id);
/// Mark the increment identified by handle as complete, then run IncrementAllDone()
void SignalDone(u32 handle);
/// Increment all sequential pending increments that are already done.
void IncrementAllDone();
private:
// Pending increments, oldest first.
std::vector<SyncptIncr> increments;
// NOTE(review): never locked by the member functions defined in sync_manager.cpp.
std::mutex increment_lock;
// Next handle returned by IncrementWhenDone().
u32 current_id{};
Host1x& host1x;
};
} // namespace Host1x
} // namespace Tegra

View File

@ -18,7 +18,7 @@ SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
return {}; return {};
} }
std::unique_lock lk(guard); std::scoped_lock lk(guard);
if (syncpoint.load(std::memory_order_relaxed) >= expected_value) { if (syncpoint.load(std::memory_order_relaxed) >= expected_value) {
action(); action();
return {}; return {};
@ -35,7 +35,7 @@ SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage, void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage,
const ActionHandle& handle) { const ActionHandle& handle) {
std::unique_lock lk(guard); std::scoped_lock lk(guard);
// We want to ensure the iterator still exists prior to erasing it // We want to ensure the iterator still exists prior to erasing it
// Otherwise, if an invalid iterator was passed in then it could lead to UB // Otherwise, if an invalid iterator was passed in then it could lead to UB
@ -78,7 +78,7 @@ void SyncpointManager::Increment(std::atomic<u32>& syncpoint, std::condition_var
std::list<RegisteredAction>& action_storage) { std::list<RegisteredAction>& action_storage) {
auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1}; auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1};
std::unique_lock lk(guard); std::scoped_lock lk(guard);
auto it = action_storage.begin(); auto it = action_storage.begin();
while (it != action_storage.end()) { while (it != action_storage.end()) {
if (it->expected_value > new_value) { if (it->expected_value > new_value) {

File diff suppressed because it is too large Load Diff

View File

@ -3,65 +3,646 @@
#pragma once #pragma once
#include <condition_variable>
#include <functional>
#include <memory> #include <memory>
#include <mutex>
#include <thread>
#include "common/common_types.h" #include "common/common_types.h"
#include "common/scratch_buffer.h" #include "common/scratch_buffer.h"
#include "video_core/cdma_pusher.h"
struct SwsContext; namespace Tegra::Host1x {
namespace Tegra {
namespace Host1x {
class Host1x; class Host1x;
class Nvdec; class Nvdec;
union VicConfig;
class Vic { struct Pixel {
u16 r;
u16 g;
u16 b;
u16 a;
};
// Pixel format identifiers; OutputSurfaceConfig::out_pixel_format stores one of these in a
// 7-bit field.
// Naming convention:
// One underscore represents separate pixels.
// Double underscore represents separate planes.
// _N represents chroma subsampling, not a separate pixel.
enum class VideoPixelFormat : u32 {
A8 = 0,
L8 = 1,
A4L4 = 2,
L4A4 = 3,
R8 = 4,
A8L8 = 5,
L8A8 = 6,
R8G8 = 7,
G8R8 = 8,
B5G6R5 = 9,
R5G6B5 = 10,
B6G5R5 = 11,
R5G5B6 = 12,
A1B5G5R5 = 13,
A1R5G5B5 = 14,
B5G5R5A1 = 15,
R5G5B5A1 = 16,
A5B5G5R1 = 17,
A5R1G5B5 = 18,
B5G5R1A5 = 19,
R1G5B5A5 = 20,
X1B5G5R5 = 21,
X1R5G5B5 = 22,
B5G5R5X1 = 23,
R5G5B5X1 = 24,
A4B4G5R4 = 25,
A4R4G4B4 = 26,
B4G4R4A4 = 27,
R4G4B4A4 = 28,
B8G8R8 = 29,
R8G8B8 = 30,
A8B8G8R8 = 31,
A8R8G8B8 = 32,
B8G8R8A8 = 33,
R8G8B8A8 = 34,
X8B8G8R8 = 35,
X8R8G8B8 = 36,
B8G8R8X8 = 37,
R8G8B8X8 = 38,
A8B10G10R10 = 39,
A2R10G10B10 = 40,
B10G10R10A2 = 41,
R10G10B10A2 = 42,
A4P4 = 43,
P4A4 = 44,
P8A8 = 45,
A8P8 = 46,
P8 = 47,
P1 = 48,
U8V8 = 49,
V8U8 = 50,
A8Y8U8V8 = 51,
V8U8Y8A8 = 52,
Y8U8V8 = 53,
Y8V8U8 = 54,
U8V8Y8 = 55,
V8U8Y8 = 56,
Y8U8_Y8V8 = 57,
Y8V8_Y8U8 = 58,
U8Y8_V8Y8 = 59,
V8Y8_U8Y8 = 60,
Y8__U8V8_N444 = 61,
Y8__V8U8_N444 = 62,
Y8__U8V8_N422 = 63,
Y8__V8U8_N422 = 64,
Y8__U8V8_N422R = 65,
Y8__V8U8_N422R = 66,
Y8__U8V8_N420 = 67,
Y8__V8U8_N420 = 68,
Y8__U8__V8_N444 = 69,
Y8__U8__V8_N422 = 70,
Y8__U8__V8_N422R = 71,
Y8__U8__V8_N420 = 72,
U8 = 73,
V8 = 74,
};
// Raw 32-bit offset value; Address() returns it shifted left by 8 bits.
// NOTE(review): the shift is performed in u32, so the top 8 bits of `offset` are discarded.
// Presumably the addresses used here always fit in 32 bits - confirm.
struct Offset {
constexpr u32 Address() const noexcept {
return offset << 8;
}
private:
u32 offset;
};
// Pin triviality and the 4-byte size so the type can stand in for a plain u32.
static_assert(std::is_trivial_v<Offset>, "Offset must be trivial");
static_assert(sizeof(Offset) == 0x4, "Offset has the wrong size!");
// Three consecutive 32-bit offsets: one luma plane followed by the U and V chroma planes.
struct PlaneOffsets {
Offset luma;
Offset chroma_u;
Offset chroma_v;
};
static_assert(sizeof(PlaneOffsets) == 0xC, "PlaneOffsets has the wrong size!");
// Names for surface indices 0-7.
// NOTE(review): presumably these index a per-slot array of PlaneOffsets - confirm at the use
// site. This is an unscoped enum, so the enumerators are visible directly in the enclosing
// namespace and convert implicitly to integers.
enum SurfaceIndex : u32 {
Current = 0,
Previous = 1,
Next = 2,
NextNoiseReduced = 3,
CurrentMotion = 4,
PreviousMotion = 5,
PreviousPreviousMotion = 6,
CombinedMotion = 7,
};
// Alpha fill mode; stored in the 3-bit OutputConfig::alpha_fill_mode field.
enum class DXVAHD_ALPHA_FILL_MODE : u32 {
OPAQUE = 0,
BACKGROUND = 1,
DESTINATION = 2,
SOURCE_STREAM = 3,
COMPOSITED = 4,
SOURCE_ALPHA = 5,
};
// Frame/field layout of a slot's input; stored in the 4-bit SlotConfig::frame_format field.
// The underlying type is u64 because that field lives in a union of 64-bit bit-fields.
enum class DXVAHD_FRAME_FORMAT : u64 {
PROGRESSIVE = 0,
INTERLACED_TOP_FIELD_FIRST = 1,
INTERLACED_BOTTOM_FIELD_FIRST = 2,
TOP_FIELD = 3,
BOTTOM_FIELD = 4,
SUBPIC_PROGRESSIVE = 5,
SUBPIC_INTERLACED_TOP_FIELD_FIRST = 6,
SUBPIC_INTERLACED_BOTTOM_FIELD_FIRST = 7,
SUBPIC_TOP_FIELD = 8,
SUBPIC_BOTTOM_FIELD = 9,
TOP_FIELD_CHROMA_BOTTOM = 10,
BOTTOM_FIELD_CHROMA_TOP = 11,
SUBPIC_TOP_FIELD_CHROMA_BOTTOM = 12,
SUBPIC_BOTTOM_FIELD_CHROMA_TOP = 13,
};
// Deinterlacing mode; stored in the 4-bit SlotConfig::deinterlace_mode field.
// MAX (0xF) is the largest value that field can hold.
enum class DXVAHD_DEINTERLACE_MODE_PRIVATE : u64 {
WEAVE = 0,
BOB_FIELD = 1,
BOB = 2,
NEWBOB = 3,
DISI1 = 4,
WEAVE_LUMA_BOB_FIELD_CHROMA = 5,
MAX = 0xF,
};
/// Surface block layout; stored in the 4-bit OutputSurfaceConfig::out_block_kind field.
///
/// The underlying type is explicitly unsigned, like the other enums used as bit-field types in
/// this header. A scoped enum without one defaults to int, and VP2_TILED (15) sets the top bit
/// of the 4-bit field: a bit-field reader that sign-extends signed types would turn it into a
/// negative value that matches no enumerator.
enum class BLK_KIND : unsigned int {
    PITCH = 0,
    GENERIC_16Bx2 = 1,
    // These are unsupported in the vic
    BL_NAIVE = 2,
    BL_KEPLER_XBAR_RAW = 3,
    VP2_TILED = 15,
};
// Selector values for the source colour blend factor.
// NOTE(review): the field that stores this enum is outside the visible part of the file.
enum class BLEND_SRCFACTC : u32 {
K1 = 0,
K1_TIMES_DST = 1,
NEG_K1_TIMES_DST = 2,
K1_TIMES_SRC = 3,
ZERO = 4,
};
// Selector values for the destination colour blend factor.
// NOTE(review): the field that stores this enum is outside the visible part of the file.
enum class BLEND_DSTFACTC : u32 {
K1 = 0,
K2 = 1,
K1_TIMES_DST = 2,
NEG_K1_TIMES_DST = 3,
NEG_K1_TIMES_SRC = 4,
ZERO = 5,
ONE = 6,
};
// Selector values for the source alpha blend factor. MAX (7) is the largest 3-bit value.
// NOTE(review): the field that stores this enum is outside the visible part of the file.
enum class BLEND_SRCFACTA : u32 {
K1 = 0,
K2 = 1,
NEG_K1_TIMES_DST = 2,
ZERO = 3,
MAX = 7,
};
// Selector values for the destination alpha blend factor. MAX (7) is the largest 3-bit value.
// NOTE(review): the field that stores this enum is outside the visible part of the file.
enum class BLEND_DSTFACTA : u32 {
K2 = 0,
NEG_K1_TIMES_SRC = 1,
ZERO = 2,
ONE = 3,
MAX = 7,
};
// 16-byte pipe configuration block. Only the first 32-bit word carries named fields: two
// 11-bit downsample values (bits 0-10 and 16-26); the remaining three words are reserved.
struct PipeConfig {
union {
BitField<0, 11, u32> downsample_horiz;
BitField<11, 5, u32> reserved0;
BitField<16, 11, u32> downsample_vert;
BitField<27, 5, u32> reserved1;
};
u32 reserved2;
u32 reserved3;
u32 reserved4;
};
static_assert(sizeof(PipeConfig) == 0x10, "PipeConfig has the wrong size!");
// 16-byte output configuration block: one 64-bit word of packed settings followed by two 32-bit
// words holding the target rectangle.
struct OutputConfig {
// 64-bit word: alpha fill settings, 10-bit background colour channels and output transforms.
// alpha_fill_mode is typed with a u32-backed enum and shares storage with the u64 fields.
union {
BitField<0, 3, DXVAHD_ALPHA_FILL_MODE> alpha_fill_mode;
BitField<3, 3, u64> alpha_fill_slot;
BitField<6, 10, u64> background_a;
BitField<16, 10, u64> background_r;
BitField<26, 10, u64> background_g;
BitField<36, 10, u64> background_b;
BitField<46, 2, u64> regamma_mode;
BitField<48, 1, u64> output_flip_x;
BitField<49, 1, u64> output_flip_y;
BitField<50, 1, u64> output_transpose;
BitField<51, 1, u64> reserved1;
BitField<52, 12, u64> reserved2;
};
// Horizontal extent of the target rectangle (14 bits each).
union {
BitField<0, 14, u32> target_rect_left;
BitField<14, 2, u32> reserved3;
BitField<16, 14, u32> target_rect_right;
BitField<30, 2, u32> reserved4;
};
// Vertical extent of the target rectangle (14 bits each).
union {
BitField<0, 14, u32> target_rect_top;
BitField<14, 2, u32> reserved5;
BitField<16, 14, u32> target_rect_bottom;
BitField<30, 2, u32> reserved6;
};
};
static_assert(sizeof(OutputConfig) == 0x10, "OutputConfig has the wrong size!");
// 16-byte description of the output surface: four 32-bit words holding the pixel format and
// block layout, then the surface, luma and chroma dimensions.
struct OutputSurfaceConfig {
// Word 0: format, chroma siting and block layout.
union {
BitField<0, 7, VideoPixelFormat> out_pixel_format;
BitField<7, 2, u32> out_chroma_loc_horiz;
BitField<9, 2, u32> out_chroma_loc_vert;
BitField<11, 4, BLK_KIND> out_block_kind;
BitField<15, 4, u32> out_block_height; // in gobs, log2
BitField<19, 3, u32> reserved0;
BitField<22, 10, u32> reserved1;
};
// Words 1-3: 14-bit width/height pairs. The "- 1" notes presumably mean the stored value is the
// real dimension minus one - confirm against the code that reads them.
union {
BitField<0, 14, u32> out_surface_width; // - 1
BitField<14, 14, u32> out_surface_height; // - 1
BitField<28, 4, u32> reserved2;
};
union {
BitField<0, 14, u32> out_luma_width; // - 1
BitField<14, 14, u32> out_luma_height; // - 1
BitField<28, 4, u32> reserved3;
};
union {
BitField<0, 14, u32> out_chroma_width; // - 1
BitField<14, 14, u32> out_chroma_height; // - 1
BitField<28, 4, u32> reserved4;
};
};
static_assert(sizeof(OutputSurfaceConfig) == 0x10, "OutputSurfaceConfig has the wrong size!");
// 32-byte conversion matrix: four 64-bit words, each packing three signed 20-bit coefficients.
// Word N holds coefficients (0,N), (1,N) and (2,N). The top bits of word 0 hold matrix_r_shift
// and bit 63 of word 1 holds matrix_enable.
struct MatrixStruct {
union {
BitField<0, 20, s64> matrix_coeff00; // (0,0) of 4x3 conversion matrix
BitField<20, 20, s64> matrix_coeff10; // (1,0) of 4x3 conversion matrix
BitField<40, 20, s64> matrix_coeff20; // (2,0) of 4x3 conversion matrix
BitField<60, 4, u64> matrix_r_shift;
};
union {
BitField<0, 20, s64> matrix_coeff01; // (0,1) of 4x3 conversion matrix
BitField<20, 20, s64> matrix_coeff11; // (1,1) of 4x3 conversion matrix
BitField<40, 20, s64> matrix_coeff21; // (2,1) of 4x3 conversion matrix
BitField<60, 3, u64> reserved0;
BitField<63, 1, u64> matrix_enable;
};
union {
BitField<0, 20, s64> matrix_coeff02; // (0,2) of 4x3 conversion matrix
BitField<20, 20, s64> matrix_coeff12; // (1,2) of 4x3 conversion matrix
BitField<40, 20, s64> matrix_coeff22; // (2,2) of 4x3 conversion matrix
BitField<60, 4, u64> reserved1;
};
union {
BitField<0, 20, s64> matrix_coeff03; // (0,3) of 4x3 conversion matrix
BitField<20, 20, s64> matrix_coeff13; // (1,3) of 4x3 conversion matrix
BitField<40, 20, s64> matrix_coeff23; // (2,3) of 4x3 conversion matrix
BitField<60, 4, u64> reserved2;
};
};
static_assert(sizeof(MatrixStruct) == 0x20, "MatrixStruct has the wrong size!");
// 16-byte block describing two clear rectangles. Each rectangle takes two 32-bit words:
// left/right in the first, top/bottom in the second, 14 bits per edge.
struct ClearRectStruct {
// Rectangle 0, horizontal edges.
union {
BitField<0, 14, u32> clear_rect0_left;
BitField<14, 2, u32> reserved0;
BitField<16, 14, u32> clear_rect0_right;
BitField<30, 2, u32> reserved1;
};
// Rectangle 0, vertical edges.
union {
BitField<0, 14, u32> clear_rect0_top;
BitField<14, 2, u32> reserved2;
BitField<16, 14, u32> clear_rect0_bottom;
BitField<30, 2, u32> reserved3;
};
// Rectangle 1, horizontal edges.
union {
BitField<0, 14, u32> clear_rect1_left;
BitField<14, 2, u32> reserved4;
BitField<16, 14, u32> clear_rect1_right;
BitField<30, 2, u32> reserved5;
};
// Rectangle 1, vertical edges.
union {
BitField<0, 14, u32> clear_rect1_top;
BitField<14, 2, u32> reserved6;
BitField<16, 14, u32> clear_rect1_bottom;
BitField<30, 2, u32> reserved7;
};
};
static_assert(sizeof(ClearRectStruct) == 0x10, "ClearRectStruct has the wrong size!");
// Per-slot configuration block: seven 64-bit bit-field words followed by two reserved u32
// values, 0x40 bytes in total (checked by the static_assert below).
// The bit ranges inside each union tile the full 64 bits without gaps or overlap.
struct SlotConfig {
// Word 0: enable/feature flags, field-fetch enables, frame format and filter lengths.
union {
BitField<0, 1, u64> slot_enable;
BitField<1, 1, u64> denoise;
BitField<2, 1, u64> advanced_denoise;
BitField<3, 1, u64> cadence_detect;
BitField<4, 1, u64> motion_map;
BitField<5, 1, u64> motion_map_capture;
BitField<6, 1, u64> is_even;
BitField<7, 1, u64> chroma_even;
// fetch control struct
BitField<8, 1, u64> current_field_enable;
BitField<9, 1, u64> prev_field_enable;
BitField<10, 1, u64> next_field_enable;
BitField<11, 1, u64> next_nr_field_enable; // noise reduction
BitField<12, 1, u64> current_motion_field_enable;
BitField<13, 1, u64> prev_motion_field_enable;
BitField<14, 1, u64> prev_prev_motion_field_enable;
BitField<15, 1, u64> combined_motion_field_enable;
BitField<16, 4, DXVAHD_FRAME_FORMAT> frame_format;
BitField<20, 2, u64> filter_length_y; // 0: 1-tap, 1: 2-tap, 2: 5-tap, 3: 10-tap
BitField<22, 2, u64> filter_length_x;
BitField<24, 12, u64> panoramic;
BitField<36, 22, u64> reserved1;
BitField<58, 6, u64> detail_filter_clamp;
};
// Word 1: noise/detail filter strengths, deinterlace mode and related tuning values.
union {
BitField<0, 10, u64> filter_noise;
BitField<10, 10, u64> filter_detail;
BitField<20, 10, u64> chroma_noise;
BitField<30, 10, u64> chroma_detail;
BitField<40, 4, DXVAHD_DEINTERLACE_MODE_PRIVATE> deinterlace_mode;
BitField<44, 3, u64> motion_accumulation_weight;
BitField<47, 11, u64> noise_iir;
BitField<58, 4, u64> light_level;
BitField<62, 2, u64> reserved4;
};
// Word 2: soft clamp range, alpha settings, clipping/clear-rect mask, degamma and
// decompression enable.
union {
BitField<0, 10, u64> soft_clamp_low;
BitField<10, 10, u64> soft_clamp_high;
BitField<20, 3, u64> reserved5;
BitField<23, 9, u64> reserved6;
BitField<32, 10, u64> planar_alpha;
BitField<42, 1, u64> constant_alpha;
BitField<43, 3, u64> stereo_interleave;
BitField<46, 1, u64> clip_enabled;
BitField<47, 8, u64> clear_rect_mask;
BitField<55, 2, u64> degamma_mode;
BitField<57, 1, u64> reserved7;
BitField<58, 1, u64> decompress_enable;
BitField<59, 5, u64> reserved9;
};
// Word 3: decompression counters.
union {
BitField<0, 8, u64> decompress_ctb_count;
BitField<8, 32, u64> decompress_zbc_count;
BitField<40, 24, u64> reserved12;
};
// Word 4: horizontal source rectangle bounds (30-bit fields).
union {
BitField<0, 30, u64> source_rect_left;
BitField<30, 2, u64> reserved14;
BitField<32, 30, u64> source_rect_right;
BitField<62, 2, u64> reserved15;
};
// Word 5: vertical source rectangle bounds (30-bit fields).
union {
BitField<0, 30, u64> source_rect_top;
BitField<30, 2, u64> reserved16;
BitField<32, 30, u64> source_rect_bottom;
BitField<62, 2, u64> reserved17;
};
// Word 6: full destination rectangle; all four 14-bit bounds share one word.
union {
BitField<0, 14, u64> dest_rect_left;
BitField<14, 2, u64> reserved18;
BitField<16, 14, u64> dest_rect_right;
BitField<30, 2, u64> reserved19;
BitField<32, 14, u64> dest_rect_top;
BitField<46, 2, u64> reserved20;
BitField<48, 14, u64> dest_rect_bottom;
BitField<62, 2, u64> reserved21;
};
// Trailing reserved words that pad the structure up to 0x40 bytes.
u32 reserved22;
u32 reserved23;
};
// 7 * 8 bytes of bit-field words + 2 * 4 bytes of reserved tail.
static_assert(sizeof(SlotConfig) == 0x40, "SlotConfig has the wrong size!");
// Surface description for one input slot: pixel format / block layout in the first word,
// then surface, luma and chroma dimensions as pairs of 14-bit fields (four 32-bit words total).
// NOTE(review): the "- 1" annotations presumably mean the stored value is the dimension
// minus one — confirm against the code that consumes these fields.
struct SlotSurfaceConfig {
// Word 0: pixel format, chroma siting and block layout parameters.
union {
BitField<0, 7, VideoPixelFormat> slot_pixel_format;
BitField<7, 2, u32> slot_chroma_loc_horiz;
BitField<9, 2, u32> slot_chroma_loc_vert;
BitField<11, 4, u32> slot_block_kind;
BitField<15, 4, u32> slot_block_height;
BitField<19, 3, u32> slot_cache_width;
BitField<22, 10, u32> reserved0;
};
// Word 1: overall surface dimensions.
union {
BitField<0, 14, u32> slot_surface_width; // - 1
BitField<14, 14, u32> slot_surface_height; // - 1
BitField<28, 4, u32> reserved1;
};
// Word 2: luma plane dimensions.
union {
BitField<0, 14, u32> slot_luma_width; // padded, - 1
BitField<14, 14, u32> slot_luma_height; // padded, - 1
BitField<28, 4, u32> reserved2;
};
// Word 3: chroma plane dimensions.
union {
BitField<0, 14, u32> slot_chroma_width; // padded, - 1
BitField<14, 14, u32> slot_chroma_height; // padded, - 1
BitField<28, 4, u32> reserved3;
};
};
// 4 words * 4 bytes.
static_assert(sizeof(SlotSurfaceConfig) == 0x10, "SlotSurfaceConfig has the wrong size!");
// Luma key parameters: four 20-bit coefficients of a 4x1 conversion matrix plus the key
// range and enable bit, packed into two 64-bit words.
// NOTE(review): the coefficients are documented as S12.8 (signed) but use an unsigned u64
// storage type, whereas MatrixStruct declares its 20-bit coefficients with s64. Confirm
// whether readers of these fields need sign extension before relying on negative values.
struct LumaKeyStruct {
// Word 0: coefficients 0..2 and the right-shift amount.
union {
BitField<0, 20, u64> luma_coeff0; // (0) of 4x1 conversion matrix, S12.8 format
BitField<20, 20, u64> luma_coeff1; // (1) of 4x1 conversion matrix, S12.8 format
BitField<40, 20, u64> luma_coeff2; // (2) of 4x1 conversion matrix, S12.8 format
BitField<60, 4, u64> luma_r_shift;
};
// Word 1: coefficient 3, the 10-bit lower/upper key bounds and the enable flag.
union {
BitField<0, 20, u64> luma_coeff3; // (3) of 4x1 conversion matrix, S12.8 format
BitField<20, 10, u64> luma_key_lower;
BitField<30, 10, u64> luma_key_upper;
BitField<40, 1, u64> luma_key_enabled;
BitField<41, 2, u64> reserved0;
BitField<43, 21, u64> reserved1;
};
};
// 2 words * 8 bytes.
static_assert(sizeof(LumaKeyStruct) == 0x10, "LumaKeyStruct has the wrong size!");
// Per-slot blending parameters packed into four 32-bit words: alpha constants, blend factor
// selectors, per-channel override values, and override/mask enable bits.
struct BlendingSlotStruct {
// Word 0: the two 10-bit alpha constants (k1, k2).
union {
BitField<0, 10, u32> alpha_k1;
BitField<10, 6, u32> reserved0;
BitField<16, 10, u32> alpha_k2;
BitField<26, 6, u32> reserved1;
};
// Word 1: source/destination factor selectors for color and alpha; upper half is reserved.
union {
BitField<0, 3, BLEND_SRCFACTC> src_factor_color_match_select;
BitField<3, 1, u32> reserved2;
BitField<4, 3, BLEND_DSTFACTC> dst_factor_color_match_select;
BitField<7, 1, u32> reserved3;
BitField<8, 3, BLEND_SRCFACTA> src_factor_a_match_select;
BitField<11, 1, u32> reserved4;
BitField<12, 3, BLEND_DSTFACTA> dst_factor_a_match_select;
BitField<15, 1, u32> reserved5;
BitField<16, 4, u32> reserved6;
BitField<20, 4, u32> reserved7;
BitField<24, 4, u32> reserved8;
BitField<28, 4, u32> reserved9;
};
// Word 2: 10-bit override values for R, G and B (bits 0-1 reserved).
union {
BitField<0, 2, u32> reserved10;
BitField<2, 10, u32> override_r;
BitField<12, 10, u32> override_g;
BitField<22, 10, u32> override_b;
};
// Word 3: 10-bit alpha override, then per-channel "use override" and mask bits.
union {
BitField<0, 10, u32> override_a;
BitField<10, 2, u32> reserved11;
BitField<12, 1, u32> use_override_r;
BitField<13, 1, u32> use_override_g;
BitField<14, 1, u32> use_override_b;
BitField<15, 1, u32> use_override_a;
BitField<16, 1, u32> mask_r;
BitField<17, 1, u32> mask_g;
BitField<18, 1, u32> mask_b;
BitField<19, 1, u32> mask_a;
BitField<20, 12, u32> reserved12;
};
};
// 4 words * 4 bytes.
static_assert(sizeof(BlendingSlotStruct) == 0x10, "BlendingSlotStruct has the wrong size!");
// Complete description of one input slot, formed by concatenating the per-slot sub-structures.
// Member order is part of the layout: 0x40 + 0x10 + 0x10 + 0x20 + 0x20 + 0x10 = 0xB0 bytes.
struct SlotStruct {
SlotConfig config;
SlotSurfaceConfig surface_config;
LumaKeyStruct luma_key;
MatrixStruct color_matrix;
MatrixStruct gamut_matrix;
BlendingSlotStruct blending;
};
// Fails to compile if any sub-structure changes size or padding is introduced.
static_assert(sizeof(SlotStruct) == 0xB0, "SlotStruct has the wrong size!");
// Top-level configuration structure: global pipe/output settings, the output color matrix,
// four clear-rect pairs and eight slot descriptions. The static_asserts below pin every
// member to a fixed byte offset, so members must not be reordered or resized.
struct ConfigStruct {
PipeConfig pipe_config;
OutputConfig output_config;
OutputSurfaceConfig output_surface_config;
MatrixStruct out_color_matrix;
std::array<ClearRectStruct, 4> clear_rects;
std::array<SlotStruct, 8> slot_structs;
};
// Byte offsets of each member within ConfigStruct.
static_assert(offsetof(ConfigStruct, pipe_config) == 0x0, "pipe_config is in the wrong place!");
static_assert(offsetof(ConfigStruct, output_config) == 0x10,
"output_config is in the wrong place!");
static_assert(offsetof(ConfigStruct, output_surface_config) == 0x20,
"output_surface_config is in the wrong place!");
static_assert(offsetof(ConfigStruct, out_color_matrix) == 0x30,
"out_color_matrix is in the wrong place!");
static_assert(offsetof(ConfigStruct, clear_rects) == 0x50, "clear_rects is in the wrong place!");
static_assert(offsetof(ConfigStruct, slot_structs) == 0x90, "slot_structs is in the wrong place!");
// 0x90 bytes of header data + 8 slots * 0xB0 bytes.
static_assert(sizeof(ConfigStruct) == 0x610, "ConfigStruct has the wrong size!");
// Register file layout. The anonymous union overlays the named registers on top of
// reg_array, so a register can be accessed either by name or by 32-bit word index.
// NUM_REGS counts 32-bit words (0x446 * 4 == 0x1118 bytes); the offsets asserted below
// are in bytes.
struct VicRegisters {
static constexpr std::size_t NUM_REGS = 0x446;
union {
struct {
// Padding counts are in 32-bit words (0xC0 words place `execute` at byte 0x300).
// NOTE(review): INSERT_PADDING_WORDS_NOINIT is defined elsewhere; presumably it emits
// uninitialized u32 padding — confirm at the macro definition.
INSERT_PADDING_WORDS_NOINIT(0xC0);
u32 execute;
INSERT_PADDING_WORDS_NOINIT(0x3F);
// 8 x 8 grid of plane offsets occupying bytes 0x400..0x6FF.
std::array<std::array<PlaneOffsets, 8>, 8> surfaces;
u32 picture_index;
u32 control_params;
Offset config_struct_offset;
Offset filter_struct_offset;
Offset palette_offset;
Offset hist_offset;
u32 context_id;
u32 fce_ucode_size;
PlaneOffsets output_surface;
Offset fce_ucode_offset;
INSERT_PADDING_WORDS_NOINIT(0x4);
std::array<u32, 8> slot_context_ids;
std::array<Offset, 8> comp_tag_buffer_offsets;
std::array<Offset, 8> history_buffer_offset;
INSERT_PADDING_WORDS_NOINIT(0x25D);
// Last named register; it ends exactly at sizeof(VicRegisters).
u32 pm_trigger_end;
};
// Flat word-indexed view of the same storage.
std::array<u32, NUM_REGS> reg_array;
};
};
// Byte offsets of selected registers; these catch miscounted padding above.
static_assert(offsetof(VicRegisters, execute) == 0x300, "execute is in the wrong place!");
static_assert(offsetof(VicRegisters, surfaces) == 0x400, "surfaces is in the wrong place!");
static_assert(offsetof(VicRegisters, picture_index) == 0x700,
"picture_index is in the wrong place!");
static_assert(offsetof(VicRegisters, control_params) == 0x704,
"control_params is in the wrong place!");
static_assert(offsetof(VicRegisters, config_struct_offset) == 0x708,
"config_struct_offset is in the wrong place!");
static_assert(offsetof(VicRegisters, output_surface) == 0x720,
"output_surface is in the wrong place!");
static_assert(offsetof(VicRegisters, slot_context_ids) == 0x740,
"slot_context_ids is in the wrong place!");
static_assert(offsetof(VicRegisters, history_buffer_offset) == 0x780,
"history_buffer_offset is in the wrong place!");
static_assert(offsetof(VicRegisters, pm_trigger_end) == 0x1114,
"pm_trigger_end is in the wrong place!");
// The named-register view and reg_array must describe the same number of bytes.
static_assert(sizeof(VicRegisters) == 0x1118, "VicRegisters has the wrong size!");
class Vic final : public CDmaPusher {
public: public:
enum class Method : u32 { enum class Method : u32 {
Execute = 0xc0, Execute = offsetof(VicRegisters, execute),
SetControlParams = 0x1c1, SetControlParams = offsetof(VicRegisters, control_params),
SetConfigStructOffset = 0x1c2, SetConfigStructOffset = offsetof(VicRegisters, config_struct_offset),
SetOutputSurfaceLumaOffset = 0x1c8, SetOutputSurfaceLumaOffset = offsetof(VicRegisters, output_surface.luma),
SetOutputSurfaceChromaOffset = 0x1c9, SetOutputSurfaceChromaOffset = offsetof(VicRegisters, output_surface.chroma_u),
SetOutputSurfaceChromaUnusedOffset = 0x1ca SetOutputSurfaceChromaUnusedOffset = offsetof(VicRegisters, output_surface.chroma_v)
}; };
explicit Vic(Host1x& host1x, std::shared_ptr<Nvdec> nvdec_processor); explicit Vic(Host1x& host1x, s32 id, u32 syncpt, FrameQueue& frame_queue);
~Vic(); ~Vic();
/// Write to the device state. /// Write to the device state.
void ProcessMethod(Method method, u32 argument); void ProcessMethod(u32 method, u32 arg) override;
private: private:
void Execute(); void Execute();
void WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config); void Blend(const ConfigStruct& config, const SlotStruct& slot);
void WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config); template <bool Planar, bool Interlaced = false>
void ReadProgressiveY8__V8U8_N420(const SlotStruct& slot, std::span<const PlaneOffsets> offsets,
std::shared_ptr<const FFmpeg::Frame> frame);
template <bool Planar, bool TopField>
void ReadInterlacedY8__V8U8_N420(const SlotStruct& slot, std::span<const PlaneOffsets> offsets,
std::shared_ptr<const FFmpeg::Frame> frame);
Host1x& host1x; template <bool Planar>
std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor; void ReadY8__V8U8_N420(const SlotStruct& slot, std::span<const PlaneOffsets> offsets,
std::shared_ptr<const FFmpeg::Frame> frame);
/// Avoid reallocation of the following buffers every frame, as their void WriteY8__V8U8_N420(const OutputSurfaceConfig& output_surface_config);
/// size does not change during a stream
using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
AVMallocPtr converted_frame_buffer;
Common::ScratchBuffer<u8> luma_buffer;
Common::ScratchBuffer<u8> chroma_buffer;
GPUVAddr config_struct_address{}; template <VideoPixelFormat Format>
GPUVAddr output_surface_luma_address{}; void WriteABGR(const OutputSurfaceConfig& output_surface_config);
GPUVAddr output_surface_chroma_address{};
SwsContext* scaler_ctx{}; s32 id;
s32 scaler_width{}; s32 nvdec_id{-1};
s32 scaler_height{}; u32 syncpoint;
VicRegisters regs{};
FrameQueue& frame_queue;
const bool has_sse41{false};
Common::ScratchBuffer<Pixel> output_surface;
Common::ScratchBuffer<Pixel> slot_surface;
Common::ScratchBuffer<u8> luma_scratch;
Common::ScratchBuffer<u8> chroma_scratch;
Common::ScratchBuffer<u8> swizzle_scratch;
}; };
} // namespace Host1x } // namespace Tegra::Host1x
} // namespace Tegra

View File

@ -43,6 +43,8 @@ public:
u64 big_page_bits_ = 16, u64 page_bits_ = 12); u64 big_page_bits_ = 16, u64 page_bits_ = 12);
~MemoryManager(); ~MemoryManager();
static constexpr bool HAS_FLUSH_INVALIDATION = true;
size_t GetID() const { size_t GetID() const {
return unique_identifier; return unique_identifier;
} }

View File

@ -352,6 +352,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_native_ndc = device.IsExtDepthClipControlSupported(), .support_native_ndc = device.IsExtDepthClipControlSupported(),
.support_scaled_attributes = !device.MustEmulateScaledFormats(), .support_scaled_attributes = !device.MustEmulateScaledFormats(),
.support_multi_viewport = device.SupportsMultiViewport(), .support_multi_viewport = device.SupportsMultiViewport(),
.support_geometry_streams = device.AreTransformFeedbackGeometryStreamsSupported(),
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),

View File

@ -1064,8 +1064,6 @@ public:
} }
}); });
} }
auto* ptr = device_memory.GetPointer<u8>(new_query->dependant_address);
ASSERT(ptr != nullptr);
new_query->dependant_manage = must_manage_dependance; new_query->dependant_manage = must_manage_dependance;
pending_flush_queries.push_back(index); pending_flush_queries.push_back(index);
@ -1104,9 +1102,11 @@ public:
tfb_streamer.Free(query->dependant_index); tfb_streamer.Free(query->dependant_index);
} else { } else {
u8* pointer = device_memory.GetPointer<u8>(query->dependant_address); u8* pointer = device_memory.GetPointer<u8>(query->dependant_address);
u32 result; if (pointer != nullptr) {
std::memcpy(&result, pointer, sizeof(u32)); u32 result;
num_vertices = static_cast<u64>(result) / query->stride; std::memcpy(&result, pointer, sizeof(u32));
num_vertices = static_cast<u64>(result) / query->stride;
}
} }
query->value = [&]() -> u64 { query->value = [&]() -> u64 {
switch (query->topology) { switch (query->topology) {
@ -1360,7 +1360,9 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
const auto check_value = [&](DAddr address) { const auto check_value = [&](DAddr address) {
u8* ptr = impl->device_memory.GetPointer<u8>(address); u8* ptr = impl->device_memory.GetPointer<u8>(address);
u64 value{}; u64 value{};
std::memcpy(&value, ptr, sizeof(value)); if (ptr != nullptr) {
std::memcpy(&value, ptr, sizeof(value));
}
return value == 0; return value == 0;
}; };
std::array<VideoCommon::LookupData*, 2> objects{&object_1, &object_2}; std::array<VideoCommon::LookupData*, 2> objects{&object_1, &object_2};

View File

@ -72,12 +72,19 @@ TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag
template <class P> template <class P>
void TextureCache<P>::RunGarbageCollector() { void TextureCache<P>::RunGarbageCollector() {
bool high_priority_mode = total_used_memory >= expected_memory; bool high_priority_mode = false;
bool aggressive_mode = total_used_memory >= critical_memory; bool aggressive_mode = false;
const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; u64 ticks_to_destroy = 0;
size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); size_t num_iterations = 0;
const auto clean_up = [this, &num_iterations, &high_priority_mode,
&aggressive_mode](ImageId image_id) { const auto Configure = [&](bool allow_aggressive) {
high_priority_mode = total_used_memory >= expected_memory;
aggressive_mode = allow_aggressive && total_used_memory >= critical_memory;
ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
};
const auto Cleanup = [this, &num_iterations, &high_priority_mode,
&aggressive_mode](ImageId image_id) {
if (num_iterations == 0) { if (num_iterations == 0) {
return true; return true;
} }
@ -123,7 +130,16 @@ void TextureCache<P>::RunGarbageCollector() {
} }
return false; return false;
}; };
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
// Try to remove anything old enough and not high priority.
Configure(false);
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
// If pressure is still too high, prune aggressively.
if (total_used_memory >= critical_memory) {
Configure(true);
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
}
} }
template <class P> template <class P>

View File

@ -499,6 +499,11 @@ public:
return extensions.transform_feedback; return extensions.transform_feedback;
} }
/// Returns true if the device reports the geometryStreams feature of VK_EXT_transform_feedback.
bool AreTransformFeedbackGeometryStreamsSupported() const {
return features.transform_feedback.geometryStreams;
}
/// Returns true if the device supports VK_EXT_custom_border_color. /// Returns true if the device supports VK_EXT_custom_border_color.
bool IsExtCustomBorderColorSupported() const { bool IsExtCustomBorderColorSupported() const {
return extensions.custom_border_color; return extensions.custom_border_color;