Compare commits
14 Commits
Author | SHA1 | Date | |
---|---|---|---|
cfdb10a7ba | |||
8012b28b92 | |||
531d280461 | |||
d702915624 | |||
943d5eeddf | |||
f3ac6f054f | |||
a94af8ea62 | |||
6da4853360 | |||
b738584832 | |||
1cb34ea0d3 | |||
662bb9ba77 | |||
26e3f96983 | |||
cd3244f139 | |||
e5310b25d4 |
8
.github/workflows/ci-merge.js
vendored
8
.github/workflows/ci-merge.js
vendored
@ -11,7 +11,7 @@ async function checkBaseChanges(github, context) {
|
||||
repository(name:$name, owner:$owner) {
|
||||
ref(qualifiedName:$ref) {
|
||||
target {
|
||||
... on Commit { id pushedDate oid }
|
||||
... on Commit { id committedDate oid }
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -22,9 +22,9 @@ async function checkBaseChanges(github, context) {
|
||||
ref: 'refs/heads/master',
|
||||
};
|
||||
const result = await github.graphql(query, variables);
|
||||
const pushedAt = result.repository.ref.target.pushedDate;
|
||||
console.log(`Last commit pushed at ${pushedAt}.`);
|
||||
const delta = new Date() - new Date(pushedAt);
|
||||
const committedAt = result.repository.ref.target.committedDate;
|
||||
console.log(`Last commit committed at ${committedAt}.`);
|
||||
const delta = new Date() - new Date(committedAt);
|
||||
if (delta <= DETECTION_TIME_FRAME) {
|
||||
console.info('New changes detected, triggering a new build.');
|
||||
return true;
|
||||
|
@ -13,8 +13,8 @@ public class EmulationMenuSettings {
|
||||
public static final int LayoutOption_SingleScreen = 1;
|
||||
public static final int LayoutOption_LargeScreen = 2;
|
||||
public static final int LayoutOption_SideScreen = 3;
|
||||
public static final int LayoutOption_MobilePortrait = 4;
|
||||
public static final int LayoutOption_MobileLandscape = 5;
|
||||
public static final int LayoutOption_MobilePortrait = 5;
|
||||
public static final int LayoutOption_MobileLandscape = 6;
|
||||
|
||||
public static boolean getJoystickRelCenter() {
|
||||
return mPreferences.getBoolean("EmulationMenuSettings_JoystickRelCenter", true);
|
||||
|
@ -151,7 +151,7 @@ static Core::System::ResultStatus RunCitra(const std::string& filepath) {
|
||||
Camera::RegisterFactory("ndk", std::move(ndk_factory));
|
||||
|
||||
// Register frontend applets
|
||||
Frontend::RegisterDefaultApplets();
|
||||
Frontend::RegisterDefaultApplets(system);
|
||||
system.RegisterMiiSelector(std::make_shared<MiiSelector::AndroidMiiSelector>());
|
||||
system.RegisterSoftwareKeyboard(std::make_shared<SoftwareKeyboard::AndroidKeyboard>());
|
||||
|
||||
|
@ -37,39 +37,33 @@ CubebInput::CubebInput(std::string device_id)
|
||||
}
|
||||
|
||||
CubebInput::~CubebInput() {
|
||||
if (!impl->ctx)
|
||||
return;
|
||||
|
||||
if (impl->stream) {
|
||||
if (cubeb_stream_stop(impl->stream) != CUBEB_OK) {
|
||||
LOG_ERROR(Audio, "Error stopping cubeb input stream.");
|
||||
}
|
||||
|
||||
cubeb_stream_destroy(impl->stream);
|
||||
}
|
||||
|
||||
cubeb_destroy(impl->ctx);
|
||||
if (impl->ctx) {
|
||||
cubeb_destroy(impl->ctx);
|
||||
}
|
||||
}
|
||||
|
||||
void CubebInput::StartSampling(const InputParameters& params) {
|
||||
// Cubeb apparently only supports signed 16 bit PCM (and float32 which the 3ds doesn't support)
|
||||
// TODO resample the input stream
|
||||
// TODO: Resample the input stream.
|
||||
if (params.sign == Signedness::Unsigned) {
|
||||
LOG_ERROR(Audio,
|
||||
"Application requested unsupported unsigned pcm format. Falling back to signed");
|
||||
"Application requested unsupported unsigned pcm format. Falling back to signed.");
|
||||
}
|
||||
|
||||
impl->sample_size_in_bytes = params.sample_size / 8;
|
||||
|
||||
parameters = params;
|
||||
is_sampling = true;
|
||||
impl->sample_size_in_bytes = params.sample_size / 8;
|
||||
|
||||
cubeb_devid input_device = nullptr;
|
||||
if (device_id != auto_device_name && !device_id.empty()) {
|
||||
cubeb_device_collection collection;
|
||||
if (cubeb_enumerate_devices(impl->ctx, CUBEB_DEVICE_TYPE_INPUT, &collection) != CUBEB_OK) {
|
||||
LOG_WARNING(Audio, "Audio input device enumeration not supported");
|
||||
} else {
|
||||
if (cubeb_enumerate_devices(impl->ctx, CUBEB_DEVICE_TYPE_INPUT, &collection) == CUBEB_OK) {
|
||||
const auto collection_end = collection.device + collection.count;
|
||||
const auto device = std::find_if(
|
||||
collection.device, collection_end, [this](const cubeb_device_info& info) {
|
||||
@ -79,39 +73,42 @@ void CubebInput::StartSampling(const InputParameters& params) {
|
||||
input_device = device->devid;
|
||||
}
|
||||
cubeb_device_collection_destroy(impl->ctx, &collection);
|
||||
} else {
|
||||
LOG_WARNING(Audio_Sink,
|
||||
"Audio input device enumeration not supported, using default device.");
|
||||
}
|
||||
}
|
||||
|
||||
cubeb_stream_params input_params;
|
||||
input_params.channels = 1;
|
||||
input_params.layout = CUBEB_LAYOUT_UNDEFINED;
|
||||
input_params.prefs = CUBEB_STREAM_PREF_NONE;
|
||||
input_params.format = CUBEB_SAMPLE_S16LE;
|
||||
input_params.rate = params.sample_rate;
|
||||
cubeb_stream_params input_params = {
|
||||
.format = CUBEB_SAMPLE_S16LE,
|
||||
.rate = params.sample_rate,
|
||||
.channels = 1,
|
||||
.layout = CUBEB_LAYOUT_UNDEFINED,
|
||||
};
|
||||
|
||||
u32 latency_frames = 512; // Firefox default
|
||||
if (cubeb_get_min_latency(impl->ctx, &input_params, &latency_frames) != CUBEB_OK) {
|
||||
LOG_ERROR(Audio, "Could not get minimum latency");
|
||||
LOG_WARNING(Audio, "Error getting minimum input latency, falling back to default latency.");
|
||||
}
|
||||
|
||||
if (cubeb_stream_init(impl->ctx, &impl->stream, "Citra Microphone", input_device, &input_params,
|
||||
nullptr, nullptr, latency_frames, Impl::DataCallback, Impl::StateCallback,
|
||||
impl.get()) != CUBEB_OK) {
|
||||
LOG_CRITICAL(Audio, "Error creating cubeb input stream");
|
||||
is_sampling = false;
|
||||
LOG_CRITICAL(Audio, "Error creating cubeb input stream.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (cubeb_stream_start(impl->stream) != CUBEB_OK) {
|
||||
LOG_CRITICAL(Audio, "Error starting cubeb input stream");
|
||||
is_sampling = false;
|
||||
LOG_CRITICAL(Audio, "Error starting cubeb input stream.");
|
||||
cubeb_stream_destroy(impl->stream);
|
||||
impl->stream = nullptr;
|
||||
return;
|
||||
}
|
||||
|
||||
is_sampling = true;
|
||||
}
|
||||
|
||||
void CubebInput::StopSampling() {
|
||||
// TODO(xperia64): Destroy the stream for now to avoid a leak because StartSampling
|
||||
// reinitializes the stream every time
|
||||
if (impl->stream) {
|
||||
cubeb_stream_stop(impl->stream);
|
||||
cubeb_stream_destroy(impl->stream);
|
||||
@ -121,8 +118,14 @@ void CubebInput::StopSampling() {
|
||||
}
|
||||
|
||||
void CubebInput::AdjustSampleRate(u32 sample_rate) {
|
||||
// TODO This should restart the stream with the new sample rate
|
||||
LOG_ERROR(Audio, "AdjustSampleRate unimplemented!");
|
||||
if (!is_sampling) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto new_parameters = parameters;
|
||||
new_parameters.sample_rate = sample_rate;
|
||||
StopSampling();
|
||||
StartSampling(new_parameters);
|
||||
}
|
||||
|
||||
Samples CubebInput::Read() {
|
||||
@ -136,7 +139,7 @@ Samples CubebInput::Read() {
|
||||
|
||||
long CubebInput::Impl::DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
|
||||
void* output_buffer, long num_frames) {
|
||||
Impl* impl = static_cast<Impl*>(user_data);
|
||||
auto impl = static_cast<Impl*>(user_data);
|
||||
if (!impl) {
|
||||
return 0;
|
||||
}
|
||||
@ -177,9 +180,7 @@ std::vector<std::string> ListCubebInputDevices() {
|
||||
}
|
||||
|
||||
cubeb_device_collection collection;
|
||||
if (cubeb_enumerate_devices(ctx, CUBEB_DEVICE_TYPE_INPUT, &collection) != CUBEB_OK) {
|
||||
LOG_WARNING(Audio_Sink, "Audio input device enumeration not supported");
|
||||
} else {
|
||||
if (cubeb_enumerate_devices(ctx, CUBEB_DEVICE_TYPE_INPUT, &collection) == CUBEB_OK) {
|
||||
for (std::size_t i = 0; i < collection.count; i++) {
|
||||
const cubeb_device_info& device = collection.device[i];
|
||||
if (device.state == CUBEB_DEVICE_STATE_ENABLED && device.friendly_name) {
|
||||
@ -187,6 +188,8 @@ std::vector<std::string> ListCubebInputDevices() {
|
||||
}
|
||||
}
|
||||
cubeb_device_collection_destroy(ctx, &collection);
|
||||
} else {
|
||||
LOG_WARNING(Audio_Sink, "Audio input device enumeration not supported.");
|
||||
}
|
||||
|
||||
cubeb_destroy(ctx);
|
||||
|
@ -13,8 +13,6 @@
|
||||
namespace AudioCore {
|
||||
|
||||
struct CubebSink::Impl {
|
||||
unsigned int sample_rate = 0;
|
||||
|
||||
cubeb* ctx = nullptr;
|
||||
cubeb_stream* stream = nullptr;
|
||||
|
||||
@ -31,28 +29,29 @@ CubebSink::CubebSink(std::string_view target_device_name) : impl(std::make_uniqu
|
||||
LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
|
||||
return;
|
||||
}
|
||||
cubeb_set_log_callback(CUBEB_LOG_NORMAL, &Impl::LogCallback);
|
||||
|
||||
impl->sample_rate = native_sample_rate;
|
||||
if (cubeb_set_log_callback(CUBEB_LOG_NORMAL, &Impl::LogCallback) != CUBEB_OK) {
|
||||
LOG_CRITICAL(Audio_Sink, "cubeb_set_log_callback failed");
|
||||
return;
|
||||
}
|
||||
|
||||
cubeb_stream_params params;
|
||||
params.rate = impl->sample_rate;
|
||||
params.channels = 2;
|
||||
params.layout = CUBEB_LAYOUT_STEREO;
|
||||
params.format = CUBEB_SAMPLE_S16NE;
|
||||
params.prefs = CUBEB_STREAM_PREF_PERSIST;
|
||||
cubeb_stream_params params = {
|
||||
.format = CUBEB_SAMPLE_S16LE,
|
||||
.rate = native_sample_rate,
|
||||
.channels = 2,
|
||||
.layout = CUBEB_LAYOUT_STEREO,
|
||||
};
|
||||
|
||||
u32 minimum_latency = 100 * impl->sample_rate / 1000; // Firefox default
|
||||
u32 minimum_latency = 100 * native_sample_rate / 1000; // Firefox default
|
||||
if (cubeb_get_min_latency(impl->ctx, ¶ms, &minimum_latency) != CUBEB_OK) {
|
||||
LOG_CRITICAL(Audio_Sink, "Error getting minimum latency");
|
||||
LOG_WARNING(Audio_Sink,
|
||||
"Error getting minimum output latency, falling back to default latency.");
|
||||
}
|
||||
|
||||
cubeb_devid output_device = nullptr;
|
||||
if (target_device_name != auto_device_name && !target_device_name.empty()) {
|
||||
cubeb_device_collection collection;
|
||||
if (cubeb_enumerate_devices(impl->ctx, CUBEB_DEVICE_TYPE_OUTPUT, &collection) != CUBEB_OK) {
|
||||
LOG_WARNING(Audio_Sink, "Audio output device enumeration not supported");
|
||||
} else {
|
||||
if (cubeb_enumerate_devices(impl->ctx, CUBEB_DEVICE_TYPE_OUTPUT, &collection) == CUBEB_OK) {
|
||||
const auto collection_end{collection.device + collection.count};
|
||||
const auto device{
|
||||
std::find_if(collection.device, collection_end, [&](const cubeb_device_info& info) {
|
||||
@ -63,12 +62,15 @@ CubebSink::CubebSink(std::string_view target_device_name) : impl(std::make_uniqu
|
||||
output_device = device->devid;
|
||||
}
|
||||
cubeb_device_collection_destroy(impl->ctx, &collection);
|
||||
} else {
|
||||
LOG_WARNING(Audio_Sink,
|
||||
"Audio output device enumeration not supported, using default device.");
|
||||
}
|
||||
}
|
||||
|
||||
int stream_err = cubeb_stream_init(impl->ctx, &impl->stream, "CitraAudio", nullptr, nullptr,
|
||||
output_device, ¶ms, std::max(512u, minimum_latency),
|
||||
&Impl::DataCallback, &Impl::StateCallback, impl.get());
|
||||
auto stream_err = cubeb_stream_init(impl->ctx, &impl->stream, "CitraAudio", nullptr, nullptr,
|
||||
output_device, ¶ms, std::max(512u, minimum_latency),
|
||||
&Impl::DataCallback, &Impl::StateCallback, impl.get());
|
||||
if (stream_err != CUBEB_OK) {
|
||||
switch (stream_err) {
|
||||
case CUBEB_ERROR:
|
||||
@ -92,23 +94,20 @@ CubebSink::CubebSink(std::string_view target_device_name) : impl(std::make_uniqu
|
||||
}
|
||||
|
||||
CubebSink::~CubebSink() {
|
||||
if (!impl->ctx) {
|
||||
return;
|
||||
if (impl->stream) {
|
||||
if (cubeb_stream_stop(impl->stream) != CUBEB_OK) {
|
||||
LOG_ERROR(Audio_Sink, "Error stopping cubeb stream.");
|
||||
}
|
||||
cubeb_stream_destroy(impl->stream);
|
||||
}
|
||||
|
||||
if (cubeb_stream_stop(impl->stream) != CUBEB_OK) {
|
||||
LOG_CRITICAL(Audio_Sink, "Error stopping cubeb stream");
|
||||
if (impl->ctx) {
|
||||
cubeb_destroy(impl->ctx);
|
||||
}
|
||||
|
||||
cubeb_stream_destroy(impl->stream);
|
||||
cubeb_destroy(impl->ctx);
|
||||
}
|
||||
|
||||
unsigned int CubebSink::GetNativeSampleRate() const {
|
||||
if (!impl->ctx)
|
||||
return native_sample_rate;
|
||||
|
||||
return impl->sample_rate;
|
||||
return native_sample_rate;
|
||||
}
|
||||
|
||||
void CubebSink::SetCallback(std::function<void(s16*, std::size_t)> cb) {
|
||||
@ -121,13 +120,12 @@ long CubebSink::Impl::DataCallback(cubeb_stream* stream, void* user_data, const
|
||||
auto* buffer = static_cast<s16*>(output_buffer);
|
||||
|
||||
if (!impl || !impl->cb) {
|
||||
LOG_DEBUG(Audio_Sink, "Emitting zeros");
|
||||
LOG_DEBUG(Audio_Sink, "Missing internal data and/or audio callback, emitting zeroes.");
|
||||
std::memset(output_buffer, 0, num_frames * 2 * sizeof(s16));
|
||||
return num_frames;
|
||||
} else {
|
||||
impl->cb(buffer, num_frames);
|
||||
}
|
||||
|
||||
impl->cb(buffer, num_frames);
|
||||
|
||||
return num_frames;
|
||||
}
|
||||
|
||||
@ -149,7 +147,7 @@ void CubebSink::Impl::StateCallback(cubeb_stream* stream, void* user_data, cubeb
|
||||
}
|
||||
|
||||
void CubebSink::Impl::LogCallback(char const* format, ...) {
|
||||
std::array<char, 512> buffer;
|
||||
std::array<char, 512> buffer{};
|
||||
std::va_list args;
|
||||
va_start(args, format);
|
||||
#ifdef _MSC_VER
|
||||
@ -166,15 +164,13 @@ std::vector<std::string> ListCubebSinkDevices() {
|
||||
std::vector<std::string> device_list;
|
||||
cubeb* ctx;
|
||||
|
||||
if (cubeb_init(&ctx, "CitraEnumerator", nullptr) != CUBEB_OK) {
|
||||
if (cubeb_init(&ctx, "Citra Output Device Enumerator", nullptr) != CUBEB_OK) {
|
||||
LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
|
||||
return {};
|
||||
}
|
||||
|
||||
cubeb_device_collection collection;
|
||||
if (cubeb_enumerate_devices(ctx, CUBEB_DEVICE_TYPE_OUTPUT, &collection) != CUBEB_OK) {
|
||||
LOG_WARNING(Audio_Sink, "Audio output device enumeration not supported");
|
||||
} else {
|
||||
if (cubeb_enumerate_devices(ctx, CUBEB_DEVICE_TYPE_OUTPUT, &collection) == CUBEB_OK) {
|
||||
for (std::size_t i = 0; i < collection.count; i++) {
|
||||
const cubeb_device_info& device = collection.device[i];
|
||||
if (device.state == CUBEB_DEVICE_STATE_ENABLED && device.friendly_name) {
|
||||
@ -182,6 +178,8 @@ std::vector<std::string> ListCubebSinkDevices() {
|
||||
}
|
||||
}
|
||||
cubeb_device_collection_destroy(ctx, &collection);
|
||||
} else {
|
||||
LOG_WARNING(Audio_Sink, "Audio output device enumeration not supported.");
|
||||
}
|
||||
|
||||
cubeb_destroy(ctx);
|
||||
|
@ -66,7 +66,7 @@ public:
|
||||
|
||||
private:
|
||||
const std::size_t source_id;
|
||||
Memory::MemorySystem* memory_system;
|
||||
const Memory::MemorySystem* memory_system{};
|
||||
StereoFrame16 current_frame;
|
||||
|
||||
using Format = SourceConfiguration::Configuration::Format;
|
||||
|
@ -346,7 +346,7 @@ int main(int argc, char** argv) {
|
||||
system.ApplySettings();
|
||||
|
||||
// Register frontend applets
|
||||
Frontend::RegisterDefaultApplets();
|
||||
Frontend::RegisterDefaultApplets(system);
|
||||
|
||||
EmuWindow_SDL2::InitializeSDL2();
|
||||
|
||||
@ -354,12 +354,12 @@ int main(int argc, char** argv) {
|
||||
bool is_secondary) -> std::unique_ptr<EmuWindow_SDL2> {
|
||||
switch (Settings::values.graphics_api.GetValue()) {
|
||||
case Settings::GraphicsAPI::OpenGL:
|
||||
return std::make_unique<EmuWindow_SDL2_GL>(fullscreen, is_secondary);
|
||||
return std::make_unique<EmuWindow_SDL2_GL>(system, fullscreen, is_secondary);
|
||||
case Settings::GraphicsAPI::Software:
|
||||
return std::make_unique<EmuWindow_SDL2_SW>(system, fullscreen, is_secondary);
|
||||
}
|
||||
LOG_ERROR(Frontend, "Invalid Graphics API, using OpenGL");
|
||||
return std::make_unique<EmuWindow_SDL2_GL>(fullscreen, is_secondary);
|
||||
return std::make_unique<EmuWindow_SDL2_GL>(system, fullscreen, is_secondary);
|
||||
};
|
||||
|
||||
const auto emu_window{create_emu_window(fullscreen, false)};
|
||||
|
@ -109,7 +109,8 @@ void EmuWindow_SDL2::Fullscreen() {
|
||||
SDL_MaximizeWindow(render_window);
|
||||
}
|
||||
|
||||
EmuWindow_SDL2::EmuWindow_SDL2(bool is_secondary) : EmuWindow(is_secondary) {}
|
||||
EmuWindow_SDL2::EmuWindow_SDL2(Core::System& system_, bool is_secondary)
|
||||
: EmuWindow(is_secondary), system(system_) {}
|
||||
|
||||
EmuWindow_SDL2::~EmuWindow_SDL2() {
|
||||
SDL_Quit();
|
||||
@ -202,7 +203,7 @@ void EmuWindow_SDL2::OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minima
|
||||
void EmuWindow_SDL2::UpdateFramerateCounter() {
|
||||
const u32 current_time = SDL_GetTicks();
|
||||
if (current_time > last_time + 2000) {
|
||||
const auto results = Core::System::GetInstance().GetAndResetPerfStats();
|
||||
const auto results = system.GetAndResetPerfStats();
|
||||
const auto title =
|
||||
fmt::format("Citra {} | {}-{} | FPS: {:.0f} ({:.0f}%)", Common::g_build_fullname,
|
||||
Common::g_scm_branch, Common::g_scm_desc, results.game_fps,
|
||||
|
@ -10,9 +10,13 @@
|
||||
|
||||
struct SDL_Window;
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
class EmuWindow_SDL2 : public Frontend::EmuWindow {
|
||||
public:
|
||||
explicit EmuWindow_SDL2(bool is_secondary);
|
||||
explicit EmuWindow_SDL2(Core::System& system_, bool is_secondary);
|
||||
~EmuWindow_SDL2();
|
||||
|
||||
/// Initializes SDL2
|
||||
@ -78,4 +82,6 @@ protected:
|
||||
|
||||
/// Keeps track of how often to update the title bar during gameplay
|
||||
u32 last_time = 0;
|
||||
|
||||
Core::System& system;
|
||||
};
|
||||
|
@ -42,8 +42,8 @@ private:
|
||||
SDL_GLContext context;
|
||||
};
|
||||
|
||||
EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen, bool is_secondary)
|
||||
: EmuWindow_SDL2{is_secondary} {
|
||||
EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system_, bool fullscreen, bool is_secondary)
|
||||
: EmuWindow_SDL2{system_, is_secondary} {
|
||||
// Initialize the window
|
||||
if (Settings::values.use_gles) {
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3);
|
||||
|
@ -9,9 +9,13 @@
|
||||
|
||||
struct SDL_Window;
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
class EmuWindow_SDL2_GL : public EmuWindow_SDL2 {
|
||||
public:
|
||||
explicit EmuWindow_SDL2_GL(bool fullscreen, bool is_secondary);
|
||||
explicit EmuWindow_SDL2_GL(Core::System& system_, bool fullscreen, bool is_secondary);
|
||||
~EmuWindow_SDL2_GL();
|
||||
|
||||
void Present() override;
|
||||
|
@ -18,7 +18,7 @@
|
||||
class DummyContext : public Frontend::GraphicsContext {};
|
||||
|
||||
EmuWindow_SDL2_SW::EmuWindow_SDL2_SW(Core::System& system_, bool fullscreen, bool is_secondary)
|
||||
: EmuWindow_SDL2{is_secondary}, system{system_} {
|
||||
: EmuWindow_SDL2{system_, is_secondary}, system{system_} {
|
||||
std::string window_title = fmt::format("Citra {} | {}-{}", Common::g_build_fullname,
|
||||
Common::g_scm_branch, Common::g_scm_desc);
|
||||
render_window =
|
||||
|
@ -44,7 +44,8 @@
|
||||
|
||||
static Frontend::WindowSystemType GetWindowSystemType();
|
||||
|
||||
EmuThread::EmuThread(Frontend::GraphicsContext& core_context) : core_context(core_context) {}
|
||||
EmuThread::EmuThread(Core::System& system_, Frontend::GraphicsContext& core_context)
|
||||
: system{system_}, core_context(core_context) {}
|
||||
|
||||
EmuThread::~EmuThread() = default;
|
||||
|
||||
@ -62,7 +63,6 @@ static GMainWindow* GetMainWindow() {
|
||||
void EmuThread::run() {
|
||||
MicroProfileOnThreadCreate("EmuThread");
|
||||
const auto scope = core_context.Acquire();
|
||||
Core::System& system = Core::System::GetInstance();
|
||||
|
||||
if (Settings::values.preload_textures) {
|
||||
emit LoadProgress(VideoCore::LoadCallbackStage::Preload, 0, 0);
|
||||
@ -107,7 +107,7 @@ void EmuThread::run() {
|
||||
}
|
||||
if (result != Core::System::ResultStatus::Success) {
|
||||
this->SetRunning(false);
|
||||
emit ErrorThrown(result, Core::System::GetInstance().GetStatusDetails());
|
||||
emit ErrorThrown(result, system.GetStatusDetails());
|
||||
}
|
||||
|
||||
was_active = running || exec_step;
|
||||
@ -248,8 +248,8 @@ public:
|
||||
#ifdef HAS_OPENGL
|
||||
class OpenGLRenderWidget : public RenderWidget {
|
||||
public:
|
||||
explicit OpenGLRenderWidget(GRenderWindow* parent, bool is_secondary)
|
||||
: RenderWidget(parent), is_secondary(is_secondary) {
|
||||
explicit OpenGLRenderWidget(GRenderWindow* parent, Core::System& system_, bool is_secondary)
|
||||
: RenderWidget(parent), system(system_), is_secondary(is_secondary) {
|
||||
setAttribute(Qt::WA_NativeWindow);
|
||||
setAttribute(Qt::WA_PaintOnScreen);
|
||||
if (GetWindowSystemType() == Frontend::WindowSystemType::Wayland) {
|
||||
@ -266,7 +266,7 @@ public:
|
||||
if (!isVisible()) {
|
||||
return;
|
||||
}
|
||||
if (!Core::System::GetInstance().IsPoweredOn()) {
|
||||
if (!system.IsPoweredOn()) {
|
||||
return;
|
||||
}
|
||||
context->MakeCurrent();
|
||||
@ -284,6 +284,7 @@ public:
|
||||
|
||||
private:
|
||||
std::unique_ptr<Frontend::GraphicsContext> context{};
|
||||
Core::System& system;
|
||||
bool is_secondary;
|
||||
};
|
||||
#endif
|
||||
@ -296,7 +297,7 @@ struct SoftwareRenderWidget : public RenderWidget {
|
||||
if (!isVisible()) {
|
||||
return;
|
||||
}
|
||||
if (!Core::System::GetInstance().IsPoweredOn()) {
|
||||
if (!system.IsPoweredOn()) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -666,7 +667,7 @@ bool GRenderWindow::InitializeOpenGL() {
|
||||
|
||||
// TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground,
|
||||
// WA_DontShowOnScreen, WA_DeleteOnClose
|
||||
auto child = new OpenGLRenderWidget(this, is_secondary);
|
||||
auto child = new OpenGLRenderWidget(this, system, is_secondary);
|
||||
child_widget = child;
|
||||
child_widget->windowHandle()->create();
|
||||
|
||||
|
@ -18,6 +18,10 @@ class QTouchEvent;
|
||||
|
||||
class GRenderWindow;
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
enum class LoadCallbackStage;
|
||||
}
|
||||
@ -26,7 +30,7 @@ class EmuThread final : public QThread {
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
explicit EmuThread(Frontend::GraphicsContext& context);
|
||||
explicit EmuThread(Core::System& system_, Frontend::GraphicsContext& context);
|
||||
~EmuThread() override;
|
||||
|
||||
/**
|
||||
@ -80,6 +84,7 @@ private:
|
||||
std::mutex running_mutex;
|
||||
std::condition_variable running_cv;
|
||||
|
||||
Core::System& system;
|
||||
Frontend::GraphicsContext& core_context;
|
||||
|
||||
signals:
|
||||
|
@ -11,9 +11,9 @@
|
||||
#include "core/core.h"
|
||||
#include "ui_compatdb.h"
|
||||
|
||||
CompatDB::CompatDB(QWidget* parent)
|
||||
CompatDB::CompatDB(Core::TelemetrySession& telemetry_session_, QWidget* parent)
|
||||
: QWizard(parent, Qt::WindowTitleHint | Qt::WindowCloseButtonHint | Qt::WindowSystemMenuHint),
|
||||
ui{std::make_unique<Ui::CompatDB>()} {
|
||||
ui{std::make_unique<Ui::CompatDB>()}, telemetry_session{telemetry_session_} {
|
||||
ui->setupUi(this);
|
||||
connect(ui->radioButton_Perfect, &QRadioButton::clicked, this, &CompatDB::EnableNext);
|
||||
connect(ui->radioButton_Great, &QRadioButton::clicked, this, &CompatDB::EnableNext);
|
||||
@ -51,16 +51,15 @@ void CompatDB::Submit() {
|
||||
case CompatDBPage::Final:
|
||||
back();
|
||||
LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId());
|
||||
Core::System::GetInstance().TelemetrySession().AddField(
|
||||
Common::Telemetry::FieldType::UserFeedback, "Compatibility",
|
||||
compatibility->checkedId());
|
||||
telemetry_session.AddField(Common::Telemetry::FieldType::UserFeedback, "Compatibility",
|
||||
compatibility->checkedId());
|
||||
|
||||
button(NextButton)->setEnabled(false);
|
||||
button(NextButton)->setText(tr("Submitting"));
|
||||
button(CancelButton)->setVisible(false);
|
||||
|
||||
testcase_watcher.setFuture(QtConcurrent::run(
|
||||
[] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
|
||||
testcase_watcher.setFuture(
|
||||
QtConcurrent::run([this] { return telemetry_session.SubmitTestcase(); }));
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR(Frontend, "Unexpected page: {}", currentId());
|
||||
|
@ -8,6 +8,10 @@
|
||||
#include <QFutureWatcher>
|
||||
#include <QWizard>
|
||||
|
||||
namespace Core {
|
||||
class TelemetrySession;
|
||||
}
|
||||
|
||||
namespace Ui {
|
||||
class CompatDB;
|
||||
}
|
||||
@ -16,7 +20,7 @@ class CompatDB : public QWizard {
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
explicit CompatDB(QWidget* parent = nullptr);
|
||||
explicit CompatDB(Core::TelemetrySession& telemetry_session_, QWidget* parent = nullptr);
|
||||
~CompatDB();
|
||||
|
||||
private:
|
||||
@ -27,4 +31,6 @@ private:
|
||||
void Submit();
|
||||
void OnTestcaseSubmitted();
|
||||
void EnableNext();
|
||||
|
||||
Core::TelemetrySession& telemetry_session;
|
||||
};
|
||||
|
@ -1223,7 +1223,7 @@ void GMainWindow::BootGame(const QString& filename) {
|
||||
}
|
||||
|
||||
// Create and start the emulation thread
|
||||
emu_thread = std::make_unique<EmuThread>(*render_window);
|
||||
emu_thread = std::make_unique<EmuThread>(system, *render_window);
|
||||
emit EmulationStarting(emu_thread.get());
|
||||
emu_thread->start();
|
||||
|
||||
@ -1814,7 +1814,7 @@ void GMainWindow::OnLoadComplete() {
|
||||
|
||||
void GMainWindow::OnMenuReportCompatibility() {
|
||||
if (!NetSettings::values.citra_token.empty() && !NetSettings::values.citra_username.empty()) {
|
||||
CompatDB compatdb{this};
|
||||
CompatDB compatdb{system.TelemetrySession(), this};
|
||||
compatdb.exec();
|
||||
} else {
|
||||
QMessageBox::critical(this, tr("Missing Citra Account"),
|
||||
@ -2931,7 +2931,7 @@ int main(int argc, char* argv[]) {
|
||||
GMainWindow main_window(system);
|
||||
|
||||
// Register frontend applets
|
||||
Frontend::RegisterDefaultApplets();
|
||||
Frontend::RegisterDefaultApplets(system);
|
||||
|
||||
system.RegisterMiiSelector(std::make_shared<QtMiiSelector>(main_window));
|
||||
system.RegisterSoftwareKeyboard(std::make_shared<QtKeyboard>(main_window));
|
||||
|
@ -3,10 +3,15 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <QImage>
|
||||
#include <QImageReader>
|
||||
#include <QString>
|
||||
#include "citra_qt/qt_image_interface.h"
|
||||
#include "common/logging/log.h"
|
||||
|
||||
QtImageInterface::QtImageInterface() {
|
||||
QImageReader::setAllocationLimit(0);
|
||||
}
|
||||
|
||||
bool QtImageInterface::DecodePNG(std::vector<u8>& dst, u32& width, u32& height,
|
||||
std::span<const u8> src) {
|
||||
QImage image(QImage::fromData(src.data(), static_cast<int>(src.size())));
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
class QtImageInterface final : public Frontend::ImageInterface {
|
||||
public:
|
||||
QtImageInterface();
|
||||
bool DecodePNG(std::vector<u8>& dst, u32& width, u32& height, std::span<const u8> src) override;
|
||||
bool EncodePNG(const std::string& path, u32 width, u32 height,
|
||||
std::span<const u8> src) override;
|
||||
|
@ -88,6 +88,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
|
||||
SUB(Service, FRD) \
|
||||
SUB(Service, FS) \
|
||||
SUB(Service, ERR) \
|
||||
SUB(Service, ACT) \
|
||||
SUB(Service, APT) \
|
||||
SUB(Service, BOSS) \
|
||||
SUB(Service, GSP) \
|
||||
|
@ -55,6 +55,7 @@ enum class Class : u8 {
|
||||
Service_FRD, ///< The FRD (Friends) service
|
||||
Service_FS, ///< The FS (Filesystem) service implementation
|
||||
Service_ERR, ///< The ERR (Error) port implementation
|
||||
Service_ACT, ///< The ACT (Account) service
|
||||
Service_APT, ///< The APT (Applets) service
|
||||
Service_BOSS, ///< The BOSS (SpotPass) service
|
||||
Service_GSP, ///< The GSP (GPU control) service
|
||||
|
@ -8,8 +8,8 @@
|
||||
#include "core/frontend/applets/swkbd.h"
|
||||
|
||||
namespace Frontend {
|
||||
void RegisterDefaultApplets() {
|
||||
Core::System::GetInstance().RegisterSoftwareKeyboard(std::make_shared<DefaultKeyboard>());
|
||||
Core::System::GetInstance().RegisterMiiSelector(std::make_shared<DefaultMiiSelector>());
|
||||
void RegisterDefaultApplets(Core::System& system) {
|
||||
system.RegisterSoftwareKeyboard(std::make_shared<DefaultKeyboard>());
|
||||
system.RegisterMiiSelector(std::make_shared<DefaultMiiSelector>());
|
||||
}
|
||||
} // namespace Frontend
|
||||
|
@ -9,5 +9,5 @@ namespace Frontend {
|
||||
* Registers default, frontend-independent applet implementations.
|
||||
* Will be replaced later if any frontend-specific implementation is available.
|
||||
*/
|
||||
void RegisterDefaultApplets();
|
||||
void RegisterDefaultApplets(Core::System& system);
|
||||
} // namespace Frontend
|
||||
|
@ -74,7 +74,7 @@ void Recorder::SetRequestInfo(const std::shared_ptr<Kernel::Thread>& client_thre
|
||||
const u32 thread_id = client_thread->GetThreadId();
|
||||
if (!record_map.count(thread_id)) {
|
||||
// This is possible when the recorder is enabled after application started
|
||||
LOG_ERROR(Kernel, "No request is assoicated with the thread");
|
||||
LOG_ERROR(Kernel, "No request is associated with the thread");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -113,7 +113,7 @@ void Recorder::SetReplyInfo(const std::shared_ptr<Kernel::Thread>& client_thread
|
||||
const u32 thread_id = client_thread->GetThreadId();
|
||||
if (!record_map.count(thread_id)) {
|
||||
// This is possible when the recorder is enabled after application started
|
||||
LOG_ERROR(Kernel, "No request is assoicated with the thread");
|
||||
LOG_ERROR(Kernel, "No request is associated with the thread");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -133,7 +133,7 @@ void Recorder::SetHLEUnimplemented(const std::shared_ptr<Kernel::Thread>& client
|
||||
const u32 thread_id = client_thread->GetThreadId();
|
||||
if (!record_map.count(thread_id)) {
|
||||
// This is possible when the recorder is enabled after application started
|
||||
LOG_ERROR(Kernel, "No request is assoicated with the thread");
|
||||
LOG_ERROR(Kernel, "No request is associated with the thread");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -87,8 +87,7 @@ Handler::Handler(Core::Timing& timing) : timing(timing) {
|
||||
shared_page.sliderstate_3d = static_cast<float_le>(slidestate);
|
||||
}
|
||||
|
||||
/// Gets system time in 3DS format. The epoch is Jan 1900, and the unit is millisecond.
|
||||
u64 Handler::GetSystemTime() const {
|
||||
u64 Handler::GetSystemTimeSince2000() const {
|
||||
std::chrono::milliseconds now =
|
||||
init_time + std::chrono::duration_cast<std::chrono::milliseconds>(timing.GetGlobalTimeUs());
|
||||
|
||||
@ -104,23 +103,25 @@ u64 Handler::GetSystemTime() const {
|
||||
epoch_tm.tm_isdst = 0;
|
||||
s64 epoch = std::mktime(&epoch_tm) * 1000;
|
||||
|
||||
// 3DS console time uses Jan 1 1900 as internal epoch,
|
||||
// so we use the milliseconds between 1900 and 2000 as base console time
|
||||
u64 console_time = 3155673600000ULL;
|
||||
|
||||
// Only when system time is after 2000, we set it as 3DS system time
|
||||
if (now.count() > epoch) {
|
||||
console_time += (now.count() - epoch);
|
||||
return now.count() - epoch;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return console_time;
|
||||
u64 Handler::GetSystemTimeSince1900() const {
|
||||
// 3DS console time uses Jan 1 1900 as internal epoch,
|
||||
// so we use the milliseconds between 1900 and 2000 as base console time
|
||||
return 3155673600000ULL + GetSystemTimeSince2000();
|
||||
}
|
||||
|
||||
void Handler::UpdateTimeCallback(std::uintptr_t user_data, int cycles_late) {
|
||||
DateTime& date_time =
|
||||
shared_page.date_time_counter % 2 ? shared_page.date_time_0 : shared_page.date_time_1;
|
||||
|
||||
date_time.date_time = GetSystemTime();
|
||||
date_time.date_time = GetSystemTimeSince1900();
|
||||
date_time.update_tick = timing.GetTicks();
|
||||
date_time.tick_to_second_coefficient = BASE_CLOCK_RATE_ARM11;
|
||||
date_time.tick_offset = 0;
|
||||
|
@ -110,8 +110,13 @@ public:
|
||||
return sizeof(shared_page);
|
||||
}
|
||||
|
||||
/// Gets the system time in milliseconds since the year 2000.
|
||||
u64 GetSystemTimeSince2000() const;
|
||||
|
||||
/// Gets the system time in milliseconds since the year 1900.
|
||||
u64 GetSystemTimeSince1900() const;
|
||||
|
||||
private:
|
||||
u64 GetSystemTime() const;
|
||||
void UpdateTimeCallback(std::uintptr_t user_data, int cycles_late);
|
||||
Core::Timing& timing;
|
||||
Core::TimingEventType* update_time_event;
|
||||
|
@ -3,6 +3,7 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "core/core.h"
|
||||
#include "core/hle/ipc_helpers.h"
|
||||
#include "core/hle/service/act/act.h"
|
||||
#include "core/hle/service/act/act_a.h"
|
||||
#include "core/hle/service/act/act_u.h"
|
||||
@ -14,6 +15,35 @@ Module::Interface::Interface(std::shared_ptr<Module> act, const char* name)
|
||||
|
||||
Module::Interface::~Interface() = default;
|
||||
|
||||
void Module::Interface::Initialize(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x1, 2, 4); // 0x10084
|
||||
const auto sdk_version = rp.Pop<u32>();
|
||||
const auto shared_memory_size = rp.Pop<u32>();
|
||||
const auto caller_pid = rp.PopPID();
|
||||
[[maybe_unused]] const auto shared_memory = rp.PopObject<Kernel::SharedMemory>();
|
||||
|
||||
LOG_DEBUG(Service_ACT,
|
||||
"(STUBBED) called sdk_version={:08X}, shared_memory_size={:08X}, caller_pid={}",
|
||||
sdk_version, shared_memory_size, caller_pid);
|
||||
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
}
|
||||
|
||||
void Module::Interface::GetAccountDataBlock(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x6, 3, 2); // 0x600C2
|
||||
const auto unknown = rp.Pop<u8>();
|
||||
const auto size = rp.Pop<u32>();
|
||||
const auto block_id = rp.Pop<u32>();
|
||||
[[maybe_unused]] auto output_buffer = rp.PopMappedBuffer();
|
||||
|
||||
LOG_DEBUG(Service_ACT, "(STUBBED) called unknown={:02X}, size={:08X}, block_id={:08X}", unknown,
|
||||
size, block_id);
|
||||
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
}
|
||||
|
||||
void InstallInterfaces(Core::System& system) {
|
||||
auto& service_manager = system.ServiceManager();
|
||||
auto act = std::make_shared<Module>();
|
||||
|
@ -22,6 +22,33 @@ public:
|
||||
|
||||
protected:
|
||||
std::shared_ptr<Module> act;
|
||||
|
||||
/**
|
||||
* ACT::Initialize service function.
|
||||
* Inputs:
|
||||
* 1 : SDK version
|
||||
* 2 : Shared Memory Size
|
||||
* 3 : PID Translation Header (0x20)
|
||||
* 4 : Caller PID
|
||||
* 5 : Handle Translation Header (0x0)
|
||||
* 6 : Shared Memory Handle
|
||||
* Outputs:
|
||||
* 1 : Result of function, 0 on success, otherwise error code
|
||||
*/
|
||||
void Initialize(Kernel::HLERequestContext& ctx);
|
||||
|
||||
/**
|
||||
* ACT::GetAccountDataBlock service function.
|
||||
* Inputs:
|
||||
* 1 : u8 Unknown
|
||||
* 2 : Size
|
||||
* 3 : Block ID
|
||||
* 4 : Output Buffer Mapping Translation Header ((Size << 4) | 0xC)
|
||||
* 5 : Output Buffer Pointer
|
||||
* Outputs:
|
||||
* 1 : Result of function, 0 on success, otherwise error code
|
||||
*/
|
||||
void GetAccountDataBlock(Kernel::HLERequestContext& ctx);
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -11,9 +11,9 @@ ACT_A::ACT_A(std::shared_ptr<Module> act) : Module::Interface(std::move(act), "a
|
||||
const FunctionInfo functions[] = {
|
||||
// act:u shared commands
|
||||
// clang-format off
|
||||
{IPC::MakeHeader(0x0001, 2, 4), nullptr, "Initialize"},
|
||||
{IPC::MakeHeader(0x0001, 2, 4), &ACT_A::Initialize, "Initialize"},
|
||||
{IPC::MakeHeader(0x0002, 1, 0), nullptr, "GetErrorCode"},
|
||||
{IPC::MakeHeader(0x0006, 3, 2), nullptr, "GetAccountDataBlock"},
|
||||
{IPC::MakeHeader(0x0006, 3, 2), &ACT_A::GetAccountDataBlock, "GetAccountDataBlock"},
|
||||
{IPC::MakeHeader(0x000B, 1, 2), nullptr, "AcquireEulaList"},
|
||||
{IPC::MakeHeader(0x000D, 1, 0), nullptr, "GenerateUuid"},
|
||||
// act:a
|
||||
|
@ -10,9 +10,9 @@ namespace Service::ACT {
|
||||
ACT_U::ACT_U(std::shared_ptr<Module> act) : Module::Interface(std::move(act), "act:u") {
|
||||
static const FunctionInfo functions[] = {
|
||||
// clang-format off
|
||||
{IPC::MakeHeader(0x0001, 2, 4), nullptr, "Initialize"},
|
||||
{IPC::MakeHeader(0x0001, 2, 4), &ACT_U::Initialize, "Initialize"},
|
||||
{IPC::MakeHeader(0x0002, 1, 0), nullptr, "GetErrorCode"},
|
||||
{IPC::MakeHeader(0x0006, 3, 2), nullptr, "GetAccountDataBlock"},
|
||||
{IPC::MakeHeader(0x0006, 3, 2), &ACT_U::GetAccountDataBlock, "GetAccountDataBlock"},
|
||||
{IPC::MakeHeader(0x000B, 1, 2), nullptr, "AcquireEulaList"},
|
||||
{IPC::MakeHeader(0x000D, 1, 0), nullptr, "GenerateUuid"},
|
||||
// clang-format on
|
||||
|
@ -1121,6 +1121,17 @@ void Module::Interface::GetTicketList(Kernel::HLERequestContext& ctx) {
|
||||
ticket_list_count, ticket_index);
|
||||
}
|
||||
|
||||
void Module::Interface::NeedsCleanup(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x0013, 1, 0); // 0x00130040
|
||||
const auto media_type = rp.Pop<u8>();
|
||||
|
||||
LOG_DEBUG(Service_AM, "(STUBBED) media_type=0x{:02x}", media_type);
|
||||
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(2, 0);
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.Push<bool>(false);
|
||||
}
|
||||
|
||||
void Module::Interface::QueryAvailableTitleDatabase(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x0019, 1, 0); // 0x190040
|
||||
u8 media_type = rp.Pop<u8>();
|
||||
|
@ -357,6 +357,16 @@ public:
|
||||
*/
|
||||
void GetTicketList(Kernel::HLERequestContext& ctx);
|
||||
|
||||
/**
|
||||
* AM::NeedsCleanup service function
|
||||
* Inputs:
|
||||
* 1 : Media Type
|
||||
* Outputs:
|
||||
* 1 : Result, 0 on success, otherwise error code
|
||||
* 2 : bool, Needs Cleanup
|
||||
*/
|
||||
void NeedsCleanup(Kernel::HLERequestContext& ctx);
|
||||
|
||||
/**
|
||||
* AM::QueryAvailableTitleDatabase service function
|
||||
* Inputs:
|
||||
|
@ -28,7 +28,7 @@ AM_NET::AM_NET(std::shared_ptr<Module> am) : Module::Interface(std::move(am), "a
|
||||
{IPC::MakeHeader(0x0010, 4, 2), nullptr, "GetImportContentContextList"},
|
||||
{IPC::MakeHeader(0x0011, 4, 4), nullptr, "GetImportContentContexts"},
|
||||
{IPC::MakeHeader(0x0012, 4, 2), nullptr, "DeleteImportContentContexts"},
|
||||
{IPC::MakeHeader(0x0013, 1, 0), nullptr, "NeedsCleanup"},
|
||||
{IPC::MakeHeader(0x0013, 1, 0), &AM_NET::NeedsCleanup, "NeedsCleanup"},
|
||||
{IPC::MakeHeader(0x0014, 1, 0), nullptr, "DoCleanup"},
|
||||
{IPC::MakeHeader(0x0015, 1, 0), nullptr, "DeleteAllImportContexts"},
|
||||
{IPC::MakeHeader(0x0016, 0, 0), nullptr, "DeleteAllTemporaryPrograms"},
|
||||
|
@ -28,7 +28,7 @@ AM_SYS::AM_SYS(std::shared_ptr<Module> am) : Module::Interface(std::move(am), "a
|
||||
{IPC::MakeHeader(0x0010, 4, 2), nullptr, "GetImportContentContextList"},
|
||||
{IPC::MakeHeader(0x0011, 4, 4), nullptr, "GetImportContentContexts"},
|
||||
{IPC::MakeHeader(0x0012, 4, 2), nullptr, "DeleteImportContentContexts"},
|
||||
{IPC::MakeHeader(0x0013, 1, 0), nullptr, "NeedsCleanup"},
|
||||
{IPC::MakeHeader(0x0013, 1, 0), &AM_SYS::NeedsCleanup, "NeedsCleanup"},
|
||||
{IPC::MakeHeader(0x0014, 1, 0), nullptr, "DoCleanup"},
|
||||
{IPC::MakeHeader(0x0015, 1, 0), nullptr, "DeleteAllImportContexts"},
|
||||
{IPC::MakeHeader(0x0016, 0, 0), nullptr, "DeleteAllTemporaryPrograms"},
|
||||
|
@ -28,7 +28,7 @@ AM_U::AM_U(std::shared_ptr<Module> am) : Module::Interface(std::move(am), "am:u"
|
||||
{IPC::MakeHeader(0x0010, 4, 2), nullptr, "GetImportContentContextList"},
|
||||
{IPC::MakeHeader(0x0011, 4, 4), nullptr, "GetImportContentContexts"},
|
||||
{IPC::MakeHeader(0x0012, 4, 2), nullptr, "DeleteImportContentContexts"},
|
||||
{IPC::MakeHeader(0x0013, 1, 0), nullptr, "NeedsCleanup"},
|
||||
{IPC::MakeHeader(0x0013, 1, 0), &AM_U::NeedsCleanup, "NeedsCleanup"},
|
||||
{IPC::MakeHeader(0x0014, 1, 0), nullptr, "DoCleanup"},
|
||||
{IPC::MakeHeader(0x0015, 1, 0), nullptr, "DeleteAllImportContexts"},
|
||||
{IPC::MakeHeader(0x0016, 0, 0), nullptr, "DeleteAllTemporaryPrograms"},
|
||||
|
@ -271,6 +271,17 @@ void Module::Interface::SecureInfoGetRegion(Kernel::HLERequestContext& ctx, u16
|
||||
rb.Push<u8>(static_cast<u8>(cfg->GetRegionValue()));
|
||||
}
|
||||
|
||||
void Module::Interface::SecureInfoGetByte101(Kernel::HLERequestContext& ctx, u16 id) {
|
||||
IPC::RequestParser rp(ctx, id, 0, 0);
|
||||
|
||||
LOG_DEBUG(Service_CFG, "(STUBBED) called");
|
||||
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(2, 0);
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
// According to 3dbrew this is normally 0.
|
||||
rb.Push<u8>(0);
|
||||
}
|
||||
|
||||
void Module::Interface::GenHashConsoleUnique(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x03, 1, 0);
|
||||
const u32 app_id_salt = rp.Pop<u32>() & 0x000FFFFF;
|
||||
|
@ -165,6 +165,17 @@ public:
|
||||
*/
|
||||
void SecureInfoGetRegion(Kernel::HLERequestContext& ctx, u16 id);
|
||||
|
||||
/**
|
||||
* CFG::SecureInfoGetByte101 service function
|
||||
* Inputs:
|
||||
* 1 : None
|
||||
* Outputs:
|
||||
* 0 : Result Header code
|
||||
* 1 : Result of function, 0 on success, otherwise error code
|
||||
* 2 : Value loaded from SecureInfo offset 0x101
|
||||
*/
|
||||
void SecureInfoGetByte101(Kernel::HLERequestContext& ctx, u16 id);
|
||||
|
||||
/**
|
||||
* CFG::GenHashConsoleUnique service function
|
||||
* Inputs:
|
||||
|
@ -31,7 +31,7 @@ CFG_I::CFG_I(std::shared_ptr<Module> cfg) : Module::Interface(std::move(cfg), "c
|
||||
{IPC::MakeHeader(0x0404, 1, 2), nullptr, "GetLocalFriendCodeSeedData"},
|
||||
{IPC::MakeHeader(0x0405, 0, 0), nullptr, "GetLocalFriendCodeSeed"},
|
||||
{IPC::MakeHeader(0x0406, 0, 0), &CFG_I::D<&CFG_I::SecureInfoGetRegion, 0x0406>, "SecureInfoGetRegion"},
|
||||
{IPC::MakeHeader(0x0407, 0, 0), nullptr, "SecureInfoGetByte101"},
|
||||
{IPC::MakeHeader(0x0407, 0, 0), &CFG_I::D<&CFG_I::SecureInfoGetByte101, 0x0407>, "SecureInfoGetByte101"},
|
||||
{IPC::MakeHeader(0x0408, 1, 2), nullptr, "SecureInfoGetSerialNo"},
|
||||
{IPC::MakeHeader(0x0409, 0, 0), nullptr, "UpdateConfigBlk00040003"},
|
||||
{IPC::MakeHeader(0x0801, 2, 2), &CFG_I::D<&CFG_I::GetConfigInfoBlk8, 0x0801>, "GetConfigInfoBlk8"},
|
||||
@ -55,7 +55,7 @@ CFG_I::CFG_I(std::shared_ptr<Module> cfg) : Module::Interface(std::move(cfg), "c
|
||||
{IPC::MakeHeader(0x0814, 1, 2), nullptr, "SecureInfoGetData"},
|
||||
{IPC::MakeHeader(0x0815, 1, 2), nullptr, "SecureInfoGetSignature"},
|
||||
{IPC::MakeHeader(0x0816, 0, 0), &CFG_I::D<&CFG_I::SecureInfoGetRegion, 0x0816>, "SecureInfoGetRegion"},
|
||||
{IPC::MakeHeader(0x0817, 0, 0), nullptr, "SecureInfoGetByte101"},
|
||||
{IPC::MakeHeader(0x0817, 0, 0), &CFG_I::D<&CFG_I::SecureInfoGetByte101, 0x0817>, "SecureInfoGetByte101"},
|
||||
{IPC::MakeHeader(0x0818, 1, 2), nullptr, "SecureInfoGetSerialNo"},
|
||||
// clang-format on
|
||||
};
|
||||
|
@ -31,7 +31,7 @@ CFG_S::CFG_S(std::shared_ptr<Module> cfg) : Module::Interface(std::move(cfg), "c
|
||||
{IPC::MakeHeader(0x0404, 1, 2), nullptr, "GetLocalFriendCodeSeedData"},
|
||||
{IPC::MakeHeader(0x0405, 0, 0), nullptr, "GetLocalFriendCodeSeed"},
|
||||
{IPC::MakeHeader(0x0406, 0, 0), &CFG_S::D<&CFG_S::SecureInfoGetRegion, 0x0406>, "SecureInfoGetRegion"},
|
||||
{IPC::MakeHeader(0x0407, 0, 0), nullptr, "SecureInfoGetByte101"},
|
||||
{IPC::MakeHeader(0x0407, 0, 0), &CFG_S::D<&CFG_S::SecureInfoGetByte101, 0x0407>, "SecureInfoGetByte101"},
|
||||
{IPC::MakeHeader(0x0408, 1, 2), nullptr, "SecureInfoGetSerialNo"},
|
||||
{IPC::MakeHeader(0x0409, 0, 0), nullptr, "UpdateConfigBlk00040003"},
|
||||
// clang-format on
|
||||
|
@ -80,23 +80,29 @@ struct State {
|
||||
|
||||
void WriteSamples(std::span<const u8> samples) {
|
||||
u32 bytes_total_written = 0;
|
||||
const std::size_t remaining_space = size - offset;
|
||||
std::size_t bytes_to_write = std::min(samples.size(), remaining_space);
|
||||
auto sample_buffer = sharedmem_buffer + initial_offset;
|
||||
// Do not let sampling buffer overrun shared memory space.
|
||||
const auto sample_buffer_size =
|
||||
std::min(size, sharedmem_size - initial_offset - sizeof(u32));
|
||||
|
||||
// Write as many samples as we can to the buffer.
|
||||
// TODO if the sample size is 16bit, this could theoretically cut a sample in the case where
|
||||
// the application configures an odd size
|
||||
std::memcpy(sharedmem_buffer + offset, samples.data(), bytes_to_write);
|
||||
offset += static_cast<u32>(bytes_to_write);
|
||||
bytes_total_written += static_cast<u32>(bytes_to_write);
|
||||
|
||||
// If theres any samples left to write after we looped, go ahead and write them now
|
||||
if (looped_buffer && samples.size() > bytes_total_written) {
|
||||
offset = initial_offset;
|
||||
bytes_to_write = std::min(samples.size() - bytes_total_written, size);
|
||||
std::memcpy(sharedmem_buffer + offset, samples.data() + bytes_total_written,
|
||||
// Write samples in a loop until the input runs out
|
||||
while (samples.size() > bytes_total_written) {
|
||||
// TODO: If the sample size is 16-bit, this could theoretically cut a sample in the case
|
||||
// where the application configures an odd size.
|
||||
std::size_t bytes_to_write =
|
||||
std::min(samples.size() - bytes_total_written, sample_buffer_size - offset);
|
||||
std::memcpy(sample_buffer + offset, samples.data() + bytes_total_written,
|
||||
bytes_to_write);
|
||||
offset += static_cast<u32>(bytes_to_write);
|
||||
bytes_total_written += static_cast<u32>(bytes_to_write);
|
||||
|
||||
if (offset >= sample_buffer_size && looped_buffer) {
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
if (!looped_buffer) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// The last 4 bytes of the shared memory contain the latest offset
|
||||
@ -205,7 +211,8 @@ struct MIC_U::Impl {
|
||||
}
|
||||
|
||||
u8 sample_size = encoding == Encoding::PCM8Signed || encoding == Encoding::PCM8 ? 8 : 16;
|
||||
state.offset = state.initial_offset = audio_buffer_offset;
|
||||
state.offset = 0;
|
||||
state.initial_offset = audio_buffer_offset;
|
||||
state.sample_rate = sample_rate;
|
||||
state.sample_size = sample_size;
|
||||
state.looped_buffer = audio_buffer_loop;
|
||||
|
@ -10,6 +10,13 @@ SERIALIZE_EXPORT_IMPL(Service::NEWS::NEWS_S)
|
||||
|
||||
namespace Service::NEWS {
|
||||
|
||||
struct NewsDbHeader {
|
||||
u8 unknown_one;
|
||||
u8 flags;
|
||||
INSERT_PADDING_BYTES(0xE);
|
||||
};
|
||||
static_assert(sizeof(NewsDbHeader) == 0x10, "News DB Header structure size is wrong");
|
||||
|
||||
void NEWS_S::GetTotalNotifications(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x5, 0, 0);
|
||||
|
||||
@ -21,6 +28,22 @@ void NEWS_S::GetTotalNotifications(Kernel::HLERequestContext& ctx) {
|
||||
rb.Push<u32>(0);
|
||||
}
|
||||
|
||||
void NEWS_S::GetNewsDBHeader(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0xA, 1, 2);
|
||||
const auto size = rp.Pop<u32>();
|
||||
auto output_buffer = rp.PopMappedBuffer();
|
||||
|
||||
LOG_WARNING(Service, "(STUBBED) called size={}", size);
|
||||
|
||||
NewsDbHeader dummy = {.unknown_one = 1, .flags = 0};
|
||||
output_buffer.Write(&dummy, 0, std::min(sizeof(NewsDbHeader), static_cast<std::size_t>(size)));
|
||||
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(2, 0);
|
||||
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.Push<u32>(size);
|
||||
}
|
||||
|
||||
NEWS_S::NEWS_S() : ServiceFramework("news:s", 2) {
|
||||
const FunctionInfo functions[] = {
|
||||
// clang-format off
|
||||
@ -30,7 +53,7 @@ NEWS_S::NEWS_S() : ServiceFramework("news:s", 2) {
|
||||
{IPC::MakeHeader(0x0007, 2, 2), nullptr, "SetNotificationHeader"},
|
||||
{IPC::MakeHeader(0x0008, 2, 2), nullptr, "SetNotificationMessage"},
|
||||
{IPC::MakeHeader(0x0009, 2, 2), nullptr, "SetNotificationImage"},
|
||||
{IPC::MakeHeader(0x000A, 1, 2), nullptr, "GetNewsDBHeader"},
|
||||
{IPC::MakeHeader(0x000A, 1, 2), &NEWS_S::GetNewsDBHeader, "GetNewsDBHeader"},
|
||||
{IPC::MakeHeader(0x000B, 2, 2), nullptr, "GetNotificationHeader"},
|
||||
{IPC::MakeHeader(0x000C, 2, 2), nullptr, "GetNotificationMessage"},
|
||||
{IPC::MakeHeader(0x000D, 2, 2), nullptr, "GetNotificationImage"},
|
||||
|
@ -25,6 +25,20 @@ private:
|
||||
*/
|
||||
void GetTotalNotifications(Kernel::HLERequestContext& ctx);
|
||||
|
||||
/**
|
||||
* GetNewsDBHeader service function.
|
||||
* Inputs:
|
||||
* 0 : 0x000A0042
|
||||
* 1 : Size
|
||||
* 2 : Output Buffer Mapping Translation Header ((Size << 4) | 0xC)
|
||||
* 3 : Output Buffer Pointer
|
||||
* Outputs:
|
||||
* 0 : 0x000A0080
|
||||
* 1 : Result of function, 0 on success, otherwise error code
|
||||
* 2 : Actual Size
|
||||
*/
|
||||
void GetNewsDBHeader(Kernel::HLERequestContext& ctx);
|
||||
|
||||
SERVICE_SERIALIZATION_SIMPLE
|
||||
};
|
||||
|
||||
|
@ -218,7 +218,10 @@ ResultCode NfcDevice::StartDetection(TagProtocol allowed_protocol) {
|
||||
return ResultInvalidOperation;
|
||||
}
|
||||
|
||||
// TODO: Set console in search mode here
|
||||
// Ensure external device is active
|
||||
if (communication_state == CommunicationState::Idle) {
|
||||
StartCommunication();
|
||||
}
|
||||
|
||||
device_state = DeviceState::SearchingForTag;
|
||||
allowed_protocols = allowed_protocol;
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "core/file_sys/archive_extsavedata.h"
|
||||
#include "core/file_sys/errors.h"
|
||||
#include "core/file_sys/file_backend.h"
|
||||
#include "core/hle/kernel/shared_page.h"
|
||||
#include "core/hle/service/ptm/ptm.h"
|
||||
#include "core/hle/service/ptm/ptm_gets.h"
|
||||
#include "core/hle/service/ptm/ptm_play.h"
|
||||
@ -132,6 +133,17 @@ void Module::Interface::CheckNew3DS(Kernel::HLERequestContext& ctx) {
|
||||
Service::PTM::CheckNew3DS(rb);
|
||||
}
|
||||
|
||||
void Module::Interface::GetSystemTime(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x401, 0, 0);
|
||||
|
||||
auto& share_page = Core::System::GetInstance().Kernel().GetSharedPageHandler();
|
||||
const u64 console_time = share_page.GetSystemTimeSince2000();
|
||||
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(3, 0);
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.Push(console_time);
|
||||
}
|
||||
|
||||
static void WriteGameCoinData(GameCoin gamecoin_data) {
|
||||
const std::string& nand_directory = FileUtil::GetUserPath(FileUtil::UserPath::NANDDir);
|
||||
FileSys::ArchiveFactory_ExtSaveData extdata_archive_factory(nand_directory, true);
|
||||
|
@ -137,6 +137,14 @@ public:
|
||||
*/
|
||||
void CheckNew3DS(Kernel::HLERequestContext& ctx);
|
||||
|
||||
/**
|
||||
* PTM::GetSystemTime service function
|
||||
* Outputs:
|
||||
* 1: Result code, 0 on success, otherwise error code
|
||||
* 2-3: Time since 01/01/2000, in milliseconds.
|
||||
*/
|
||||
void GetSystemTime(Kernel::HLERequestContext& ctx);
|
||||
|
||||
protected:
|
||||
std::shared_ptr<Module> ptm;
|
||||
};
|
||||
|
@ -30,7 +30,7 @@ PTM_Gets::PTM_Gets(std::shared_ptr<Module> ptm)
|
||||
{IPC::MakeHeader(0x000E, 0, 0), nullptr, "GetPedometerRecordingMode"},
|
||||
{IPC::MakeHeader(0x000F, 2, 4), nullptr, "GetStepHistoryAll"},
|
||||
// ptm:gets
|
||||
{IPC::MakeHeader(0x0401, 0, 0), nullptr, "GetSystemTime"},
|
||||
{IPC::MakeHeader(0x0401, 0, 0), &PTM_Gets::GetSystemTime, "GetSystemTime"},
|
||||
// clang-format on
|
||||
};
|
||||
RegisterHandlers(functions);
|
||||
|
@ -643,7 +643,7 @@ std::string MemorySystem::ReadCString(VAddr vaddr, std::size_t max_length) {
|
||||
return string;
|
||||
}
|
||||
|
||||
u8* MemorySystem::GetPhysicalPointer(PAddr address) {
|
||||
u8* MemorySystem::GetPhysicalPointer(PAddr address) const {
|
||||
return GetPhysicalRef(address);
|
||||
}
|
||||
|
||||
|
@ -576,7 +576,7 @@ public:
|
||||
void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached);
|
||||
|
||||
/// Gets a pointer to the memory region beginning at the specified physical address.
|
||||
u8* GetPhysicalPointer(PAddr address);
|
||||
u8* GetPhysicalPointer(PAddr address) const;
|
||||
|
||||
/// Returns a reference to the memory region beginning at the specified physical address
|
||||
MemoryRef GetPhysicalRef(PAddr address) const;
|
||||
|
@ -95,6 +95,8 @@ add_library(video_core STATIC
|
||||
renderer_software/sw_proctex.h
|
||||
renderer_software/sw_rasterizer.cpp
|
||||
renderer_software/sw_rasterizer.h
|
||||
renderer_software/sw_tev_jit.cpp
|
||||
renderer_software/sw_tev_jit.h
|
||||
renderer_software/sw_texturing.cpp
|
||||
renderer_software/sw_texturing.h
|
||||
renderer_vulkan/pica_to_vk.h
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "core/hw/hw.h"
|
||||
#include "core/hw/lcd.h"
|
||||
#include "video_core/renderer_software/renderer_software.h"
|
||||
#include "video_core/renderer_software/sw_rasterizer.h"
|
||||
|
||||
namespace SwRenderer {
|
||||
|
||||
@ -17,6 +18,10 @@ RendererSoftware::RendererSoftware(Core::System& system, Frontend::EmuWindow& wi
|
||||
|
||||
RendererSoftware::~RendererSoftware() = default;
|
||||
|
||||
VideoCore::RasterizerInterface* RendererSoftware::Rasterizer() const {
|
||||
return rasterizer.get();
|
||||
}
|
||||
|
||||
void RendererSoftware::SwapBuffers() {
|
||||
PrepareRenderTarget();
|
||||
EndFrame();
|
||||
|
@ -5,7 +5,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_software/sw_rasterizer.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
@ -19,19 +18,18 @@ struct ScreenInfo {
|
||||
std::vector<u8> pixels;
|
||||
};
|
||||
|
||||
class RasterizerSoftware;
|
||||
|
||||
class RendererSoftware : public VideoCore::RendererBase {
|
||||
public:
|
||||
explicit RendererSoftware(Core::System& system, Frontend::EmuWindow& window);
|
||||
~RendererSoftware() override;
|
||||
|
||||
[[nodiscard]] VideoCore::RasterizerInterface* Rasterizer() const override {
|
||||
return rasterizer.get();
|
||||
}
|
||||
|
||||
[[nodiscard]] const ScreenInfo& Screen(VideoCore::ScreenId id) const noexcept {
|
||||
return screen_infos[static_cast<u32>(id)];
|
||||
}
|
||||
|
||||
VideoCore::RasterizerInterface* Rasterizer() const override;
|
||||
void SwapBuffers() override;
|
||||
void TryPresent(int timeout_ms, bool is_secondary) override {}
|
||||
void Sync() override {}
|
||||
|
@ -41,10 +41,22 @@ Framebuffer::Framebuffer(Memory::MemorySystem& memory_, const Pica::FramebufferR
|
||||
|
||||
Framebuffer::~Framebuffer() = default;
|
||||
|
||||
void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const {
|
||||
const auto& framebuffer = regs.framebuffer;
|
||||
const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
|
||||
void Framebuffer::Bind() {
|
||||
PAddr addr = regs.framebuffer.GetColorBufferPhysicalAddress();
|
||||
if (color_addr != addr) [[unlikely]] {
|
||||
color_addr = addr;
|
||||
color_buffer = memory.GetPhysicalPointer(color_addr);
|
||||
}
|
||||
|
||||
addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
|
||||
if (depth_addr != addr) [[unlikely]] {
|
||||
depth_addr = addr;
|
||||
depth_buffer = memory.GetPhysicalPointer(depth_addr);
|
||||
}
|
||||
}
|
||||
|
||||
void Framebuffer::DrawPixel(u32 x, u32 y, const Common::Vec4<u8>& color) const {
|
||||
const auto& framebuffer = regs.framebuffer;
|
||||
// Similarly to textures, the render framebuffer is laid out from bottom to top, too.
|
||||
// NOTE: The framebuffer height register contains the actual FB height minus one.
|
||||
y = framebuffer.height - y;
|
||||
@ -54,8 +66,7 @@ void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const {
|
||||
GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
|
||||
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
|
||||
coarse_y * framebuffer.width * bytes_per_pixel;
|
||||
u8* depth_buffer = memory.GetPhysicalPointer(addr);
|
||||
u8* dst_pixel = depth_buffer + dst_offset;
|
||||
u8* dst_pixel = color_buffer + dst_offset;
|
||||
|
||||
switch (framebuffer.color_format) {
|
||||
case FramebufferRegs::ColorFormat::RGBA8:
|
||||
@ -80,10 +91,8 @@ void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const {
|
||||
}
|
||||
}
|
||||
|
||||
const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const {
|
||||
const Common::Vec4<u8> Framebuffer::GetPixel(u32 x, u32 y) const {
|
||||
const auto& framebuffer = regs.framebuffer;
|
||||
const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
|
||||
|
||||
y = framebuffer.height - y;
|
||||
|
||||
const u32 coarse_y = y & ~7;
|
||||
@ -91,7 +100,6 @@ const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const {
|
||||
GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
|
||||
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
|
||||
coarse_y * framebuffer.width * bytes_per_pixel;
|
||||
const u8* color_buffer = memory.GetPhysicalPointer(addr);
|
||||
const u8* src_pixel = color_buffer + src_offset;
|
||||
|
||||
switch (framebuffer.color_format) {
|
||||
@ -114,10 +122,8 @@ const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const {
|
||||
return {0, 0, 0, 0};
|
||||
}
|
||||
|
||||
u32 Framebuffer::GetDepth(int x, int y) const {
|
||||
u32 Framebuffer::GetDepth(u32 x, u32 y) const {
|
||||
const auto& framebuffer = regs.framebuffer;
|
||||
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
||||
|
||||
y = framebuffer.height - y;
|
||||
|
||||
const u32 coarse_y = y & ~7;
|
||||
@ -125,7 +131,6 @@ u32 Framebuffer::GetDepth(int x, int y) const {
|
||||
const u32 stride = framebuffer.width * bytes_per_pixel;
|
||||
|
||||
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
||||
const u8* depth_buffer = memory.GetPhysicalPointer(addr);
|
||||
const u8* src_pixel = depth_buffer + src_offset;
|
||||
|
||||
switch (framebuffer.depth_format) {
|
||||
@ -143,10 +148,8 @@ u32 Framebuffer::GetDepth(int x, int y) const {
|
||||
}
|
||||
}
|
||||
|
||||
u8 Framebuffer::GetStencil(int x, int y) const {
|
||||
u8 Framebuffer::GetStencil(u32 x, u32 y) const {
|
||||
const auto& framebuffer = regs.framebuffer;
|
||||
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
||||
|
||||
y = framebuffer.height - y;
|
||||
|
||||
const u32 coarse_y = y & ~7;
|
||||
@ -154,7 +157,6 @@ u8 Framebuffer::GetStencil(int x, int y) const {
|
||||
const u32 stride = framebuffer.width * bytes_per_pixel;
|
||||
|
||||
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
||||
const u8* depth_buffer = memory.GetPhysicalPointer(addr);
|
||||
const u8* src_pixel = depth_buffer + src_offset;
|
||||
|
||||
switch (framebuffer.depth_format) {
|
||||
@ -169,10 +171,8 @@ u8 Framebuffer::GetStencil(int x, int y) const {
|
||||
}
|
||||
}
|
||||
|
||||
void Framebuffer::SetDepth(int x, int y, u32 value) const {
|
||||
void Framebuffer::SetDepth(u32 x, u32 y, u32 value) const {
|
||||
const auto& framebuffer = regs.framebuffer;
|
||||
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
||||
|
||||
y = framebuffer.height - y;
|
||||
|
||||
const u32 coarse_y = y & ~7;
|
||||
@ -180,7 +180,6 @@ void Framebuffer::SetDepth(int x, int y, u32 value) const {
|
||||
const u32 stride = framebuffer.width * bytes_per_pixel;
|
||||
|
||||
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
||||
u8* depth_buffer = memory.GetPhysicalPointer(addr);
|
||||
u8* dst_pixel = depth_buffer + dst_offset;
|
||||
|
||||
switch (framebuffer.depth_format) {
|
||||
@ -201,10 +200,8 @@ void Framebuffer::SetDepth(int x, int y, u32 value) const {
|
||||
}
|
||||
}
|
||||
|
||||
void Framebuffer::SetStencil(int x, int y, u8 value) const {
|
||||
void Framebuffer::SetStencil(u32 x, u32 y, u8 value) const {
|
||||
const auto& framebuffer = regs.framebuffer;
|
||||
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
||||
|
||||
y = framebuffer.height - y;
|
||||
|
||||
const u32 coarse_y = y & ~7;
|
||||
@ -212,7 +209,6 @@ void Framebuffer::SetStencil(int x, int y, u8 value) const {
|
||||
const u32 stride = framebuffer.width * bytes_per_pixel;
|
||||
|
||||
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
||||
u8* depth_buffer = memory.GetPhysicalPointer(addr);
|
||||
u8* dst_pixel = depth_buffer + dst_offset;
|
||||
|
||||
switch (framebuffer.depth_format) {
|
||||
@ -231,7 +227,7 @@ void Framebuffer::SetStencil(int x, int y, u8 value) const {
|
||||
}
|
||||
}
|
||||
|
||||
void Framebuffer::DrawShadowMapPixel(int x, int y, u32 depth, u8 stencil) const {
|
||||
void Framebuffer::DrawShadowMapPixel(u32 x, u32 y, u32 depth, u8 stencil) const {
|
||||
const auto& framebuffer = regs.framebuffer;
|
||||
const auto& shadow = regs.shadow;
|
||||
const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
|
||||
|
@ -23,30 +23,37 @@ public:
|
||||
explicit Framebuffer(Memory::MemorySystem& memory, const Pica::FramebufferRegs& framebuffer);
|
||||
~Framebuffer();
|
||||
|
||||
/// Updates the framebuffer addresses from the PICA registers.
|
||||
void Bind();
|
||||
|
||||
/// Draws a pixel at the specified coordinates.
|
||||
void DrawPixel(int x, int y, const Common::Vec4<u8>& color) const;
|
||||
void DrawPixel(u32 x, u32 y, const Common::Vec4<u8>& color) const;
|
||||
|
||||
/// Returns the current color at the specified coordinates.
|
||||
[[nodiscard]] const Common::Vec4<u8> GetPixel(int x, int y) const;
|
||||
[[nodiscard]] const Common::Vec4<u8> GetPixel(u32 x, u32 y) const;
|
||||
|
||||
/// Returns the depth value at the specified coordinates.
|
||||
[[nodiscard]] u32 GetDepth(int x, int y) const;
|
||||
[[nodiscard]] u32 GetDepth(u32 x, u32 y) const;
|
||||
|
||||
/// Returns the stencil value at the specified coordinates.
|
||||
[[nodiscard]] u8 GetStencil(int x, int y) const;
|
||||
[[nodiscard]] u8 GetStencil(u32 x, u32 y) const;
|
||||
|
||||
/// Stores the provided depth value at the specified coordinates.
|
||||
void SetDepth(int x, int y, u32 value) const;
|
||||
void SetDepth(u32 x, u32 y, u32 value) const;
|
||||
|
||||
/// Stores the provided stencil value at the specified coordinates.
|
||||
void SetStencil(int x, int y, u8 value) const;
|
||||
void SetStencil(u32 x, u32 y, u8 value) const;
|
||||
|
||||
/// Draws a pixel to the shadow buffer.
|
||||
void DrawShadowMapPixel(int x, int y, u32 depth, u8 stencil) const;
|
||||
void DrawShadowMapPixel(u32 x, u32 y, u32 depth, u8 stencil) const;
|
||||
|
||||
private:
|
||||
Memory::MemorySystem& memory;
|
||||
const Pica::FramebufferRegs& regs;
|
||||
PAddr color_addr;
|
||||
u8* color_buffer{};
|
||||
PAddr depth_addr;
|
||||
u8* depth_buffer{};
|
||||
};
|
||||
|
||||
u8 PerformStencilAction(Pica::FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref);
|
||||
|
@ -95,8 +95,14 @@ private:
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
// Kirby Blowout Blast relies on the combiner output of a previous draw
|
||||
// in order to render the sky correctly.
|
||||
static thread_local Common::Vec4<u8> combiner_output{};
|
||||
|
||||
RasterizerSoftware::RasterizerSoftware(Memory::MemorySystem& memory_)
|
||||
: memory{memory_}, state{Pica::g_state}, regs{state.regs}, fb{memory, regs.framebuffer} {}
|
||||
: memory{memory_}, state{Pica::g_state}, regs{state.regs},
|
||||
num_sw_threads{std::max(std::thread::hardware_concurrency(), 2U)},
|
||||
sw_workers{num_sw_threads, "SwRenderer workers"}, fb{memory, regs.framebuffer} {}
|
||||
|
||||
void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||
const Pica::Shader::OutputVertex& v1,
|
||||
@ -289,167 +295,194 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
|
||||
|
||||
const auto w_inverse = Common::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
|
||||
|
||||
auto textures = regs.texturing.GetTextures();
|
||||
const auto textures = regs.texturing.GetTextures();
|
||||
const auto tev_stages = regs.texturing.GetTevStages();
|
||||
for (u32 i = 0; i < texture_data.size(); i++) {
|
||||
const PAddr addr = textures[i].config.GetPhysicalAddress();
|
||||
if (addr) {
|
||||
texture_data[i] = memory.GetPhysicalPointer(addr);
|
||||
}
|
||||
}
|
||||
|
||||
fb.Bind();
|
||||
|
||||
if (use_jit) {
|
||||
const TevConfigKey key{regs.texturing};
|
||||
auto [it, new_fun] = tev_cache.try_emplace(key.Hash());
|
||||
if (new_fun) {
|
||||
it->second = std::make_unique<TevConfig>(regs, key);
|
||||
}
|
||||
tev_config = it->second.get();
|
||||
}
|
||||
|
||||
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
|
||||
// TODO: Not sure if looping through x first might be faster
|
||||
for (u16 y = min_y + 8; y < max_y; y += 0x10) {
|
||||
for (u16 x = min_x + 8; x < max_x; x += 0x10) {
|
||||
// Do not process the pixel if it's inside the scissor box and the scissor mode is set
|
||||
// to Exclude.
|
||||
if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Exclude) {
|
||||
if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) {
|
||||
const auto process_scanline = [&, y] {
|
||||
for (u16 x = min_x + 8; x < max_x; x += 0x10) {
|
||||
// Do not process the pixel if it's inside the scissor box and the scissor mode is
|
||||
// set to Exclude.
|
||||
if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Exclude) {
|
||||
if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the barycentric coordinates w0, w1 and w2
|
||||
const s32 w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
|
||||
const s32 w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
|
||||
const s32 w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
|
||||
const s32 wsum = w0 + w1 + w2;
|
||||
|
||||
// If current pixel is not covered by the current primitive
|
||||
if (w0 < 0 || w1 < 0 || w2 < 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the barycentric coordinates w0, w1 and w2
|
||||
const s32 w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
|
||||
const s32 w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
|
||||
const s32 w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
|
||||
const s32 wsum = w0 + w1 + w2;
|
||||
const auto baricentric_coordinates = Common::MakeVec(
|
||||
f24::FromFloat32(static_cast<f32>(w0)), f24::FromFloat32(static_cast<f32>(w1)),
|
||||
f24::FromFloat32(static_cast<f32>(w2)));
|
||||
const f24 interpolated_w_inverse =
|
||||
f24::One() / Common::Dot(w_inverse, baricentric_coordinates);
|
||||
|
||||
// If current pixel is not covered by the current primitive
|
||||
if (w0 < 0 || w1 < 0 || w2 < 0) {
|
||||
continue;
|
||||
}
|
||||
// interpolated_z = z / w
|
||||
const float interpolated_z_over_w =
|
||||
(v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 +
|
||||
v2.screenpos[2].ToFloat32() * w2) /
|
||||
wsum;
|
||||
|
||||
const auto baricentric_coordinates = Common::MakeVec(
|
||||
f24::FromFloat32(static_cast<f32>(w0)), f24::FromFloat32(static_cast<f32>(w1)),
|
||||
f24::FromFloat32(static_cast<f32>(w2)));
|
||||
const f24 interpolated_w_inverse =
|
||||
f24::One() / Common::Dot(w_inverse, baricentric_coordinates);
|
||||
// Not fully accurate. About 3 bits in precision are missing.
|
||||
// Z-Buffer (z / w * scale + offset)
|
||||
const float depth_scale =
|
||||
f24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32();
|
||||
const float depth_offset =
|
||||
f24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32();
|
||||
float depth = interpolated_z_over_w * depth_scale + depth_offset;
|
||||
|
||||
// interpolated_z = z / w
|
||||
const float interpolated_z_over_w =
|
||||
(v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 +
|
||||
v2.screenpos[2].ToFloat32() * w2) /
|
||||
wsum;
|
||||
// Potentially switch to W-Buffer
|
||||
if (regs.rasterizer.depthmap_enable ==
|
||||
Pica::RasterizerRegs::DepthBuffering::WBuffering) {
|
||||
// W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
|
||||
depth *= interpolated_w_inverse.ToFloat32() * wsum;
|
||||
}
|
||||
|
||||
// Not fully accurate. About 3 bits in precision are missing.
|
||||
// Z-Buffer (z / w * scale + offset)
|
||||
const float depth_scale =
|
||||
f24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32();
|
||||
const float depth_offset =
|
||||
f24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32();
|
||||
float depth = interpolated_z_over_w * depth_scale + depth_offset;
|
||||
// Clamp the result
|
||||
depth = std::clamp(depth, 0.0f, 1.0f);
|
||||
|
||||
// Potentially switch to W-Buffer
|
||||
if (regs.rasterizer.depthmap_enable ==
|
||||
Pica::RasterizerRegs::DepthBuffering::WBuffering) {
|
||||
// W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
|
||||
depth *= interpolated_w_inverse.ToFloat32() * wsum;
|
||||
}
|
||||
|
||||
// Clamp the result
|
||||
depth = std::clamp(depth, 0.0f, 1.0f);
|
||||
|
||||
/**
|
||||
* Perspective correct attribute interpolation:
|
||||
* Attribute values cannot be calculated by simple linear interpolation since
|
||||
* they are not linear in screen space. For example, when interpolating a
|
||||
* texture coordinate across two vertices, something simple like
|
||||
* u = (u0*w0 + u1*w1)/(w0+w1)
|
||||
* will not work. However, the attribute value divided by the
|
||||
* clipspace w-coordinate (u/w) and and the inverse w-coordinate (1/w) are linear
|
||||
* in screenspace. Hence, we can linearly interpolate these two independently and
|
||||
* calculate the interpolated attribute by dividing the results.
|
||||
* I.e.
|
||||
* u_over_w = ((u0/v0.pos.w)*w0 + (u1/v1.pos.w)*w1)/(w0+w1)
|
||||
* one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1)
|
||||
* u = u_over_w / one_over_w
|
||||
*
|
||||
* The generalization to three vertices is straightforward in baricentric coordinates.
|
||||
**/
|
||||
const auto get_interpolated_attribute = [&](f24 attr0, f24 attr1, f24 attr2) {
|
||||
auto attr_over_w = Common::MakeVec(attr0, attr1, attr2);
|
||||
f24 interpolated_attr_over_w = Common::Dot(attr_over_w, baricentric_coordinates);
|
||||
return interpolated_attr_over_w * interpolated_w_inverse;
|
||||
};
|
||||
|
||||
const Common::Vec4<u8> primary_color{
|
||||
static_cast<u8>(
|
||||
round(get_interpolated_attribute(v0.color.r(), v1.color.r(), v2.color.r())
|
||||
.ToFloat32() *
|
||||
255)),
|
||||
static_cast<u8>(
|
||||
round(get_interpolated_attribute(v0.color.g(), v1.color.g(), v2.color.g())
|
||||
.ToFloat32() *
|
||||
255)),
|
||||
static_cast<u8>(
|
||||
round(get_interpolated_attribute(v0.color.b(), v1.color.b(), v2.color.b())
|
||||
.ToFloat32() *
|
||||
255)),
|
||||
static_cast<u8>(
|
||||
round(get_interpolated_attribute(v0.color.a(), v1.color.a(), v2.color.a())
|
||||
.ToFloat32() *
|
||||
255)),
|
||||
};
|
||||
|
||||
std::array<Common::Vec2<f24>, 3> uv;
|
||||
uv[0].u() = get_interpolated_attribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
|
||||
uv[0].v() = get_interpolated_attribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
|
||||
uv[1].u() = get_interpolated_attribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u());
|
||||
uv[1].v() = get_interpolated_attribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v());
|
||||
uv[2].u() = get_interpolated_attribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u());
|
||||
uv[2].v() = get_interpolated_attribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v());
|
||||
|
||||
// Sample bound texture units.
|
||||
const f24 tc0_w = get_interpolated_attribute(v0.tc0_w, v1.tc0_w, v2.tc0_w);
|
||||
const auto texture_color = TextureColor(uv, textures, tc0_w);
|
||||
|
||||
Common::Vec4<u8> primary_fragment_color = {0, 0, 0, 0};
|
||||
Common::Vec4<u8> secondary_fragment_color = {0, 0, 0, 0};
|
||||
|
||||
if (!regs.lighting.disable) {
|
||||
const auto normquat =
|
||||
Common::Quaternion<f32>{
|
||||
{get_interpolated_attribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(),
|
||||
get_interpolated_attribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(),
|
||||
get_interpolated_attribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()},
|
||||
get_interpolated_attribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(),
|
||||
}
|
||||
.Normalized();
|
||||
|
||||
const Common::Vec3f view{
|
||||
get_interpolated_attribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(),
|
||||
get_interpolated_attribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(),
|
||||
get_interpolated_attribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(),
|
||||
/**
|
||||
* Perspective correct attribute interpolation:
|
||||
* Attribute values cannot be calculated by simple linear interpolation since
|
||||
* they are not linear in screen space. For example, when interpolating a
|
||||
* texture coordinate across two vertices, something simple like
|
||||
* u = (u0*w0 + u1*w1)/(w0+w1)
|
||||
* will not work. However, the attribute value divided by the
|
||||
* clipspace w-coordinate (u/w) and and the inverse w-coordinate (1/w) are linear
|
||||
* in screenspace. Hence, we can linearly interpolate these two independently and
|
||||
* calculate the interpolated attribute by dividing the results.
|
||||
* I.e.
|
||||
* u_over_w = ((u0/v0.pos.w)*w0 + (u1/v1.pos.w)*w1)/(w0+w1)
|
||||
* one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1)
|
||||
* u = u_over_w / one_over_w
|
||||
*
|
||||
* The generalization to three vertices is straightforward in baricentric
|
||||
*coordinates.
|
||||
**/
|
||||
const auto get_interpolated_attribute = [&](f24 attr0, f24 attr1, f24 attr2) {
|
||||
auto attr_over_w = Common::MakeVec(attr0, attr1, attr2);
|
||||
f24 interpolated_attr_over_w =
|
||||
Common::Dot(attr_over_w, baricentric_coordinates);
|
||||
return interpolated_attr_over_w * interpolated_w_inverse;
|
||||
};
|
||||
std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors(
|
||||
regs.lighting, state.lighting, normquat, view, texture_color);
|
||||
}
|
||||
|
||||
// Write the TEV stages.
|
||||
WriteTevConfig(texture_color, tev_stages, primary_color, primary_fragment_color,
|
||||
secondary_fragment_color);
|
||||
const Common::Vec4<u8> primary_color{
|
||||
static_cast<u8>(
|
||||
round(get_interpolated_attribute(v0.color.r(), v1.color.r(), v2.color.r())
|
||||
.ToFloat32() *
|
||||
255)),
|
||||
static_cast<u8>(
|
||||
round(get_interpolated_attribute(v0.color.g(), v1.color.g(), v2.color.g())
|
||||
.ToFloat32() *
|
||||
255)),
|
||||
static_cast<u8>(
|
||||
round(get_interpolated_attribute(v0.color.b(), v1.color.b(), v2.color.b())
|
||||
.ToFloat32() *
|
||||
255)),
|
||||
static_cast<u8>(
|
||||
round(get_interpolated_attribute(v0.color.a(), v1.color.a(), v2.color.a())
|
||||
.ToFloat32() *
|
||||
255)),
|
||||
};
|
||||
|
||||
const auto& output_merger = regs.framebuffer.output_merger;
|
||||
if (output_merger.fragment_operation_mode ==
|
||||
FramebufferRegs::FragmentOperationMode::Shadow) {
|
||||
u32 depth_int = static_cast<u32>(depth * 0xFFFFFF);
|
||||
// Use green color as the shadow intensity
|
||||
u8 stencil = combiner_output.y;
|
||||
fb.DrawShadowMapPixel(x >> 4, y >> 4, depth_int, stencil);
|
||||
// Skip the normal output merger pipeline if it is in shadow mode
|
||||
continue;
|
||||
}
|
||||
std::array<Common::Vec2<f24>, 3> uv;
|
||||
uv[0].u() = get_interpolated_attribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
|
||||
uv[0].v() = get_interpolated_attribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
|
||||
uv[1].u() = get_interpolated_attribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u());
|
||||
uv[1].v() = get_interpolated_attribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v());
|
||||
uv[2].u() = get_interpolated_attribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u());
|
||||
uv[2].v() = get_interpolated_attribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v());
|
||||
|
||||
// Does alpha testing happen before or after stencil?
|
||||
if (!DoAlphaTest(combiner_output.a())) {
|
||||
continue;
|
||||
// Sample bound texture units.
|
||||
const f24 tc0_w = get_interpolated_attribute(v0.tc0_w, v1.tc0_w, v2.tc0_w);
|
||||
auto texture_color = TextureColor(uv, textures, tc0_w);
|
||||
|
||||
Common::Vec4<u8> primary_fragment_color = {0, 0, 0, 0};
|
||||
Common::Vec4<u8> secondary_fragment_color = {0, 0, 0, 0};
|
||||
|
||||
if (!regs.lighting.disable) {
|
||||
const auto normquat =
|
||||
Common::Quaternion<f32>{
|
||||
{get_interpolated_attribute(v0.quat.x, v1.quat.x, v2.quat.x)
|
||||
.ToFloat32(),
|
||||
get_interpolated_attribute(v0.quat.y, v1.quat.y, v2.quat.y)
|
||||
.ToFloat32(),
|
||||
get_interpolated_attribute(v0.quat.z, v1.quat.z, v2.quat.z)
|
||||
.ToFloat32()},
|
||||
get_interpolated_attribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(),
|
||||
}
|
||||
.Normalized();
|
||||
|
||||
const Common::Vec3f view{
|
||||
get_interpolated_attribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(),
|
||||
get_interpolated_attribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(),
|
||||
get_interpolated_attribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(),
|
||||
};
|
||||
std::tie(primary_fragment_color, secondary_fragment_color) =
|
||||
ComputeFragmentsColors(regs.lighting, state.lighting, normquat, view,
|
||||
texture_color);
|
||||
}
|
||||
|
||||
// Write the TEV stages.
|
||||
WriteTevConfig(texture_color, tev_stages, primary_color, primary_fragment_color,
|
||||
secondary_fragment_color);
|
||||
|
||||
const auto& output_merger = regs.framebuffer.output_merger;
|
||||
if (output_merger.fragment_operation_mode ==
|
||||
FramebufferRegs::FragmentOperationMode::Shadow) {
|
||||
u32 depth_int = static_cast<u32>(depth * 0xFFFFFF);
|
||||
// Use green color as the shadow intensity
|
||||
u8 stencil = combiner_output.y;
|
||||
fb.DrawShadowMapPixel(x >> 4, y >> 4, depth_int, stencil);
|
||||
// Skip the normal output merger pipeline if it is in shadow mode
|
||||
continue;
|
||||
}
|
||||
|
||||
// Does alpha testing happen before or after stencil?
|
||||
if (!DoAlphaTest(combiner_output.a())) {
|
||||
continue;
|
||||
}
|
||||
WriteFog(depth);
|
||||
if (!DoDepthStencilTest(x, y, depth)) {
|
||||
continue;
|
||||
}
|
||||
const auto result = PixelColor(x, y);
|
||||
if (regs.framebuffer.framebuffer.allow_color_write != 0) {
|
||||
fb.DrawPixel(x >> 4, y >> 4, result);
|
||||
}
|
||||
}
|
||||
WriteFog(combiner_output, depth);
|
||||
if (!DoDepthStencilTest(x, y, depth)) {
|
||||
continue;
|
||||
}
|
||||
const auto result = PixelColor(x, y, combiner_output);
|
||||
if (regs.framebuffer.framebuffer.allow_color_write != 0) {
|
||||
fb.DrawPixel(x >> 4, y >> 4, result);
|
||||
}
|
||||
}
|
||||
};
|
||||
sw_workers.QueueWork(std::move(process_scanline));
|
||||
}
|
||||
sw_workers.WaitForRequests();
|
||||
}
|
||||
|
||||
std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor(
|
||||
@ -538,11 +571,10 @@ std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor(
|
||||
t = texture.config.height - 1 -
|
||||
GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
|
||||
|
||||
const u8* texture_data = memory.GetPhysicalPointer(texture_address);
|
||||
const auto info = TextureInfo::FromPicaRegister(texture.config, texture.format);
|
||||
|
||||
// TODO: Apply the min and mag filters to the texture
|
||||
texture_color[i] = LookupTexture(texture_data, s, t, info);
|
||||
texture_color[i] = LookupTexture(texture_data[i], s, t, info);
|
||||
}
|
||||
|
||||
if (i == 0 && (texture.config.type == TexturingRegs::TextureConfig::Shadow2D ||
|
||||
@ -572,8 +604,7 @@ std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor(
|
||||
return texture_color;
|
||||
}
|
||||
|
||||
Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y,
|
||||
Common::Vec4<u8>& combiner_output) const {
|
||||
Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y) const {
|
||||
const auto dest = fb.GetPixel(x >> 4, y >> 4);
|
||||
Common::Vec4<u8> blend_output = combiner_output;
|
||||
|
||||
@ -664,10 +695,20 @@ Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y,
|
||||
}
|
||||
|
||||
void RasterizerSoftware::WriteTevConfig(
|
||||
std::span<const Common::Vec4<u8>, 4> texture_color,
|
||||
std::span<Common::Vec4<u8>, 4> texture_color,
|
||||
std::span<const Pica::TexturingRegs::TevStageConfig, 6> tev_stages,
|
||||
Common::Vec4<u8> primary_color, Common::Vec4<u8> primary_fragment_color,
|
||||
Common::Vec4<u8> secondary_fragment_color) {
|
||||
|
||||
#if CITRA_ARCH(x86_64)
|
||||
if (use_jit) {
|
||||
const u32 tev_combiner_buffer_color = regs.texturing.tev_combiner_buffer_color.raw;
|
||||
combiner_output = tev_config->Run(texture_color, primary_color, primary_fragment_color,
|
||||
secondary_fragment_color, tev_combiner_buffer_color);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Texture environment - consists of 6 stages of color and alpha combining.
|
||||
* Color combiners take three input color values from some source (e.g. interpolated
|
||||
@ -731,6 +772,7 @@ void RasterizerSoftware::WriteTevConfig(
|
||||
GetColorModifier(tev_stage.color_modifier2, get_source(tev_stage.color_source2)),
|
||||
GetColorModifier(tev_stage.color_modifier3, get_source(tev_stage.color_source3)),
|
||||
};
|
||||
|
||||
const Common::Vec3<u8> color_output = ColorCombine(tev_stage.color_op, color_result);
|
||||
|
||||
u8 alpha_output;
|
||||
@ -768,7 +810,7 @@ void RasterizerSoftware::WriteTevConfig(
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerSoftware::WriteFog(Common::Vec4<u8>& combiner_output, float depth) const {
|
||||
void RasterizerSoftware::WriteFog(float depth) const {
|
||||
/**
|
||||
* Apply fog combiner. Not fully accurate. We'd have to know what data type is used to
|
||||
* store the depth etc. Using float for now until we know more about Pica datatypes.
|
||||
|
@ -4,13 +4,20 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "common/arch.h"
|
||||
#include "common/thread_worker.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
#include "video_core/renderer_software/sw_clipper.h"
|
||||
#include "video_core/renderer_software/sw_framebuffer.h"
|
||||
|
||||
#if CITRA_ARCH(x86_64)
|
||||
#include "video_core/renderer_software/sw_tev_jit.h"
|
||||
#endif
|
||||
|
||||
namespace Pica::Shader {
|
||||
struct OutputVertex;
|
||||
}
|
||||
@ -52,16 +59,16 @@ private:
|
||||
std::span<const Pica::TexturingRegs::FullTextureConfig, 3> textures, f24 tc0_w) const;
|
||||
|
||||
/// Returns the final pixel color with blending or logic ops applied.
|
||||
Common::Vec4<u8> PixelColor(u16 x, u16 y, Common::Vec4<u8>& combiner_output) const;
|
||||
Common::Vec4<u8> PixelColor(u16 x, u16 y) const;
|
||||
|
||||
/// Emulates the TEV configuration and returns the combiner output.
|
||||
void WriteTevConfig(std::span<const Common::Vec4<u8>, 4> texture_color,
|
||||
void WriteTevConfig(std::span<Common::Vec4<u8>, 4> texture_color,
|
||||
std::span<const Pica::TexturingRegs::TevStageConfig, 6> tev_stages,
|
||||
Common::Vec4<u8> primary_color, Common::Vec4<u8> primary_fragment_color,
|
||||
Common::Vec4<u8> secondary_fragment_color);
|
||||
|
||||
/// Blends fog to the combiner output if enabled.
|
||||
void WriteFog(Common::Vec4<u8>& combiner_output, float depth) const;
|
||||
void WriteFog(float depth) const;
|
||||
|
||||
/// Performs the alpha test. Returns false if the test failed.
|
||||
bool DoAlphaTest(u8 alpha) const;
|
||||
@ -73,10 +80,13 @@ private:
|
||||
Memory::MemorySystem& memory;
|
||||
Pica::State& state;
|
||||
const Pica::Regs& regs;
|
||||
bool use_jit{true};
|
||||
size_t num_sw_threads;
|
||||
Common::ThreadWorker sw_workers;
|
||||
Framebuffer fb;
|
||||
// Kirby Blowout Blast relies on the combiner output of a previous draw
|
||||
// in order to render the sky correctly.
|
||||
Common::Vec4<u8> combiner_output{};
|
||||
TevCache tev_cache;
|
||||
TevConfig* tev_config{};
|
||||
std::array<const u8*, 3> texture_data{};
|
||||
};
|
||||
|
||||
} // namespace SwRenderer
|
||||
|
473
src/video_core/renderer_software/sw_tev_jit.cpp
Normal file
473
src/video_core/renderer_software/sw_tev_jit.cpp
Normal file
@ -0,0 +1,473 @@
|
||||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <bit>
|
||||
#include <emmintrin.h>
|
||||
#include "common/x64/xbyak_abi.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/renderer_software/sw_tev_jit.h"
|
||||
|
||||
namespace SwRenderer {
|
||||
|
||||
namespace {
|
||||
|
||||
using namespace Common::X64;
|
||||
using namespace Xbyak::util;
|
||||
using Pica::TexturingRegs;
|
||||
using Xbyak::Reg32;
|
||||
using Xbyak::Reg64;
|
||||
using Xbyak::Xmm;
|
||||
using TevStageConfig = Pica::TexturingRegs::TevStageConfig;
|
||||
|
||||
constexpr Reg32 A0 = r11d;
|
||||
constexpr Reg32 A1 = r12d;
|
||||
constexpr Reg32 A2 = r13d;
|
||||
constexpr Reg32 ALPHA_OUTPUT = r14d;
|
||||
constexpr Xmm COMBINER_OUTPUT = xmm0;
|
||||
constexpr Xmm COMBINER_BUFFER = xmm1;
|
||||
constexpr Xmm NEXT_COMBINER_BUFFER = xmm2;
|
||||
constexpr Xmm VEC0 = xmm3;
|
||||
constexpr Xmm VEC1 = xmm4;
|
||||
constexpr Xmm VEC2 = xmm5;
|
||||
constexpr Xmm COLOR_OUTPUT = xmm6;
|
||||
constexpr Xmm ZERO = xmm13;
|
||||
constexpr Xmm MID_COLOR = xmm14;
|
||||
constexpr Xmm MAX_COLOR = xmm15;
|
||||
|
||||
bool IsPassThroughTevStage(const TevStageConfig& stage) {
|
||||
return (stage.color_op == TevStageConfig::Operation::Replace &&
|
||||
stage.alpha_op == TevStageConfig::Operation::Replace &&
|
||||
stage.color_source1 == TevStageConfig::Source::Previous &&
|
||||
stage.alpha_source1 == TevStageConfig::Source::Previous &&
|
||||
stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
|
||||
stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
|
||||
stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Builds the key by copying, for every TEV stage reported by the texturing registers,
/// the raw source/modifier/operation/scale words and the stage's constant colour.
TevConfigKey::TevConfigKey(const Pica::TexturingRegs& regs) {
    const auto& source_stages = regs.GetTevStages();
    size_t slot = 0;
    for (const auto& source_stage : source_stages) {
        auto& key_stage = stages[slot];
        key_stage.sources_raw = source_stage.sources_raw;
        key_stage.modifiers_raw = source_stage.modifiers_raw;
        key_stage.ops_raw = source_stage.ops_raw;
        key_stage.const_color = source_stage.const_color;
        key_stage.scales_raw = source_stage.scales_raw;
        ++slot;
    }
}
|
||||
|
||||
/// Keeps a reference to the PICA registers and immediately emits the program
/// that corresponds to the given key.
TevConfig::TevConfig(const Pica::Regs& regs_, const TevConfigKey& key) : regs(regs_) {
    WriteTevConfig(key);
}

/// Nothing to release beyond what the members and base class clean up themselves.
TevConfig::~TevConfig() = default;
|
||||
|
||||
/// Invokes the compiled TEV program for a single fragment.
///
/// Each colour is reinterpreted as one packed 32-bit word. The secondary fragment colour and
/// the combiner buffer colour travel together in the fourth argument: the former in the low
/// 32 bits, the latter in the high 32 bits.
///
/// @returns The 32-bit word returned by the program, reinterpreted as a four component colour.
Common::Vec4<u8> TevConfig::Run(std::span<Common::Vec4<u8>, 4> texture_color_,
                                Common::Vec4<u8> primary_color_,
                                Common::Vec4<u8> primary_fragment_color_,
                                Common::Vec4<u8> secondary_fragment_color_,
                                u64 tev_combiner_buffer_color) {
    const u32 packed_primary = std::bit_cast<u32>(primary_color_);
    const u32 packed_primary_fragment = std::bit_cast<u32>(primary_fragment_color_);
    const u32 packed_secondary_fragment = std::bit_cast<u32>(secondary_fragment_color_);

    // Low half: secondary fragment colour, high half: combiner buffer colour.
    const u64 packed_fourth_argument =
        (tev_combiner_buffer_color << 32) | packed_secondary_fragment;

    // The program reads the texture colours through a pointer to packed 32-bit words.
    u32* texels = reinterpret_cast<u32*>(texture_color_.data());

    const u32 packed_output =
        program(texels, packed_primary, packed_primary_fragment, packed_fourth_argument);
    return std::bit_cast<Common::Vec4<u8>>(packed_output);
}
|
||||
|
||||
void TevConfig::WriteTevConfig(const TevConfigKey& key) {
|
||||
program = (CompiledTevFun*)getCurr();
|
||||
|
||||
constexpr Xbyak::Reg TEXTURE_COLOR = ABI_PARAM1;
|
||||
constexpr Xbyak::Reg PRIMARY_COLOR = ABI_PARAM2;
|
||||
constexpr Xbyak::Reg PRIMARY_FRAGMENT_COLOR = ABI_PARAM3;
|
||||
constexpr Xbyak::Reg SECONDARY_FRAGMENT_COLOR = ABI_PARAM4;
|
||||
|
||||
// Save calle state
|
||||
ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
|
||||
|
||||
// Clear the combiner registers and zero constant
|
||||
pxor(COMBINER_OUTPUT, COMBINER_OUTPUT);
|
||||
pxor(COMBINER_BUFFER, COMBINER_BUFFER);
|
||||
pxor(ZERO, ZERO);
|
||||
|
||||
// Used to set an xmm register to the max color
|
||||
static const __m128i max = _mm_set1_epi32(255);
|
||||
mov(rax, reinterpret_cast<size_t>(&max));
|
||||
movdqu(MAX_COLOR, xword[rax]);
|
||||
|
||||
// Used to set an xmm register to the mid color
|
||||
static const __m128i mid = _mm_set1_epi32(128);
|
||||
mov(rax, reinterpret_cast<size_t>(&mid));
|
||||
movdqu(MID_COLOR, xword[rax]);
|
||||
|
||||
// Load next_combiner_buffer
|
||||
mov(rax, ABI_PARAM4);
|
||||
shr(rax, 32);
|
||||
vmovd(NEXT_COMBINER_BUFFER, eax);
|
||||
pmovzxbd(NEXT_COMBINER_BUFFER, NEXT_COMBINER_BUFFER);
|
||||
|
||||
for (u32 tev_stage_index = 0; tev_stage_index < key.stages.size(); ++tev_stage_index) {
|
||||
const auto& tev_stage = key.stages[tev_stage_index];
|
||||
if (!IsPassThroughTevStage(tev_stage)) {
|
||||
using Source = TexturingRegs::TevStageConfig::Source;
|
||||
|
||||
const auto get_source = [&](const Xbyak::Xmm& dest, Source source) {
|
||||
switch (source) {
|
||||
case Source::PrimaryColor:
|
||||
vmovd(dest, PRIMARY_COLOR.cvt32());
|
||||
pmovzxbd(dest, dest);
|
||||
break;
|
||||
case Source::PrimaryFragmentColor:
|
||||
vmovd(dest, PRIMARY_FRAGMENT_COLOR.cvt32());
|
||||
pmovzxbd(dest, dest);
|
||||
break;
|
||||
case Source::SecondaryFragmentColor:
|
||||
vmovd(dest, SECONDARY_FRAGMENT_COLOR.cvt32());
|
||||
pmovzxbd(dest, dest);
|
||||
break;
|
||||
case Source::Texture0:
|
||||
case Source::Texture1:
|
||||
case Source::Texture2:
|
||||
case Source::Texture3: {
|
||||
const u32 index = static_cast<u32>(source) - static_cast<u32>(Source::Texture0);
|
||||
vmovd(dest, dword[TEXTURE_COLOR + index * sizeof(u32)]);
|
||||
pmovzxbd(dest, dest);
|
||||
break;
|
||||
}
|
||||
case Source::PreviousBuffer:
|
||||
vmovdqa(dest, COMBINER_BUFFER);
|
||||
break;
|
||||
case Source::Constant:
|
||||
mov(eax, tev_stage.const_color);
|
||||
vmovd(dest, eax);
|
||||
pmovzxbd(dest, dest);
|
||||
break;
|
||||
case Source::Previous:
|
||||
vmovdqa(dest, COMBINER_OUTPUT);
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Unknown color combiner source {}", source);
|
||||
UNIMPLEMENTED();
|
||||
vmovdqa(dest, ZERO);
|
||||
}
|
||||
return dest;
|
||||
};
|
||||
|
||||
// Load the color modifiers to VEC0/1/2.
|
||||
GetColorModifier(get_source(VEC0, tev_stage.color_source1), tev_stage.color_modifier1);
|
||||
GetColorModifier(get_source(VEC1, tev_stage.color_source2), tev_stage.color_modifier2);
|
||||
GetColorModifier(get_source(VEC2, tev_stage.color_source3), tev_stage.color_modifier3);
|
||||
|
||||
// Combine the texture colors to COLOR_OUTPUT.
|
||||
ColorCombine(COLOR_OUTPUT, tev_stage.color_op);
|
||||
|
||||
if (tev_stage.color_op == TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
|
||||
// Result of Dot3_RGBA operation is also placed to the alpha component
|
||||
vmovd(ALPHA_OUTPUT.cvt32(), COLOR_OUTPUT);
|
||||
} else {
|
||||
// Load the alpha modifers to VEC0/1/2.
|
||||
GetAlphaModifier(get_source(VEC0, tev_stage.alpha_source1), A0,
|
||||
tev_stage.alpha_modifier1);
|
||||
GetAlphaModifier(get_source(VEC1, tev_stage.alpha_source2), A1,
|
||||
tev_stage.alpha_modifier2);
|
||||
GetAlphaModifier(get_source(VEC2, tev_stage.alpha_source3), A2,
|
||||
tev_stage.alpha_modifier3);
|
||||
|
||||
// Combine the alpha values to ALPHA_OUTPUT.
|
||||
AlphaCombine(ALPHA_OUTPUT, tev_stage.alpha_op);
|
||||
}
|
||||
|
||||
// Load the color multipler to an SSE vector.
|
||||
mov(eax, tev_stage.GetColorMultiplier());
|
||||
movd(VEC0, eax);
|
||||
pshufd(VEC0, VEC0, 0);
|
||||
|
||||
// Multiply color output with the multiplier and take the minimum.
|
||||
pmulld(COLOR_OUTPUT, VEC0);
|
||||
pminsd(COLOR_OUTPUT, MAX_COLOR);
|
||||
|
||||
// Load the alpha multiplier, multiply it with the alpha output.
|
||||
mov(eax, tev_stage.GetAlphaMultiplier());
|
||||
imul(ALPHA_OUTPUT, eax);
|
||||
|
||||
// Load result to a vector and take the minimum
|
||||
movd(VEC0, ALPHA_OUTPUT);
|
||||
pshufd(VEC0, VEC0, 0);
|
||||
pminsd(VEC0, MAX_COLOR);
|
||||
|
||||
// Blend vectors to get the combiner output
|
||||
vpblendd(COMBINER_OUTPUT, COLOR_OUTPUT, VEC0, 0b1000);
|
||||
}
|
||||
|
||||
// Set combiner buffer to the next buffer
|
||||
movq(COMBINER_BUFFER, NEXT_COMBINER_BUFFER);
|
||||
|
||||
if (regs.texturing.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(
|
||||
tev_stage_index)) {
|
||||
vpblendd(NEXT_COMBINER_BUFFER, COMBINER_OUTPUT, NEXT_COMBINER_BUFFER, 0b1000);
|
||||
}
|
||||
|
||||
if (regs.texturing.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(
|
||||
tev_stage_index)) {
|
||||
vpblendd(NEXT_COMBINER_BUFFER, COMBINER_OUTPUT, NEXT_COMBINER_BUFFER, 0b0111);
|
||||
}
|
||||
}
|
||||
|
||||
// Pack combiner output to a u32 to be returned.
|
||||
vpextrd(edx, COMBINER_OUTPUT, 3);
|
||||
vpextrd(eax, COMBINER_OUTPUT, 2);
|
||||
sal(edx, 8);
|
||||
or_(eax, edx);
|
||||
vpextrd(edx, COMBINER_OUTPUT, 1);
|
||||
sal(eax, 8);
|
||||
or_(edx, eax);
|
||||
vmovd(eax, COMBINER_OUTPUT);
|
||||
sal(edx, 8);
|
||||
or_(eax, edx);
|
||||
|
||||
ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
|
||||
ret();
|
||||
ready();
|
||||
}
|
||||
|
||||
/// Emits code that applies a colour modifier to `dest` in place.
///
/// `dest` is expected to hold one colour component per 32-bit lane. Depending on `factor`
/// the source colour is kept (with lane 3 cleared), a single component is replicated, and/or
/// the value is replaced by MAX_COLOR minus itself. The result is finally masked with
/// MAX_COLOR.
void TevConfig::GetColorModifier(const Xbyak::Xmm& dest, TevStageConfig::ColorModifier factor) {
    using ColorModifier = TevStageConfig::ColorModifier;

    // Replicates lane `lane` into lanes 0-2. The two top bits of the shuffle mask stay
    // zero, so lane 3 receives the old lane 0.
    const auto splat = [&](u32 lane) {
        const u8 shuffle = lane | (lane << 2) | (lane << 4);
        vpshufd(dest, dest, shuffle);
    };

    // dest = MAX_COLOR - dest, for every lane.
    const auto complement = [&] { vpsubd(dest, MAX_COLOR, dest); };

    switch (factor) {
    case ColorModifier::SourceColor:
        // Keep lanes 0-2 and take lane 3 from ZERO.
        vpblendd(dest, dest, ZERO, 0b1000);
        break;
    case ColorModifier::OneMinusSourceColor:
        complement();
        break;
    case ColorModifier::SourceRed:
        splat(0);
        break;
    case ColorModifier::OneMinusSourceRed:
        splat(0);
        complement();
        break;
    case ColorModifier::SourceGreen:
        splat(1);
        break;
    case ColorModifier::OneMinusSourceGreen:
        splat(1);
        complement();
        break;
    case ColorModifier::SourceBlue:
        splat(2);
        break;
    case ColorModifier::OneMinusSourceBlue:
        splat(2);
        complement();
        break;
    case ColorModifier::SourceAlpha:
        splat(3);
        break;
    case ColorModifier::OneMinusSourceAlpha:
        splat(3);
        complement();
        break;
    default:
        UNREACHABLE();
    }

    pand(dest, MAX_COLOR);
}
|
||||
|
||||
/// Emits the colour combiner for `op`.
///
/// The operands are read from VEC0, VEC1 and VEC2 (all of which may be clobbered) and the
/// combined value is written to `dest`, then AND-ed with MAX_COLOR.
/// Every "divide by 255" is approximated by a right shift of 8 bits.
void TevConfig::ColorCombine(const Xbyak::Xmm& dest, TevStageConfig::Operation op) {
    using Operation = TevStageConfig::Operation;

    switch (op) {
    case Operation::Replace:
        vmovdqa(dest, VEC0);
        break;
    case Operation::Modulate:
        pmulld(VEC0, VEC1);
        // Per-lane (32-bit) shift: a 64-bit shift would leak bits between adjacent lanes.
        vpsrld(dest, VEC0, 8); // TODO: This is a very crude approximation of division by 255
        break;
    case Operation::Add:
        vpaddd(VEC0, VEC0, VEC1);
        vpminsd(dest, MAX_COLOR, VEC0);
        break;
    case Operation::AddSigned:
        vpaddd(VEC0, VEC0, VEC1);
        vpsubd(VEC0, VEC0, MID_COLOR);
        vpminsd(VEC0, VEC0, MAX_COLOR);
        vpmaxsd(dest, VEC0, ZERO);
        break;
    case Operation::Lerp:
        // dest = (VEC0 * VEC2 + VEC1 * (MAX_COLOR - VEC2)) >> 8
        pmulld(VEC0, VEC2);
        // The weight of the second operand is MAX_COLOR - VEC2 (not VEC2 - MAX_COLOR),
        // matching the scalar alpha path which computes 255 - A2.
        vpsubd(VEC2, MAX_COLOR, VEC2);
        pmulld(VEC1, VEC2);
        vpaddd(dest, VEC0, VEC1);
        // Shift the sum held in dest; shifting VEC0 here would discard the VEC1 term.
        vpsrld(dest, dest, 8); // TODO: This is a very crude approximation of division by 255
        break;
    case Operation::Subtract:
        psubd(VEC0, VEC1);
        vpmaxsd(dest, VEC0, ZERO);
        break;
    case Operation::MultiplyThenAdd:
        // dest = min((VEC0 * VEC1 + VEC2 * MAX_COLOR) >> 8, MAX_COLOR)
        pmulld(VEC0, VEC1);
        pmulld(VEC2, MAX_COLOR);
        paddd(VEC0, VEC2);
        // Scale down first and clamp afterwards; clamping the un-scaled sum to MAX_COLOR
        // would make the shifted result collapse to zero.
        psrld(VEC0, 8); // TODO: This is a very crude approximation of division by 255
        vpminsd(dest, VEC0, MAX_COLOR);
        break;
    case Operation::AddThenMultiply:
        paddd(VEC0, VEC1);
        pminsd(VEC0, MAX_COLOR);
        pmulld(VEC0, VEC2);
        vpsrld(dest, VEC0, 8); // TODO: This is a very crude approximation of division by 255
        break;
    case Operation::Dot3_RGB:
    case Operation::Dot3_RGBA:
        // Per lane: ((2 * VEC0 - MAX_COLOR) * (2 * VEC1 - MAX_COLOR) + MID_COLOR) >> 8
        pslld(VEC0, 1);
        psubd(VEC0, MAX_COLOR);
        pslld(VEC1, 1);
        psubd(VEC1, MAX_COLOR);
        pmulld(VEC0, VEC1);
        paddd(VEC0, MID_COLOR);
        // The products are signed, so the shift has to be arithmetic; a logical shift turns
        // negative terms into large positive ones and corrupts the sum below.
        psrad(VEC0, 8);
        // Lane 3 is taken from ZERO so that it does not contribute to the sum.
        vpblendd(VEC0, VEC0, ZERO, 0b1000);
        // Two horizontal adds leave the sum of all four lanes in every lane.
        phaddd(VEC0, VEC0);
        phaddd(VEC0, VEC0);
        pminsd(VEC0, MAX_COLOR);
        pmaxsd(VEC0, ZERO);
        pshufd(dest, VEC0, 0);
        break;
    default:
        LOG_ERROR(HW_GPU, "Unknown color combiner operation {}", static_cast<int>(op));
        UNIMPLEMENTED();
    }

    pand(dest, MAX_COLOR);
}
|
||||
|
||||
/// Emits code that loads one 32-bit lane of `src` into `dest`, optionally as 255 minus the
/// lane value, as selected by the alpha-source modifier `factor`.
/// The "one minus" variants use eax as a scratch register.
void TevConfig::GetAlphaModifier(const Xbyak::Xmm& src, const Xbyak::Reg32& dest,
                                 TevStageConfig::AlphaModifier factor) {
    using AlphaModifier = TevStageConfig::AlphaModifier;

    // dest = src[lane]
    const auto load = [&](u32 lane) { vpextrd(dest, src, lane); };

    // dest = 255 - src[lane]; the lane is staged in eax.
    const auto load_inverted = [&](u32 lane) {
        vpextrd(eax, src, lane);
        mov(dest, 255);
        sub(dest, eax);
    };

    switch (factor) {
    case AlphaModifier::SourceAlpha:
        load(3);
        break;
    case AlphaModifier::OneMinusSourceAlpha:
        load_inverted(3);
        break;
    case AlphaModifier::SourceRed:
        load(0);
        break;
    case AlphaModifier::OneMinusSourceRed:
        load_inverted(0);
        break;
    case AlphaModifier::SourceGreen:
        load(1);
        break;
    case AlphaModifier::OneMinusSourceGreen:
        load_inverted(1);
        break;
    case AlphaModifier::SourceBlue:
        load(2);
        break;
    case AlphaModifier::OneMinusSourceBlue:
        load_inverted(2);
        break;
    default:
        UNREACHABLE();
    }
}
|
||||
|
||||
/// Emits the scalar alpha combiner for `op`.
///
/// The operands are read from A0, A1 and A2 (which may be clobbered), eax is used as a
/// scratch register, and the combined value is written to `dest`.
void TevConfig::AlphaCombine(const Xbyak::Reg32& dest, TevStageConfig::Operation op) {
    using Operation = TevStageConfig::Operation;

    // dst = src / 255, computed as (src * 0x80808081) >> 39 in 64-bit arithmetic.
    // `dst` is overwritten with the constant before `src` is read, so the two must be
    // different registers. The upper half of src's 64-bit register is assumed to be zero,
    // which holds after any 32-bit write to it.
    const auto div_255 = [&](const Reg32& dst, const Reg32& src) {
        mov(dst, 0x80808081);
        imul(dst.cvt64(), src.cvt64());
        shr(dst.cvt64(), 39);
    };

    switch (op) {
    case Operation::Replace:
        mov(dest, A0);
        break;
    case Operation::Modulate:
        imul(A0, A1);
        div_255(dest, A0);
        break;
    case Operation::Add:
        // dest = min(A0 + A1, 255)
        add(A0, A1);
        cmp(A0, 255);
        mov(eax, 255); // mov leaves the flags from cmp untouched
        // Saturate only when the sum is above 255 (cmovb would overwrite every in-range sum).
        cmova(A0, eax);
        mov(dest, A0);
        break;
    case Operation::AddSigned:
        // dest = clamp(A0 + A1 - 128, 0, 255)
        xor_(eax, eax);
        add(A0, A1);
        sub(A0, 128);
        test(A0, A0);
        cmovg(eax, A0); // eax = max(A0, 0)
        cmp(eax, 255);
        mov(A0, 255);
        cmovb(A0, eax); // A0 = min(eax, 255)
        // Publish the result; the other operations all finish by writing dest.
        mov(dest, A0);
        break;
    case Operation::Lerp:
        // dest = (A0 * A2 + A1 * (255 - A2)) / 255
        imul(A0, A2);
        mov(eax, 255);
        sub(eax, A2);
        imul(A1, eax);
        add(A0, A1);
        div_255(dest, A0);
        break;
    case Operation::Subtract:
        // dest = max(A0 - A1, 0)
        sub(A0, A1);
        xor_(eax, eax);
        test(A0, A0);
        cmovl(A0, eax);
        mov(dest, A0);
        break;
    case Operation::MultiplyThenAdd:
        // dest = min((A0 * A1 + 255 * A2) / 255, 255)
        imul(A0, A1);
        mov(dest, A2);
        shl(dest, 8);
        sub(dest, A2); // dest = A2 * 256 - A2 = A2 * 255
        add(dest, A0);
        div_255(eax, dest);
        cmp(eax, 255);
        mov(dest, 255);
        cmovb(dest, eax);
        break;
    case Operation::AddThenMultiply:
        // dest = min(A0 + A1, 255) * A2 / 255
        add(A0, A1);
        cmp(A0, 255);
        mov(eax, 255);
        cmovg(A0, eax);
        imul(A0, A2);
        div_255(dest, A0);
        break;
    default:
        LOG_ERROR(HW_GPU, "Unknown alpha combiner operation {}", static_cast<int>(op));
        UNIMPLEMENTED();
    }
}
|
||||
|
||||
} // namespace SwRenderer
|
64
src/video_core/renderer_software/sw_tev_jit.h
Normal file
64
src/video_core/renderer_software/sw_tev_jit.h
Normal file
@ -0,0 +1,64 @@
|
||||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
#include "common/hash.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
|
||||
namespace Pica {
|
||||
struct State;
|
||||
struct Regs;
|
||||
} // namespace Pica
|
||||
|
||||
namespace SwRenderer {
|
||||
|
||||
struct TevConfigKey {
|
||||
explicit TevConfigKey(const Pica::TexturingRegs& regs);
|
||||
|
||||
u64 Hash() const noexcept {
|
||||
return Common::ComputeHash64(this, sizeof(TevConfigKey));
|
||||
}
|
||||
|
||||
std::array<Pica::TexturingRegs::TevStageConfig, 6> stages;
|
||||
};
|
||||
|
||||
class TevConfig : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
explicit TevConfig(const Pica::Regs& regs, const TevConfigKey& key);
|
||||
~TevConfig();
|
||||
|
||||
Common::Vec4<u8> Run(std::span<Common::Vec4<u8>, 4> texture_color_,
|
||||
Common::Vec4<u8> primary_color_, Common::Vec4<u8> primary_fragment_color_,
|
||||
Common::Vec4<u8> secondary_fragment_color_, u64 tev_combiner_buffer_color);
|
||||
|
||||
private:
|
||||
void WriteTevConfig(const TevConfigKey& key);
|
||||
|
||||
void GetColorModifier(const Xbyak::Xmm& dest,
|
||||
Pica::TexturingRegs::TevStageConfig::ColorModifier factor);
|
||||
|
||||
void GetAlphaModifier(const Xbyak::Xmm& src, const Xbyak::Reg32& dest,
|
||||
Pica::TexturingRegs::TevStageConfig::AlphaModifier factor);
|
||||
|
||||
void ColorCombine(const Xbyak::Xmm& dest, Pica::TexturingRegs::TevStageConfig::Operation op);
|
||||
|
||||
void AlphaCombine(const Xbyak::Reg32& dest, Pica::TexturingRegs::TevStageConfig::Operation op);
|
||||
|
||||
private:
|
||||
const Pica::Regs& regs;
|
||||
|
||||
using CompiledTevFun = u32(u32* texture_color, u32 primary_color, u32 primary_fragment_color,
|
||||
u64 secondary_fragment_color_and_tev_combiner_buffer_color);
|
||||
|
||||
CompiledTevFun* program = nullptr;
|
||||
};
|
||||
|
||||
// Map from a u64 key to an owned TevConfig. The key is used directly as the bucket hash
// (Common::IdentityHash); presumably it is TevConfigKey::Hash() - confirm against callers.
using TevCache = std::unordered_map<u64, std::unique_ptr<TevConfig>, Common::IdentityHash<u64>>;
|
||||
|
||||
} // namespace SwRenderer
|
@ -338,15 +338,39 @@ void JitShader::Compile_SanitizedMul(Xmm src1, Xmm src2, Xmm scratch) {
|
||||
// where neither source was, this NaN was generated by a 0 * inf multiplication, and so the
|
||||
// result should be transformed to 0 to match PICA fp rules.
|
||||
|
||||
if (host_caps.has(Cpu::tAVX512F | Cpu::tAVX512VL | Cpu::tAVX512DQ)) {
|
||||
vmulps(scratch, src1, src2);
|
||||
|
||||
// Mask of any NaN values found in the result
|
||||
const Xbyak::Opmask zero_mask = k1;
|
||||
vcmpunordps(zero_mask, scratch, scratch);
|
||||
|
||||
// Mask of any non-NaN inputs producing NaN results
|
||||
vcmpordps(zero_mask | zero_mask, src1, src2);
|
||||
|
||||
knotb(zero_mask, zero_mask);
|
||||
vmovaps(src1 | zero_mask | T_z, scratch);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Set scratch to mask of (src1 != NaN and src2 != NaN)
|
||||
movaps(scratch, src1);
|
||||
cmpordps(scratch, src2);
|
||||
if (host_caps.has(Cpu::tAVX)) {
|
||||
vcmpordps(scratch, src1, src2);
|
||||
} else {
|
||||
movaps(scratch, src1);
|
||||
cmpordps(scratch, src2);
|
||||
}
|
||||
|
||||
mulps(src1, src2);
|
||||
|
||||
// Set src2 to mask of (result == NaN)
|
||||
movaps(src2, src1);
|
||||
cmpunordps(src2, src2);
|
||||
if (host_caps.has(Cpu::tAVX)) {
|
||||
vcmpunordps(src2, src2, src1);
|
||||
} else {
|
||||
movaps(src2, src1);
|
||||
cmpunordps(src2, src2);
|
||||
}
|
||||
|
||||
// Clear components where scratch != src2 (i.e. if result is NaN where neither source was NaN)
|
||||
xorps(scratch, src2);
|
||||
@ -406,13 +430,20 @@ void JitShader::Compile_DP3(Instruction instr) {
|
||||
|
||||
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
|
||||
|
||||
movaps(SRC2, SRC1);
|
||||
shufps(SRC2, SRC2, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
if (host_caps.has(Cpu::tAVX)) {
|
||||
vshufps(SRC3, SRC1, SRC1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vshufps(SRC2, SRC1, SRC1, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
vshufps(SRC1, SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
} else {
|
||||
movaps(SRC2, SRC1);
|
||||
shufps(SRC2, SRC2, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
movaps(SRC3, SRC1);
|
||||
shufps(SRC3, SRC3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
movaps(SRC3, SRC1);
|
||||
shufps(SRC3, SRC3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
}
|
||||
|
||||
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
addps(SRC1, SRC2);
|
||||
addps(SRC1, SRC3);
|
||||
|
||||
@ -589,9 +620,15 @@ void JitShader::Compile_MOV(Instruction instr) {
|
||||
void JitShader::Compile_RCP(Instruction instr) {
|
||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||
|
||||
// TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
|
||||
// performs this operation more accurately. This should be checked on hardware.
|
||||
rcpss(SRC1, SRC1);
|
||||
if (host_caps.has(Cpu::tAVX512F | Cpu::tAVX512VL)) {
|
||||
// Accurate to 14 bits of precisions rather than 12 bits of rcpss
|
||||
vrcp14ss(SRC1, SRC1, SRC1);
|
||||
} else {
|
||||
// TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
|
||||
// performs this operation more accurately. This should be checked on hardware.
|
||||
rcpss(SRC1, SRC1);
|
||||
}
|
||||
|
||||
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
|
||||
|
||||
Compile_DestEnable(instr, SRC1);
|
||||
@ -600,9 +637,15 @@ void JitShader::Compile_RCP(Instruction instr) {
|
||||
void JitShader::Compile_RSQ(Instruction instr) {
|
||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||
|
||||
// TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
|
||||
// performs this operation more accurately. This should be checked on hardware.
|
||||
rsqrtss(SRC1, SRC1);
|
||||
if (host_caps.has(Cpu::tAVX512F | Cpu::tAVX512VL)) {
|
||||
// Accurate to 14 bits of precisions rather than 12 bits of rsqrtss
|
||||
vrsqrt14ss(SRC1, SRC1, SRC1);
|
||||
} else {
|
||||
// TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
|
||||
// performs this operation more accurately. This should be checked on hardware.
|
||||
rsqrtss(SRC1, SRC1);
|
||||
}
|
||||
|
||||
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
|
||||
|
||||
Compile_DestEnable(instr, SRC1);
|
||||
@ -1050,32 +1093,47 @@ Xbyak::Label JitShader::CompilePrelude_Log2() {
|
||||
jp(input_is_nan);
|
||||
jae(input_out_of_range);
|
||||
|
||||
// Split input
|
||||
movd(eax, SRC1);
|
||||
mov(edx, eax);
|
||||
and_(eax, 0x7f800000);
|
||||
and_(edx, 0x007fffff);
|
||||
movss(SCRATCH, xword[rip + c0]); // Preload c0.
|
||||
or_(edx, 0x3f800000);
|
||||
movd(SRC1, edx);
|
||||
// SRC1 now contains the mantissa of the input.
|
||||
mulss(SCRATCH, SRC1);
|
||||
shr(eax, 23);
|
||||
sub(eax, 0x7f);
|
||||
cvtsi2ss(SCRATCH2, eax);
|
||||
// SCRATCH2 now contains the exponent of the input.
|
||||
// Split input: SRC1=MANT[1,2) SCRATCH2=Exponent
|
||||
if (host_caps.has(Cpu::tAVX512F | Cpu::tAVX512VL)) {
|
||||
vgetexpss(SCRATCH2, SRC1, SRC1);
|
||||
vgetmantss(SRC1, SRC1, SRC1, 0x0'0);
|
||||
} else {
|
||||
movd(eax, SRC1);
|
||||
mov(edx, eax);
|
||||
and_(eax, 0x7f800000);
|
||||
and_(edx, 0x007fffff);
|
||||
or_(edx, 0x3f800000);
|
||||
movd(SRC1, edx);
|
||||
// SRC1 now contains the mantissa of the input.
|
||||
shr(eax, 23);
|
||||
sub(eax, 0x7f);
|
||||
cvtsi2ss(SCRATCH2, eax);
|
||||
// SCRATCH2 now contains the exponent of the input.
|
||||
}
|
||||
|
||||
movss(SCRATCH, xword[rip + c0]);
|
||||
|
||||
// Complete computation of polynomial
|
||||
addss(SCRATCH, xword[rip + c1]);
|
||||
mulss(SCRATCH, SRC1);
|
||||
addss(SCRATCH, xword[rip + c2]);
|
||||
mulss(SCRATCH, SRC1);
|
||||
addss(SCRATCH, xword[rip + c3]);
|
||||
mulss(SCRATCH, SRC1);
|
||||
subss(SRC1, ONE);
|
||||
addss(SCRATCH, xword[rip + c4]);
|
||||
mulss(SCRATCH, SRC1);
|
||||
addss(SCRATCH2, SCRATCH);
|
||||
if (host_caps.has(Cpu::tFMA)) {
|
||||
vfmadd213ss(SCRATCH, SRC1, xword[rip + c1]);
|
||||
vfmadd213ss(SCRATCH, SRC1, xword[rip + c2]);
|
||||
vfmadd213ss(SCRATCH, SRC1, xword[rip + c3]);
|
||||
vfmadd213ss(SCRATCH, SRC1, xword[rip + c4]);
|
||||
subss(SRC1, ONE);
|
||||
vfmadd231ss(SCRATCH2, SCRATCH, SRC1);
|
||||
} else {
|
||||
mulss(SCRATCH, SRC1);
|
||||
addss(SCRATCH, xword[rip + c1]);
|
||||
mulss(SCRATCH, SRC1);
|
||||
addss(SCRATCH, xword[rip + c2]);
|
||||
mulss(SCRATCH, SRC1);
|
||||
addss(SCRATCH, xword[rip + c3]);
|
||||
mulss(SCRATCH, SRC1);
|
||||
subss(SRC1, ONE);
|
||||
addss(SCRATCH, xword[rip + c4]);
|
||||
mulss(SCRATCH, SRC1);
|
||||
addss(SCRATCH2, SCRATCH);
|
||||
}
|
||||
|
||||
// Duplicate result across vector
|
||||
xorps(SRC1, SRC1); // break dependency chain
|
||||
@ -1122,33 +1180,69 @@ Xbyak::Label JitShader::CompilePrelude_Exp2() {
|
||||
// Handle edge cases
|
||||
ucomiss(SRC1, SRC1);
|
||||
jp(ret_label);
|
||||
// Clamp to maximum range since we shift the value directly into the exponent.
|
||||
minss(SRC1, xword[rip + input_max]);
|
||||
maxss(SRC1, xword[rip + input_min]);
|
||||
|
||||
// Decompose input
|
||||
movss(SCRATCH, SRC1);
|
||||
movss(SCRATCH2, xword[rip + c0]); // Preload c0.
|
||||
subss(SCRATCH, xword[rip + half]);
|
||||
cvtss2si(eax, SCRATCH);
|
||||
cvtsi2ss(SCRATCH, eax);
|
||||
// SCRATCH now contains input rounded to the nearest integer.
|
||||
add(eax, 0x7f);
|
||||
subss(SRC1, SCRATCH);
|
||||
// SRC1 contains input - round(input), which is in [-0.5, 0.5).
|
||||
mulss(SCRATCH2, SRC1);
|
||||
shl(eax, 23);
|
||||
movd(SCRATCH, eax);
|
||||
// SCRATCH contains 2^(round(input)).
|
||||
// Decompose input:
|
||||
// SCRATCH=2^round(input)
|
||||
// SRC1=input-round(input) [-0.5, 0.5)
|
||||
if (host_caps.has(Cpu::tAVX512F | Cpu::tAVX512VL)) {
|
||||
// input - 0.5
|
||||
vsubss(SCRATCH, SRC1, xword[rip + half]);
|
||||
|
||||
// trunc(input - 0.5)
|
||||
vrndscaless(SCRATCH2, SCRATCH, SCRATCH, _MM_FROUND_TRUNC);
|
||||
|
||||
// SCRATCH = 1 * 2^(trunc(input - 0.5))
|
||||
vscalefss(SCRATCH, ONE, SCRATCH2);
|
||||
|
||||
// SRC1 = input-trunc(input - 0.5)
|
||||
vsubss(SRC1, SRC1, SCRATCH2);
|
||||
} else {
|
||||
// Clamp to maximum range since we shift the value directly into the exponent.
|
||||
minss(SRC1, xword[rip + input_max]);
|
||||
maxss(SRC1, xword[rip + input_min]);
|
||||
|
||||
if (host_caps.has(Cpu::tAVX)) {
|
||||
vsubss(SCRATCH, SRC1, xword[rip + half]);
|
||||
} else {
|
||||
movss(SCRATCH, SRC1);
|
||||
subss(SCRATCH, xword[rip + half]);
|
||||
}
|
||||
|
||||
if (host_caps.has(Cpu::tSSE41)) {
|
||||
roundss(SCRATCH, SCRATCH, _MM_FROUND_TRUNC);
|
||||
cvtss2si(eax, SCRATCH);
|
||||
} else {
|
||||
cvtss2si(eax, SCRATCH);
|
||||
cvtsi2ss(SCRATCH, eax);
|
||||
}
|
||||
// SCRATCH now contains input rounded to the nearest integer.
|
||||
add(eax, 0x7f);
|
||||
subss(SRC1, SCRATCH);
|
||||
// SRC1 contains input - round(input), which is in [-0.5, 0.5).
|
||||
shl(eax, 23);
|
||||
movd(SCRATCH, eax);
|
||||
// SCRATCH contains 2^(round(input)).
|
||||
}
|
||||
|
||||
// Complete computation of polynomial.
|
||||
addss(SCRATCH2, xword[rip + c1]);
|
||||
mulss(SCRATCH2, SRC1);
|
||||
addss(SCRATCH2, xword[rip + c2]);
|
||||
mulss(SCRATCH2, SRC1);
|
||||
addss(SCRATCH2, xword[rip + c3]);
|
||||
mulss(SRC1, SCRATCH2);
|
||||
addss(SRC1, xword[rip + c4]);
|
||||
movss(SCRATCH2, xword[rip + c0]);
|
||||
|
||||
if (host_caps.has(Cpu::tFMA)) {
|
||||
vfmadd213ss(SCRATCH2, SRC1, xword[rip + c1]);
|
||||
vfmadd213ss(SCRATCH2, SRC1, xword[rip + c2]);
|
||||
vfmadd213ss(SCRATCH2, SRC1, xword[rip + c3]);
|
||||
vfmadd213ss(SRC1, SCRATCH2, xword[rip + c4]);
|
||||
} else {
|
||||
mulss(SCRATCH2, SRC1);
|
||||
addss(SCRATCH2, xword[rip + c1]);
|
||||
mulss(SCRATCH2, SRC1);
|
||||
addss(SCRATCH2, xword[rip + c2]);
|
||||
mulss(SCRATCH2, SRC1);
|
||||
addss(SCRATCH2, xword[rip + c3]);
|
||||
mulss(SRC1, SCRATCH2);
|
||||
addss(SRC1, xword[rip + c4]);
|
||||
}
|
||||
|
||||
mulss(SRC1, SCRATCH);
|
||||
|
||||
// Duplicate result across vector
|
||||
|
Reference in New Issue
Block a user