diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 08cf6268f..d3d32a359 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,6 +3,8 @@
 // Refer to the license.txt file included.
 
 #include "common/assert.h"
+#include "core/core_timing.h"
+#include "core/memory.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -124,9 +126,36 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) {
     }
 }
 
+// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
+// their numbers are written multiplied by 4 in NVIDIA's documentation. Here we do not multiply
+// by 4, so the values listed in the docs are four times the values used in this enum.
 enum class BufferMethods {
-    BindObject = 0,
-    CountBufferMethods = 0x40,
+    BindObject = 0x0,
+    Nop = 0x2,
+    SemaphoreAddressHigh = 0x4,
+    SemaphoreAddressLow = 0x5,
+    SemaphoreSequence = 0x6,
+    SemaphoreTrigger = 0x7,
+    NotifyIntr = 0x8,
+    WrcacheFlush = 0x9,
+    Unk28 = 0xA,
+    Unk2c = 0xB,
+    RefCnt = 0x14,
+    SemaphoreAcquire = 0x1A,
+    SemaphoreRelease = 0x1B,
+    Unk70 = 0x1C,
+    Unk74 = 0x1D,
+    Unk78 = 0x1E,
+    Unk7c = 0x1F,
+    Yield = 0x20,
+    NonPullerMethods = 0x40,
+};
+
+enum class GpuSemaphoreOperation {
+    AcquireEqual = 0x1,
+    WriteLong = 0x2,
+    AcquireGequal = 0x4,
+    AcquireMask = 0x8,
 };
 
 void GPU::CallMethod(const MethodCall& method_call) {
@@ -135,20 +164,78 @@ void GPU::CallMethod(const MethodCall& method_call) {
 
     ASSERT(method_call.subchannel < bound_engines.size());
 
-    if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) {
-        // Bind the current subchannel to the desired engine id.
-        LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
-                  method_call.argument);
-        bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
-        return;
+    if (ExecuteMethodOnEngine(method_call)) {
+        CallEngineMethod(method_call);
+    } else {
+        CallPullerMethod(method_call);
     }
+}
 
-    if (method_call.method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
-        // TODO(Subv): Research and implement these methods.
-        LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
-        return;
+bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) {
+    const auto method = static_cast<BufferMethods>(method_call.method);
+    return method >= BufferMethods::NonPullerMethods;
+}
+
+void GPU::CallPullerMethod(const MethodCall& method_call) {
+    regs.reg_array[method_call.method] = method_call.argument;
+    const auto method = static_cast<BufferMethods>(method_call.method);
+
+    switch (method) {
+    case BufferMethods::BindObject: {
+        ProcessBindMethod(method_call);
+        break;
     }
+    case BufferMethods::Nop:
+    case BufferMethods::SemaphoreAddressHigh:
+    case BufferMethods::SemaphoreAddressLow:
+    case BufferMethods::SemaphoreSequence:
+    case BufferMethods::RefCnt:
+        break;
+    case BufferMethods::SemaphoreTrigger: {
+        ProcessSemaphoreTriggerMethod();
+        break;
+    }
+    case BufferMethods::NotifyIntr: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
+        break;
+    }
+    case BufferMethods::WrcacheFlush: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented");
+        break;
+    }
+    case BufferMethods::Unk28: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
+        break;
+    }
+    case BufferMethods::Unk2c: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented");
+        break;
+    }
+    case BufferMethods::SemaphoreAcquire: {
+        ProcessSemaphoreAcquire();
+        break;
+    }
+    case BufferMethods::SemaphoreRelease: {
+        ProcessSemaphoreRelease();
+        break;
+    }
+    case BufferMethods::Yield: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
+        break;
+    }
+    default:
+        LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented",
+                  static_cast<u32>(method));
+        break;
+    }
+}
 
+void GPU::CallEngineMethod(const MethodCall& method_call) {
     const EngineID engine = bound_engines[method_call.subchannel];
 
     switch (engine) {
@@ -172,4 +259,79 @@ void GPU::CallMethod(const MethodCall& method_call) {
     }
 }
 
+void GPU::ProcessBindMethod(const MethodCall& method_call) {
+    // Bind the current subchannel to the desired engine id.
+    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
+              method_call.argument);
+    bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
+}
+
+void GPU::ProcessSemaphoreTriggerMethod() {
+    const auto semaphoreOperationMask = 0xF;
+    const auto op =
+        static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
+    if (op == GpuSemaphoreOperation::WriteLong) {
+        const auto address =
+            memory_manager->GpuToCpuAddress(regs.semaphore_address.SemaphoreAddress());
+        struct Block {
+            u32 sequence;
+            u32 zeros = 0;
+            u64 timestamp;
+        };
+
+        Block block{};
+        block.sequence = regs.semaphore_sequence;
+        // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
+        // CoreTiming.
+        block.timestamp = CoreTiming::GetTicks();
+        Memory::WriteBlock(*address, &block, sizeof(block));
+    } else {
+        const auto address =
+            memory_manager->GpuToCpuAddress(regs.semaphore_address.SemaphoreAddress());
+        const u32 word = Memory::Read32(*address);
+        if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
+            (op == GpuSemaphoreOperation::AcquireGequal &&
+             static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
+            (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
+            // Nothing to do in this case
+        } else {
+            regs.acquire_source = true;
+            regs.acquire_value = regs.semaphore_sequence;
+            if (op == GpuSemaphoreOperation::AcquireEqual) {
+                regs.acquire_active = true;
+                regs.acquire_mode = false;
+            } else if (op == GpuSemaphoreOperation::AcquireGequal) {
+                regs.acquire_active = true;
+                regs.acquire_mode = true;
+            } else if (op == GpuSemaphoreOperation::AcquireMask) {
+                // TODO(kemathe): The acquire mask operation waits for a value that, ANDed with
+                // semaphore_sequence, gives a non-zero result.
+                LOG_ERROR(HW_GPU, "Semaphore operation AcquireMask is not implemented");
+            } else {
+                LOG_ERROR(HW_GPU, "Invalid semaphore operation");
+            }
+        }
+    }
+}
+
+void GPU::ProcessSemaphoreRelease() {
+    const auto address =
+        memory_manager->GpuToCpuAddress(regs.semaphore_address.SemaphoreAddress());
+    Memory::Write32(*address, regs.semaphore_release);
+}
+
+void GPU::ProcessSemaphoreAcquire() {
+    const auto address =
+        memory_manager->GpuToCpuAddress(regs.semaphore_address.SemaphoreAddress());
+    const u32 word = Memory::Read32(*address);
+    const auto value = regs.semaphore_acquire;
+    if (word != value) {
+        regs.acquire_active = true;
+        regs.acquire_value = value;
+        // TODO(kemathe73): Figure out how to handle the acquire_timeout.
+        regs.acquire_mode = false;
+        regs.acquire_source = false;
+    }
+}
+
 } // namespace Tegra
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index af5ccd1e9..fb8975811 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -156,6 +156,46 @@ public:
     /// Returns a const reference to the GPU DMA pusher.
     const Tegra::DmaPusher& DmaPusher() const;
 
+    struct Regs {
+        static constexpr size_t NUM_REGS = 0x100;
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS(0x4);
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+
+                    GPUVAddr SemaphoreAddress() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } semaphore_address;
+
+                u32 semaphore_sequence;
+                u32 semaphore_trigger;
+                INSERT_PADDING_WORDS(0xC);
+
+                // The pusher and the puller share the reference counter; the pusher only has
+                // read access.
+                u32 reference_count;
+                INSERT_PADDING_WORDS(0x5);
+
+                u32 semaphore_acquire;
+                u32 semaphore_release;
+                INSERT_PADDING_WORDS(0xE4);
+
+                // Puller state
+                u32 acquire_mode;
+                u32 acquire_source;
+                u32 acquire_active;
+                u32 acquire_timeout;
+                u32 acquire_value;
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+
 private:
     std::unique_ptr<Tegra::DmaPusher> dma_pusher;
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
@@ -173,6 +213,37 @@ private:
     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
     /// Inline memory engine
     std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+
+    void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessSemaphoreTriggerMethod();
+    void ProcessSemaphoreRelease();
+    void ProcessSemaphoreAcquire();
+
+    /// Calls a GPU puller method.
+    void CallPullerMethod(const MethodCall& method_call);
+    /// Calls a GPU engine method.
+    void CallEngineMethod(const MethodCall& method_call);
+    /// Determines whether the method should be executed on a bound engine or by the puller.
+    bool ExecuteMethodOnEngine(const MethodCall& method_call);
 };
 
+#define ASSERT_REG_POSITION(field_name, position)                                                 \
+    static_assert(offsetof(GPU::Regs, field_name) == position * 4,                                \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(semaphore_address, 0x4);
+ASSERT_REG_POSITION(semaphore_sequence, 0x6);
+ASSERT_REG_POSITION(semaphore_trigger, 0x7);
+ASSERT_REG_POSITION(reference_count, 0x14);
+ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
+ASSERT_REG_POSITION(semaphore_release, 0x1B);
+
+ASSERT_REG_POSITION(acquire_mode, 0x100);
+ASSERT_REG_POSITION(acquire_source, 0x101);
+ASSERT_REG_POSITION(acquire_active, 0x102);
+ASSERT_REG_POSITION(acquire_timeout, 0x103);
+ASSERT_REG_POSITION(acquire_value, 0x104);
+
+#undef ASSERT_REG_POSITION
+
 } // namespace Tegra
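
Reviewer note: a small standalone sketch, not part of the diff, illustrating two details of GPU::ProcessSemaphoreTriggerMethod above: the 16-byte payload that the WriteLong operation stores at the semaphore address, and the signed 32-bit difference used for AcquireGequal so that sequence numbers that wrap around 2^32 still compare sensibly. The names SemaphorePayload and GequalSatisfied are invented for the example; the actual code uses the local Block struct and an inline comparison.

#include <cstdint>

// Mirrors the Block struct written by GpuSemaphoreOperation::WriteLong:
// 32-bit sequence, 32 bits of zeros, then a 64-bit timestamp (16 bytes total).
struct SemaphorePayload {
    std::uint32_t sequence;
    std::uint32_t zeros = 0;
    std::uint64_t timestamp;
};
static_assert(sizeof(SemaphorePayload) == 16, "WriteLong stores a 16-byte payload");

// AcquireGequal interprets the 32-bit difference as signed, so a semaphore word
// that has wrapped past the target sequence still satisfies the acquire.
constexpr bool GequalSatisfied(std::uint32_t word, std::uint32_t sequence) {
    return static_cast<std::int32_t>(word - sequence) > 0;
}
static_assert(GequalSatisfied(0x00000001, 0xFFFFFFFF), "word wrapped past the sequence");
static_assert(!GequalSatisfied(0x00000001, 0x7FFFFFFF), "word still far behind the sequence");

Note that, as in the diff, the comparison uses > 0, so a word exactly equal to semaphore_sequence does not satisfy AcquireGequal; whether that should be >= 0 is left untouched here.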