Merge pull request #50 from neobrain/pica
Further work on Pica emulation
This commit is contained in:
commit
97fd8fc38d
|
@ -9,6 +9,11 @@ add_definitions(-Wno-attributes)
|
||||||
add_definitions(-DSINGLETHREADED)
|
add_definitions(-DSINGLETHREADED)
|
||||||
add_definitions(${CXX_COMPILE_FLAGS})
|
add_definitions(${CXX_COMPILE_FLAGS})
|
||||||
|
|
||||||
|
find_package(PNG)
|
||||||
|
if (PNG_FOUND)
|
||||||
|
add_definitions(-DHAVE_PNG)
|
||||||
|
endif ()
|
||||||
|
|
||||||
# dependency checking
|
# dependency checking
|
||||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules/")
|
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules/")
|
||||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/CMakeTests)
|
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/CMakeTests)
|
||||||
|
|
|
@ -12,7 +12,7 @@ add_executable(citra ${SRCS} ${HEADERS})
|
||||||
if (APPLE)
|
if (APPLE)
|
||||||
target_link_libraries(citra core common video_core iconv pthread ${COREFOUNDATION_LIBRARY} ${OPENGL_LIBRARIES} ${GLEW_LIBRARY} ${GLFW_LIBRARIES})
|
target_link_libraries(citra core common video_core iconv pthread ${COREFOUNDATION_LIBRARY} ${OPENGL_LIBRARIES} ${GLEW_LIBRARY} ${GLFW_LIBRARIES})
|
||||||
else()
|
else()
|
||||||
target_link_libraries(citra core common video_core GLEW pthread X11 Xxf86vm Xi Xcursor ${OPENGL_LIBRARIES} ${GLFW_LIBRARIES} rt ${X11_Xrandr_LIB} ${X11_xv86vmode_LIB})
|
target_link_libraries(citra core common video_core GLEW pthread X11 Xxf86vm Xi Xcursor ${OPENGL_LIBRARIES} ${GLFW_LIBRARIES} rt ${X11_Xrandr_LIB} ${X11_xv86vmode_LIB} ${PNG_LIBRARIES})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
#install(TARGETS citra RUNTIME DESTINATION ${bindir})
|
#install(TARGETS citra RUNTIME DESTINATION ${bindir})
|
||||||
|
|
|
@ -47,7 +47,7 @@ else()
|
||||||
set(RT_LIBRARY rt)
|
set(RT_LIBRARY rt)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
target_link_libraries(citra-qt core common video_core qhexedit ${ICONV_LIBRARY} ${COREFOUNDATION_LIBRARY} ${QT_LIBRARIES} ${OPENGL_LIBRARIES} ${RT_LIBRARY} ${GLEW_LIBRARY})
|
target_link_libraries(citra-qt core common video_core qhexedit ${ICONV_LIBRARY} ${COREFOUNDATION_LIBRARY} ${QT_LIBRARIES} ${OPENGL_LIBRARIES} ${RT_LIBRARY} ${GLEW_LIBRARY} ${PNG_LIBRARIES})
|
||||||
if(USE_QT5)
|
if(USE_QT5)
|
||||||
target_link_libraries(citra-qt Qt5::Gui Qt5::Widgets Qt5::OpenGL)
|
target_link_libraries(citra-qt Qt5::Gui Qt5::Widgets Qt5::OpenGL)
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -2,53 +2,21 @@
|
||||||
// Licensed under GPLv2
|
// Licensed under GPLv2
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include "graphics_cmdlists.hxx"
|
#include <QListView>
|
||||||
|
#include <QPushButton>
|
||||||
|
#include <QVBoxLayout>
|
||||||
#include <QTreeView>
|
#include <QTreeView>
|
||||||
|
|
||||||
extern GraphicsDebugger g_debugger;
|
#include "graphics_cmdlists.hxx"
|
||||||
|
|
||||||
GPUCommandListModel::GPUCommandListModel(QObject* parent) : QAbstractItemModel(parent)
|
GPUCommandListModel::GPUCommandListModel(QObject* parent) : QAbstractListModel(parent)
|
||||||
{
|
{
|
||||||
root_item = new TreeItem(TreeItem::ROOT, 0, NULL, this);
|
|
||||||
|
|
||||||
connect(this, SIGNAL(CommandListCalled()), this, SLOT(OnCommandListCalledInternal()), Qt::UniqueConnection);
|
|
||||||
}
|
|
||||||
|
|
||||||
QModelIndex GPUCommandListModel::index(int row, int column, const QModelIndex& parent) const
|
|
||||||
{
|
|
||||||
TreeItem* item;
|
|
||||||
|
|
||||||
if (!parent.isValid()) {
|
|
||||||
item = root_item;
|
|
||||||
} else {
|
|
||||||
item = (TreeItem*)parent.internalPointer();
|
|
||||||
}
|
|
||||||
|
|
||||||
return createIndex(row, column, item->children[row]);
|
|
||||||
}
|
|
||||||
|
|
||||||
QModelIndex GPUCommandListModel::parent(const QModelIndex& child) const
|
|
||||||
{
|
|
||||||
if (!child.isValid())
|
|
||||||
return QModelIndex();
|
|
||||||
|
|
||||||
TreeItem* item = (TreeItem*)child.internalPointer();
|
|
||||||
|
|
||||||
if (item->parent == NULL)
|
|
||||||
return QModelIndex();
|
|
||||||
|
|
||||||
return createIndex(item->parent->index, 0, item->parent);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int GPUCommandListModel::rowCount(const QModelIndex& parent) const
|
int GPUCommandListModel::rowCount(const QModelIndex& parent) const
|
||||||
{
|
{
|
||||||
TreeItem* item;
|
return pica_trace.writes.size();
|
||||||
if (!parent.isValid()) {
|
|
||||||
item = root_item;
|
|
||||||
} else {
|
|
||||||
item = (TreeItem*)parent.internalPointer();
|
|
||||||
}
|
|
||||||
return item->children.size();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int GPUCommandListModel::columnCount(const QModelIndex& parent) const
|
int GPUCommandListModel::columnCount(const QModelIndex& parent) const
|
||||||
|
@ -61,79 +29,67 @@ QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const
|
||||||
if (!index.isValid())
|
if (!index.isValid())
|
||||||
return QVariant();
|
return QVariant();
|
||||||
|
|
||||||
const TreeItem* item = (const TreeItem*)index.internalPointer();
|
const auto& writes = pica_trace.writes;
|
||||||
|
const Pica::CommandProcessor::CommandHeader cmd{writes[index.row()].Id()};
|
||||||
if (item->type == TreeItem::COMMAND_LIST)
|
const u32 val{writes[index.row()].Value()};
|
||||||
{
|
|
||||||
const GraphicsDebugger::PicaCommandList& cmdlist = command_lists[item->index].second;
|
|
||||||
u32 address = command_lists[item->index].first;
|
|
||||||
|
|
||||||
if (role == Qt::DisplayRole && index.column() == 0)
|
|
||||||
{
|
|
||||||
return QVariant(QString("0x%1 bytes at 0x%2").arg(cmdlist.size(), 0, 16).arg(address, 8, 16, QLatin1Char('0')));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// index refers to a specific command
|
|
||||||
const GraphicsDebugger::PicaCommandList& cmdlist = command_lists[item->parent->index].second;
|
|
||||||
const GraphicsDebugger::PicaCommand& cmd = cmdlist[item->index];
|
|
||||||
const Pica::CommandProcessor::CommandHeader& header = cmd.GetHeader();
|
|
||||||
|
|
||||||
if (role == Qt::DisplayRole) {
|
if (role == Qt::DisplayRole) {
|
||||||
QString content;
|
QString content;
|
||||||
if (index.column() == 0) {
|
if (index.column() == 0) {
|
||||||
content = QString::fromLatin1(Pica::Regs::GetCommandName(header.cmd_id).c_str());
|
content = QString::fromLatin1(Pica::Regs::GetCommandName(cmd.cmd_id).c_str());
|
||||||
content.append(" ");
|
content.append(" ");
|
||||||
} else if (index.column() == 1) {
|
} else if (index.column() == 1) {
|
||||||
for (int j = 0; j < cmd.size(); ++j)
|
content.append(QString("%1 ").arg(cmd.hex, 8, 16, QLatin1Char('0')));
|
||||||
content.append(QString("%1 ").arg(cmd[j], 8, 16, QLatin1Char('0')));
|
content.append(QString("%1 ").arg(val, 8, 16, QLatin1Char('0')));
|
||||||
}
|
}
|
||||||
|
|
||||||
return QVariant(content);
|
return QVariant(content);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return QVariant();
|
return QVariant();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUCommandListModel::OnCommandListCalled(const GraphicsDebugger::PicaCommandList& lst, bool is_new)
|
void GPUCommandListModel::OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace& trace)
|
||||||
{
|
|
||||||
emit CommandListCalled();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void GPUCommandListModel::OnCommandListCalledInternal()
|
|
||||||
{
|
{
|
||||||
beginResetModel();
|
beginResetModel();
|
||||||
|
|
||||||
command_lists = GetDebugger()->GetCommandLists();
|
pica_trace = trace;
|
||||||
|
|
||||||
// delete root item and rebuild tree
|
|
||||||
delete root_item;
|
|
||||||
root_item = new TreeItem(TreeItem::ROOT, 0, NULL, this);
|
|
||||||
|
|
||||||
for (int command_list_idx = 0; command_list_idx < command_lists.size(); ++command_list_idx) {
|
|
||||||
TreeItem* command_list_item = new TreeItem(TreeItem::COMMAND_LIST, command_list_idx, root_item, root_item);
|
|
||||||
root_item->children.push_back(command_list_item);
|
|
||||||
|
|
||||||
const GraphicsDebugger::PicaCommandList& command_list = command_lists[command_list_idx].second;
|
|
||||||
for (int command_idx = 0; command_idx < command_list.size(); ++command_idx) {
|
|
||||||
TreeItem* command_item = new TreeItem(TreeItem::COMMAND, command_idx, command_list_item, command_list_item);
|
|
||||||
command_list_item->children.push_back(command_item);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
endResetModel();
|
endResetModel();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
GPUCommandListWidget::GPUCommandListWidget(QWidget* parent) : QDockWidget(tr("Pica Command List"), parent)
|
GPUCommandListWidget::GPUCommandListWidget(QWidget* parent) : QDockWidget(tr("Pica Command List"), parent)
|
||||||
{
|
{
|
||||||
GPUCommandListModel* model = new GPUCommandListModel(this);
|
GPUCommandListModel* model = new GPUCommandListModel(this);
|
||||||
g_debugger.RegisterObserver(model);
|
|
||||||
|
|
||||||
QTreeView* tree_widget = new QTreeView;
|
QWidget* main_widget = new QWidget;
|
||||||
tree_widget->setModel(model);
|
|
||||||
tree_widget->setFont(QFont("monospace"));
|
QTreeView* list_widget = new QTreeView;
|
||||||
setWidget(tree_widget);
|
list_widget->setModel(model);
|
||||||
|
list_widget->setFont(QFont("monospace"));
|
||||||
|
list_widget->setRootIsDecorated(false);
|
||||||
|
|
||||||
|
QPushButton* toggle_tracing = new QPushButton(tr("Start Tracing"));
|
||||||
|
|
||||||
|
connect(toggle_tracing, SIGNAL(clicked()), this, SLOT(OnToggleTracing()));
|
||||||
|
connect(this, SIGNAL(TracingFinished(const Pica::DebugUtils::PicaTrace&)),
|
||||||
|
model, SLOT(OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace&)));
|
||||||
|
|
||||||
|
QVBoxLayout* main_layout = new QVBoxLayout;
|
||||||
|
main_layout->addWidget(list_widget);
|
||||||
|
main_layout->addWidget(toggle_tracing);
|
||||||
|
main_widget->setLayout(main_layout);
|
||||||
|
|
||||||
|
setWidget(main_widget);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUCommandListWidget::OnToggleTracing()
|
||||||
|
{
|
||||||
|
if (!Pica::DebugUtils::IsPicaTracing()) {
|
||||||
|
Pica::DebugUtils::StartPicaTracing();
|
||||||
|
} else {
|
||||||
|
pica_trace = Pica::DebugUtils::FinishPicaTracing();
|
||||||
|
emit TracingFinished(*pica_trace);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,53 +4,28 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <QAbstractItemModel>
|
#include <QAbstractListModel>
|
||||||
#include <QDockWidget>
|
#include <QDockWidget>
|
||||||
|
|
||||||
#include "video_core/gpu_debugger.h"
|
#include "video_core/gpu_debugger.h"
|
||||||
|
#include "video_core/debug_utils/debug_utils.h"
|
||||||
|
|
||||||
// TODO: Rename class, since it's not actually a list model anymore...
|
class GPUCommandListModel : public QAbstractListModel
|
||||||
class GPUCommandListModel : public QAbstractItemModel, public GraphicsDebugger::DebuggerObserver
|
|
||||||
{
|
{
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GPUCommandListModel(QObject* parent);
|
GPUCommandListModel(QObject* parent);
|
||||||
|
|
||||||
QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const;
|
|
||||||
QModelIndex parent(const QModelIndex& child) const;
|
|
||||||
int columnCount(const QModelIndex& parent = QModelIndex()) const;
|
int columnCount(const QModelIndex& parent = QModelIndex()) const;
|
||||||
int rowCount(const QModelIndex& parent = QModelIndex()) const override;
|
int rowCount(const QModelIndex& parent = QModelIndex()) const override;
|
||||||
QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override;
|
QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override;
|
||||||
|
|
||||||
public:
|
|
||||||
void OnCommandListCalled(const GraphicsDebugger::PicaCommandList& lst, bool is_new) override;
|
|
||||||
|
|
||||||
public slots:
|
public slots:
|
||||||
void OnCommandListCalledInternal();
|
void OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace& trace);
|
||||||
|
|
||||||
signals:
|
|
||||||
void CommandListCalled();
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct TreeItem : public QObject
|
Pica::DebugUtils::PicaTrace pica_trace;
|
||||||
{
|
|
||||||
enum Type {
|
|
||||||
ROOT,
|
|
||||||
COMMAND_LIST,
|
|
||||||
COMMAND
|
|
||||||
};
|
|
||||||
|
|
||||||
TreeItem(Type type, int index, TreeItem* item_parent, QObject* parent) : QObject(parent), type(type), index(index), parent(item_parent) {}
|
|
||||||
|
|
||||||
Type type;
|
|
||||||
int index;
|
|
||||||
std::vector<TreeItem*> children;
|
|
||||||
TreeItem* parent;
|
|
||||||
};
|
|
||||||
|
|
||||||
std::vector<std::pair<u32,GraphicsDebugger::PicaCommandList>> command_lists;
|
|
||||||
TreeItem* root_item;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class GPUCommandListWidget : public QDockWidget
|
class GPUCommandListWidget : public QDockWidget
|
||||||
|
@ -60,5 +35,12 @@ class GPUCommandListWidget : public QDockWidget
|
||||||
public:
|
public:
|
||||||
GPUCommandListWidget(QWidget* parent = 0);
|
GPUCommandListWidget(QWidget* parent = 0);
|
||||||
|
|
||||||
|
public slots:
|
||||||
|
void OnToggleTracing();
|
||||||
|
|
||||||
|
signals:
|
||||||
|
void TracingFinished(const Pica::DebugUtils::PicaTrace&);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
std::unique_ptr<Pica::DebugUtils::PicaTrace> pica_trace;
|
||||||
};
|
};
|
||||||
|
|
|
@ -52,11 +52,11 @@ GMainWindow::GMainWindow()
|
||||||
|
|
||||||
graphicsWidget = new GPUCommandStreamWidget(this);
|
graphicsWidget = new GPUCommandStreamWidget(this);
|
||||||
addDockWidget(Qt::RightDockWidgetArea, graphicsWidget);
|
addDockWidget(Qt::RightDockWidgetArea, graphicsWidget);
|
||||||
callstackWidget->hide();
|
graphicsWidget ->hide();
|
||||||
|
|
||||||
graphicsCommandsWidget = new GPUCommandListWidget(this);
|
graphicsCommandsWidget = new GPUCommandListWidget(this);
|
||||||
addDockWidget(Qt::RightDockWidgetArea, graphicsCommandsWidget);
|
addDockWidget(Qt::RightDockWidgetArea, graphicsCommandsWidget);
|
||||||
callstackWidget->hide();
|
graphicsCommandsWidget->hide();
|
||||||
|
|
||||||
QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
|
QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
|
||||||
debug_menu->addAction(disasmWidget->toggleViewAction());
|
debug_menu->addAction(disasmWidget->toggleViewAction());
|
||||||
|
|
|
@ -36,39 +36,55 @@ static inline u8* GetCommandBuffer(u32 thread_id) {
|
||||||
0x800 + (thread_id * sizeof(CommandBuffer)));
|
0x800 + (thread_id * sizeof(CommandBuffer)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline FrameBufferUpdate* GetFrameBufferInfo(u32 thread_id, u32 screen_index) {
|
||||||
|
if (0 == g_shared_memory)
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
_dbg_assert_msg_(GSP, screen_index < 2, "Invalid screen index");
|
||||||
|
|
||||||
|
// For each thread there are two FrameBufferUpdate fields
|
||||||
|
u32 offset = 0x200 + (2 * thread_id + screen_index) * sizeof(FrameBufferUpdate);
|
||||||
|
return (FrameBufferUpdate*)Kernel::GetSharedMemoryPointer(g_shared_memory, offset);
|
||||||
|
}
|
||||||
|
|
||||||
/// Gets a pointer to the interrupt relay queue for a given thread index
|
/// Gets a pointer to the interrupt relay queue for a given thread index
|
||||||
static inline InterruptRelayQueue* GetInterruptRelayQueue(u32 thread_id) {
|
static inline InterruptRelayQueue* GetInterruptRelayQueue(u32 thread_id) {
|
||||||
return (InterruptRelayQueue*)Kernel::GetSharedMemoryPointer(g_shared_memory,
|
return (InterruptRelayQueue*)Kernel::GetSharedMemoryPointer(g_shared_memory,
|
||||||
sizeof(InterruptRelayQueue) * thread_id);
|
sizeof(InterruptRelayQueue) * thread_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void WriteHWRegs(u32 base_address, u32 size_in_bytes, const u32* data) {
|
||||||
|
// TODO: Return proper error codes
|
||||||
|
if (base_address + size_in_bytes >= 0x420000) {
|
||||||
|
ERROR_LOG(GPU, "Write address out of range! (address=0x%08x, size=0x%08x)",
|
||||||
|
base_address, size_in_bytes);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// size should be word-aligned
|
||||||
|
if ((size_in_bytes % 4) != 0) {
|
||||||
|
ERROR_LOG(GPU, "Invalid size 0x%08x", size_in_bytes);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (size_in_bytes > 0) {
|
||||||
|
GPU::Write<u32>(base_address + 0x1EB00000, *data);
|
||||||
|
|
||||||
|
size_in_bytes -= 4;
|
||||||
|
++data;
|
||||||
|
base_address += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Write a GSP GPU hardware register
|
/// Write a GSP GPU hardware register
|
||||||
void WriteHWRegs(Service::Interface* self) {
|
void WriteHWRegs(Service::Interface* self) {
|
||||||
u32* cmd_buff = Service::GetCommandBuffer();
|
u32* cmd_buff = Service::GetCommandBuffer();
|
||||||
u32 reg_addr = cmd_buff[1];
|
u32 reg_addr = cmd_buff[1];
|
||||||
u32 size = cmd_buff[2];
|
u32 size = cmd_buff[2];
|
||||||
|
|
||||||
// TODO: Return proper error codes
|
|
||||||
if (reg_addr + size >= 0x420000) {
|
|
||||||
ERROR_LOG(GPU, "Write address out of range! (address=0x%08x, size=0x%08x)", reg_addr, size);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// size should be word-aligned
|
|
||||||
if ((size % 4) != 0) {
|
|
||||||
ERROR_LOG(GPU, "Invalid size 0x%08x", size);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
u32* src = (u32*)Memory::GetPointer(cmd_buff[0x4]);
|
u32* src = (u32*)Memory::GetPointer(cmd_buff[0x4]);
|
||||||
|
|
||||||
while (size > 0) {
|
WriteHWRegs(reg_addr, size, src);
|
||||||
GPU::Write<u32>(reg_addr + 0x1EB00000, *src);
|
|
||||||
|
|
||||||
size -= 4;
|
|
||||||
++src;
|
|
||||||
reg_addr += 4;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read a GSP GPU hardware register
|
/// Read a GSP GPU hardware register
|
||||||
|
@ -100,6 +116,40 @@ void ReadHWRegs(Service::Interface* self) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SetBufferSwap(u32 screen_id, const FrameBufferInfo& info) {
|
||||||
|
u32 base_address = 0x400000;
|
||||||
|
if (info.active_fb == 0) {
|
||||||
|
WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].address_left1), 4, &info.address_left);
|
||||||
|
WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].address_right1), 4, &info.address_right);
|
||||||
|
} else {
|
||||||
|
WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].address_left2), 4, &info.address_left);
|
||||||
|
WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].address_right2), 4, &info.address_right);
|
||||||
|
}
|
||||||
|
WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].stride), 4, &info.stride);
|
||||||
|
WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].color_format), 4, &info.format);
|
||||||
|
WriteHWRegs(base_address + 4 * GPU_REG_INDEX(framebuffer_config[screen_id].active_fb), 4, &info.shown_fb);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GSP_GPU::SetBufferSwap service function
|
||||||
|
*
|
||||||
|
* Updates GPU display framebuffer configuration using the specified parameters.
|
||||||
|
*
|
||||||
|
* Inputs:
|
||||||
|
* 1 : Screen ID (0 = top screen, 1 = bottom screen)
|
||||||
|
* 2-7 : FrameBufferInfo structure
|
||||||
|
* Outputs:
|
||||||
|
* 1: Result code
|
||||||
|
*/
|
||||||
|
void SetBufferSwap(Service::Interface* self) {
|
||||||
|
u32* cmd_buff = Service::GetCommandBuffer();
|
||||||
|
u32 screen_id = cmd_buff[1];
|
||||||
|
FrameBufferInfo* fb_info = (FrameBufferInfo*)&cmd_buff[2];
|
||||||
|
SetBufferSwap(screen_id, *fb_info);
|
||||||
|
|
||||||
|
cmd_buff[1] = 0; // No error
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GSP_GPU::RegisterInterruptRelayQueue service function
|
* GSP_GPU::RegisterInterruptRelayQueue service function
|
||||||
* Inputs:
|
* Inputs:
|
||||||
|
@ -127,6 +177,7 @@ void RegisterInterruptRelayQueue(Service::Interface* self) {
|
||||||
/**
|
/**
|
||||||
* Signals that the specified interrupt type has occurred to userland code
|
* Signals that the specified interrupt type has occurred to userland code
|
||||||
* @param interrupt_id ID of interrupt that is being signalled
|
* @param interrupt_id ID of interrupt that is being signalled
|
||||||
|
* @todo This should probably take a thread_id parameter and only signal this thread?
|
||||||
*/
|
*/
|
||||||
void SignalInterrupt(InterruptId interrupt_id) {
|
void SignalInterrupt(InterruptId interrupt_id) {
|
||||||
if (0 == g_interrupt_event) {
|
if (0 == g_interrupt_event) {
|
||||||
|
@ -152,7 +203,7 @@ void SignalInterrupt(InterruptId interrupt_id) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Executes the next GSP command
|
/// Executes the next GSP command
|
||||||
void ExecuteCommand(const Command& command) {
|
void ExecuteCommand(const Command& command, u32 thread_id) {
|
||||||
// Utility function to convert register ID to address
|
// Utility function to convert register ID to address
|
||||||
auto WriteGPURegister = [](u32 id, u32 data) {
|
auto WriteGPURegister = [](u32 id, u32 data) {
|
||||||
GPU::Write<u32>(0x1EF00000 + 4 * id, data);
|
GPU::Write<u32>(0x1EF00000 + 4 * id, data);
|
||||||
|
@ -179,11 +230,6 @@ void ExecuteCommand(const Command& command) {
|
||||||
// TODO: Not sure if we are supposed to always write this .. seems to trigger processing though
|
// TODO: Not sure if we are supposed to always write this .. seems to trigger processing though
|
||||||
WriteGPURegister(GPU_REG_INDEX(command_processor_config.trigger), 1);
|
WriteGPURegister(GPU_REG_INDEX(command_processor_config.trigger), 1);
|
||||||
|
|
||||||
// TODO: Move this to GPU
|
|
||||||
// TODO: Not sure what units the size is measured in
|
|
||||||
g_debugger.CommandListCalled(params.address,
|
|
||||||
(u32*)Memory::GetPointer(params.address),
|
|
||||||
params.size);
|
|
||||||
SignalInterrupt(InterruptId::P3D);
|
SignalInterrupt(InterruptId::P3D);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -223,6 +269,15 @@ void ExecuteCommand(const Command& command) {
|
||||||
SignalInterrupt(InterruptId::PPF);
|
SignalInterrupt(InterruptId::PPF);
|
||||||
SignalInterrupt(InterruptId::P3D);
|
SignalInterrupt(InterruptId::P3D);
|
||||||
SignalInterrupt(InterruptId::DMA);
|
SignalInterrupt(InterruptId::DMA);
|
||||||
|
|
||||||
|
// Update framebuffer information if requested
|
||||||
|
for (int screen_id = 0; screen_id < 2; ++screen_id) {
|
||||||
|
FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id);
|
||||||
|
if (info->is_dirty)
|
||||||
|
SetBufferSwap(screen_id, info->framebuffer_info[info->index]);
|
||||||
|
|
||||||
|
info->is_dirty = false;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -265,7 +320,7 @@ void TriggerCmdReqQueue(Service::Interface* self) {
|
||||||
g_debugger.GXCommandProcessed((u8*)&command_buffer->commands[i]);
|
g_debugger.GXCommandProcessed((u8*)&command_buffer->commands[i]);
|
||||||
|
|
||||||
// Decode and execute command
|
// Decode and execute command
|
||||||
ExecuteCommand(command_buffer->commands[i]);
|
ExecuteCommand(command_buffer->commands[i], thread_id);
|
||||||
|
|
||||||
// Indicates that command has completed
|
// Indicates that command has completed
|
||||||
command_buffer->number_commands = command_buffer->number_commands - 1;
|
command_buffer->number_commands = command_buffer->number_commands - 1;
|
||||||
|
@ -278,7 +333,7 @@ const Interface::FunctionInfo FunctionTable[] = {
|
||||||
{0x00020084, nullptr, "WriteHWRegsWithMask"},
|
{0x00020084, nullptr, "WriteHWRegsWithMask"},
|
||||||
{0x00030082, nullptr, "WriteHWRegRepeat"},
|
{0x00030082, nullptr, "WriteHWRegRepeat"},
|
||||||
{0x00040080, ReadHWRegs, "ReadHWRegs"},
|
{0x00040080, ReadHWRegs, "ReadHWRegs"},
|
||||||
{0x00050200, nullptr, "SetBufferSwap"},
|
{0x00050200, SetBufferSwap, "SetBufferSwap"},
|
||||||
{0x00060082, nullptr, "SetCommandList"},
|
{0x00060082, nullptr, "SetCommandList"},
|
||||||
{0x000700C2, nullptr, "RequestDma"},
|
{0x000700C2, nullptr, "RequestDma"},
|
||||||
{0x00080082, nullptr, "FlushDataCache"},
|
{0x00080082, nullptr, "FlushDataCache"},
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "core/hle/service/service.h"
|
#include "core/hle/service/service.h"
|
||||||
|
|
||||||
|
@ -64,6 +66,34 @@ struct InterruptRelayQueue {
|
||||||
static_assert(sizeof(InterruptRelayQueue) == 0x40,
|
static_assert(sizeof(InterruptRelayQueue) == 0x40,
|
||||||
"InterruptRelayQueue struct has incorrect size");
|
"InterruptRelayQueue struct has incorrect size");
|
||||||
|
|
||||||
|
struct FrameBufferInfo {
|
||||||
|
BitField<0, 1, u32> active_fb; // 0 = first, 1 = second
|
||||||
|
|
||||||
|
u32 address_left;
|
||||||
|
u32 address_right;
|
||||||
|
u32 stride; // maps to 0x1EF00X90 ?
|
||||||
|
u32 format; // maps to 0x1EF00X70 ?
|
||||||
|
u32 shown_fb; // maps to 0x1EF00X78 ?
|
||||||
|
u32 unknown;
|
||||||
|
};
|
||||||
|
static_assert(sizeof(FrameBufferInfo) == 0x1c, "Struct has incorrect size");
|
||||||
|
|
||||||
|
struct FrameBufferUpdate {
|
||||||
|
BitField<0, 1, u8> index; // Index used for GSP::SetBufferSwap
|
||||||
|
BitField<0, 1, u8> is_dirty; // true if GSP should update GPU framebuffer registers
|
||||||
|
u16 pad1;
|
||||||
|
|
||||||
|
FrameBufferInfo framebuffer_info[2];
|
||||||
|
|
||||||
|
u32 pad2;
|
||||||
|
};
|
||||||
|
static_assert(sizeof(FrameBufferUpdate) == 0x40, "Struct has incorrect size");
|
||||||
|
// TODO: Not sure if this padding is correct.
|
||||||
|
// Chances are the second block is stored at offset 0x24 rather than 0x20.
|
||||||
|
#ifndef _MSC_VER
|
||||||
|
static_assert(offsetof(FrameBufferUpdate, framebuffer_info[1]) == 0x20, "FrameBufferInfo element has incorrect alignment");
|
||||||
|
#endif
|
||||||
|
|
||||||
/// GSP command
|
/// GSP command
|
||||||
struct Command {
|
struct Command {
|
||||||
BitField<0, 8, CommandId> id;
|
BitField<0, 8, CommandId> id;
|
||||||
|
|
|
@ -42,7 +42,7 @@ struct Regs {
|
||||||
// depending on the current source line to make sure variable names are unique.
|
// depending on the current source line to make sure variable names are unique.
|
||||||
#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y
|
#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y
|
||||||
#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y)
|
#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y)
|
||||||
#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)];
|
#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)]
|
||||||
|
|
||||||
// helper macro to make sure the defined structures are of the expected size.
|
// helper macro to make sure the defined structures are of the expected size.
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
|
@ -53,7 +53,7 @@ struct Regs {
|
||||||
#else
|
#else
|
||||||
#define ASSERT_MEMBER_SIZE(name, size_in_bytes) \
|
#define ASSERT_MEMBER_SIZE(name, size_in_bytes) \
|
||||||
static_assert(sizeof(name) == size_in_bytes, \
|
static_assert(sizeof(name) == size_in_bytes, \
|
||||||
"Structure size and register block length don't match");
|
"Structure size and register block length don't match")
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
enum class FramebufferFormat : u32 {
|
enum class FramebufferFormat : u32 {
|
||||||
|
|
|
@ -5,6 +5,7 @@ set(SRCS clipper.cpp
|
||||||
utils.cpp
|
utils.cpp
|
||||||
vertex_shader.cpp
|
vertex_shader.cpp
|
||||||
video_core.cpp
|
video_core.cpp
|
||||||
|
debug_utils/debug_utils.cpp
|
||||||
renderer_opengl/renderer_opengl.cpp)
|
renderer_opengl/renderer_opengl.cpp)
|
||||||
|
|
||||||
set(HEADERS clipper.h
|
set(HEADERS clipper.h
|
||||||
|
@ -17,6 +18,7 @@ set(HEADERS clipper.h
|
||||||
renderer_base.h
|
renderer_base.h
|
||||||
vertex_shader.h
|
vertex_shader.h
|
||||||
video_core.h
|
video_core.h
|
||||||
|
debug_utils/debug_utils.h
|
||||||
renderer_opengl/renderer_opengl.h)
|
renderer_opengl/renderer_opengl.h)
|
||||||
|
|
||||||
add_library(video_core STATIC ${SRCS} ${HEADERS})
|
add_library(video_core STATIC ${SRCS} ${HEADERS})
|
||||||
|
|
|
@ -2,12 +2,14 @@
|
||||||
// Licensed under GPLv2
|
// Licensed under GPLv2
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "clipper.h"
|
||||||
#include "command_processor.h"
|
#include "command_processor.h"
|
||||||
#include "math.h"
|
#include "math.h"
|
||||||
#include "pica.h"
|
#include "pica.h"
|
||||||
#include "primitive_assembly.h"
|
#include "primitive_assembly.h"
|
||||||
#include "vertex_shader.h"
|
#include "vertex_shader.h"
|
||||||
|
|
||||||
|
#include "debug_utils/debug_utils.h"
|
||||||
|
|
||||||
namespace Pica {
|
namespace Pica {
|
||||||
|
|
||||||
|
@ -23,15 +25,24 @@ static u32 uniform_write_buffer[4];
|
||||||
static u32 vs_binary_write_offset = 0;
|
static u32 vs_binary_write_offset = 0;
|
||||||
static u32 vs_swizzle_write_offset = 0;
|
static u32 vs_swizzle_write_offset = 0;
|
||||||
|
|
||||||
static inline void WritePicaReg(u32 id, u32 value) {
|
static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||||
|
|
||||||
|
if (id >= registers.NumIds())
|
||||||
|
return;
|
||||||
|
|
||||||
|
// TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value
|
||||||
u32 old_value = registers[id];
|
u32 old_value = registers[id];
|
||||||
registers[id] = value;
|
registers[id] = (old_value & ~mask) | (value & mask);
|
||||||
|
|
||||||
|
DebugUtils::OnPicaRegWrite(id, registers[id]);
|
||||||
|
|
||||||
switch(id) {
|
switch(id) {
|
||||||
// It seems like these trigger vertex rendering
|
// It seems like these trigger vertex rendering
|
||||||
case PICA_REG_INDEX(trigger_draw):
|
case PICA_REG_INDEX(trigger_draw):
|
||||||
case PICA_REG_INDEX(trigger_draw_indexed):
|
case PICA_REG_INDEX(trigger_draw_indexed):
|
||||||
{
|
{
|
||||||
|
DebugUtils::DumpTevStageConfig(registers.GetTevStages());
|
||||||
|
|
||||||
const auto& attribute_config = registers.vertex_attributes;
|
const auto& attribute_config = registers.vertex_attributes;
|
||||||
const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress());
|
const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress());
|
||||||
|
|
||||||
|
@ -68,6 +79,10 @@ static inline void WritePicaReg(u32 id, u32 value) {
|
||||||
const u16* index_address_16 = (u16*)index_address_8;
|
const u16* index_address_16 = (u16*)index_address_8;
|
||||||
bool index_u16 = (bool)index_info.format;
|
bool index_u16 = (bool)index_info.format;
|
||||||
|
|
||||||
|
DebugUtils::GeometryDumper geometry_dumper;
|
||||||
|
PrimitiveAssembler<VertexShader::OutputVertex> clipper_primitive_assembler(registers.triangle_topology.Value());
|
||||||
|
PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(registers.triangle_topology.Value());
|
||||||
|
|
||||||
for (int index = 0; index < registers.num_vertices; ++index)
|
for (int index = 0; index < registers.num_vertices; ++index)
|
||||||
{
|
{
|
||||||
int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index;
|
int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index;
|
||||||
|
@ -95,14 +110,28 @@ static inline void WritePicaReg(u32 id, u32 value) {
|
||||||
input.attr[i][comp].ToFloat32());
|
input.attr[i][comp].ToFloat32());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE: When dumping geometry, we simply assume that the first input attribute
|
||||||
|
// corresponds to the position for now.
|
||||||
|
DebugUtils::GeometryDumper::Vertex dumped_vertex = {
|
||||||
|
input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32()
|
||||||
|
};
|
||||||
|
using namespace std::placeholders;
|
||||||
|
dumping_primitive_assembler.SubmitVertex(dumped_vertex,
|
||||||
|
std::bind(&DebugUtils::GeometryDumper::AddTriangle,
|
||||||
|
&geometry_dumper, _1, _2, _3));
|
||||||
|
|
||||||
|
// Send to vertex shader
|
||||||
VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes());
|
VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes());
|
||||||
|
|
||||||
if (is_indexed) {
|
if (is_indexed) {
|
||||||
// TODO: Add processed vertex to vertex cache!
|
// TODO: Add processed vertex to vertex cache!
|
||||||
}
|
}
|
||||||
|
|
||||||
PrimitiveAssembly::SubmitVertex(output);
|
// Send to triangle clipper
|
||||||
|
clipper_primitive_assembler.SubmitVertex(output, Clipper::ProcessTriangle);
|
||||||
}
|
}
|
||||||
|
geometry_dumper.Dump();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -207,14 +236,17 @@ static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) {
|
||||||
|
|
||||||
u32* read_pointer = (u32*)first_command_word;
|
u32* read_pointer = (u32*)first_command_word;
|
||||||
|
|
||||||
// TODO: Take parameter mask into consideration!
|
const u32 write_mask = ((header.parameter_mask & 0x1) ? (0xFFu << 0) : 0u) |
|
||||||
|
((header.parameter_mask & 0x2) ? (0xFFu << 8) : 0u) |
|
||||||
|
((header.parameter_mask & 0x4) ? (0xFFu << 16) : 0u) |
|
||||||
|
((header.parameter_mask & 0x8) ? (0xFFu << 24) : 0u);
|
||||||
|
|
||||||
WritePicaReg(header.cmd_id, *read_pointer);
|
WritePicaReg(header.cmd_id, *read_pointer, write_mask);
|
||||||
read_pointer += 2;
|
read_pointer += 2;
|
||||||
|
|
||||||
for (int i = 1; i < 1+header.extra_data_length; ++i) {
|
for (int i = 1; i < 1+header.extra_data_length; ++i) {
|
||||||
u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0);
|
u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0);
|
||||||
WritePicaReg(cmd, *read_pointer);
|
WritePicaReg(cmd, *read_pointer, write_mask);
|
||||||
++read_pointer;
|
++read_pointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,11 +17,22 @@ union CommandHeader {
|
||||||
u32 hex;
|
u32 hex;
|
||||||
|
|
||||||
BitField< 0, 16, u32> cmd_id;
|
BitField< 0, 16, u32> cmd_id;
|
||||||
|
|
||||||
|
// parameter_mask:
|
||||||
|
// Mask applied to the input value to make it possible to update
|
||||||
|
// parts of a register without overwriting its other fields.
|
||||||
|
// first bit: 0x000000FF
|
||||||
|
// second bit: 0x0000FF00
|
||||||
|
// third bit: 0x00FF0000
|
||||||
|
// fourth bit: 0xFF000000
|
||||||
BitField<16, 4, u32> parameter_mask;
|
BitField<16, 4, u32> parameter_mask;
|
||||||
|
|
||||||
BitField<20, 11, u32> extra_data_length;
|
BitField<20, 11, u32> extra_data_length;
|
||||||
|
|
||||||
BitField<31, 1, u32> group_commands;
|
BitField<31, 1, u32> group_commands;
|
||||||
};
|
};
|
||||||
static_assert(std::is_standard_layout<CommandHeader>::value == true, "CommandHeader does not use standard layout");
|
static_assert(std::is_standard_layout<CommandHeader>::value == true,
|
||||||
|
"CommandHeader does not use standard layout");
|
||||||
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
|
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
|
||||||
|
|
||||||
void ProcessCommandList(const u32* list, u32 size);
|
void ProcessCommandList(const u32* list, u32 size);
|
||||||
|
|
|
@ -0,0 +1,522 @@
|
||||||
|
// Copyright 2014 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
#include <atomic>
#include <fstream>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
|
||||||
|
|
||||||
|
#ifdef HAVE_PNG
|
||||||
|
#include <png.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "common/file_util.h"
|
||||||
|
|
||||||
|
#include "video_core/pica.h"
|
||||||
|
|
||||||
|
#include "debug_utils.h"
|
||||||
|
|
||||||
|
namespace Pica {
|
||||||
|
|
||||||
|
namespace DebugUtils {
|
||||||
|
|
||||||
|
// Records one triangle: copies the three corner vertices to the end of the
// vertex list and appends a face that references those three new entries.
void GeometryDumper::AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2) {
    // Position the first corner will occupy once it has been appended.
    const int first_index = vertices.size();

    vertices.push_back(v0);
    vertices.push_back(v1);
    vertices.push_back(v2);

    Face face = { { first_index, first_index + 1, first_index + 2 } };
    faces.push_back(face);
}
|
||||||
|
|
||||||
|
void GeometryDumper::Dump() {
|
||||||
|
// NOTE: Permanently enabling this just trashes the hard disk for no reason.
|
||||||
|
// Hence, this is currently disabled.
|
||||||
|
return;
|
||||||
|
|
||||||
|
static int index = 0;
|
||||||
|
std::string filename = std::string("geometry_dump") + std::to_string(++index) + ".obj";
|
||||||
|
|
||||||
|
std::ofstream file(filename);
|
||||||
|
|
||||||
|
for (const auto& vertex : vertices) {
|
||||||
|
file << "v " << vertex.pos[0]
|
||||||
|
<< " " << vertex.pos[1]
|
||||||
|
<< " " << vertex.pos[2] << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const Face& face : faces) {
|
||||||
|
file << "f " << 1+face.index[0]
|
||||||
|
<< " " << 1+face.index[1]
|
||||||
|
<< " " << 1+face.index[2] << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma pack(1)
|
||||||
|
struct DVLBHeader {
|
||||||
|
enum : u32 {
|
||||||
|
MAGIC_WORD = 0x424C5644, // "DVLB"
|
||||||
|
};
|
||||||
|
|
||||||
|
u32 magic_word;
|
||||||
|
u32 num_programs;
|
||||||
|
// u32 dvle_offset_table[];
|
||||||
|
};
|
||||||
|
static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size");
|
||||||
|
|
||||||
|
struct DVLPHeader {
|
||||||
|
enum : u32 {
|
||||||
|
MAGIC_WORD = 0x504C5644, // "DVLP"
|
||||||
|
};
|
||||||
|
|
||||||
|
u32 magic_word;
|
||||||
|
u32 version;
|
||||||
|
u32 binary_offset; // relative to DVLP start
|
||||||
|
u32 binary_size_words;
|
||||||
|
u32 swizzle_patterns_offset;
|
||||||
|
u32 swizzle_patterns_num_entries;
|
||||||
|
u32 unk2;
|
||||||
|
};
|
||||||
|
static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size");
|
||||||
|
|
||||||
|
struct DVLEHeader {
|
||||||
|
enum : u32 {
|
||||||
|
MAGIC_WORD = 0x454c5644, // "DVLE"
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class ShaderType : u8 {
|
||||||
|
VERTEX = 0,
|
||||||
|
GEOMETRY = 1,
|
||||||
|
};
|
||||||
|
|
||||||
|
u32 magic_word;
|
||||||
|
u16 pad1;
|
||||||
|
ShaderType type;
|
||||||
|
u8 pad2;
|
||||||
|
u32 main_offset_words; // offset within binary blob
|
||||||
|
u32 endmain_offset_words;
|
||||||
|
u32 pad3;
|
||||||
|
u32 pad4;
|
||||||
|
u32 constant_table_offset;
|
||||||
|
u32 constant_table_size; // number of entries
|
||||||
|
u32 label_table_offset;
|
||||||
|
u32 label_table_size;
|
||||||
|
u32 output_register_table_offset;
|
||||||
|
u32 output_register_table_size;
|
||||||
|
u32 uniform_table_offset;
|
||||||
|
u32 uniform_table_size;
|
||||||
|
u32 symbol_table_offset;
|
||||||
|
u32 symbol_table_size;
|
||||||
|
|
||||||
|
};
|
||||||
|
static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size");
|
||||||
|
#pragma pack()
|
||||||
|
|
||||||
|
void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size,
|
||||||
|
u32 main_offset, const Regs::VSOutputAttributes* output_attributes)
|
||||||
|
{
|
||||||
|
// NOTE: Permanently enabling this just trashes hard disks for no reason.
|
||||||
|
// Hence, this is currently disabled.
|
||||||
|
return;
|
||||||
|
|
||||||
|
struct StuffToWrite {
|
||||||
|
u8* pointer;
|
||||||
|
u32 size;
|
||||||
|
};
|
||||||
|
std::vector<StuffToWrite> writing_queue;
|
||||||
|
u32 write_offset = 0;
|
||||||
|
|
||||||
|
auto QueueForWriting = [&writing_queue,&write_offset](u8* pointer, u32 size) {
|
||||||
|
writing_queue.push_back({pointer, size});
|
||||||
|
u32 old_write_offset = write_offset;
|
||||||
|
write_offset += size;
|
||||||
|
return old_write_offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
// First off, try to translate Pica state (one enum for output attribute type and component)
|
||||||
|
// into shbin format (separate type and component mask).
|
||||||
|
union OutputRegisterInfo {
|
||||||
|
enum Type : u64 {
|
||||||
|
POSITION = 0,
|
||||||
|
COLOR = 2,
|
||||||
|
TEXCOORD0 = 3,
|
||||||
|
TEXCOORD1 = 5,
|
||||||
|
TEXCOORD2 = 6,
|
||||||
|
};
|
||||||
|
|
||||||
|
BitField< 0, 64, u64> hex;
|
||||||
|
|
||||||
|
BitField< 0, 16, Type> type;
|
||||||
|
BitField<16, 16, u64> id;
|
||||||
|
BitField<32, 4, u64> component_mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
// This is put into a try-catch block to make sure we notice unknown configurations.
|
||||||
|
std::vector<OutputRegisterInfo> output_info_table;
|
||||||
|
for (int i = 0; i < 7; ++i) {
|
||||||
|
using OutputAttributes = Pica::Regs::VSOutputAttributes;
|
||||||
|
|
||||||
|
// TODO: It's still unclear how the attribute components map to the register!
|
||||||
|
// Once we know that, this code probably will not make much sense anymore.
|
||||||
|
std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32> > map = {
|
||||||
|
{ OutputAttributes::POSITION_X, { OutputRegisterInfo::POSITION, 1} },
|
||||||
|
{ OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} },
|
||||||
|
{ OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} },
|
||||||
|
{ OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} },
|
||||||
|
{ OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} },
|
||||||
|
{ OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} },
|
||||||
|
{ OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} },
|
||||||
|
{ OutputAttributes::COLOR_A, { OutputRegisterInfo::COLOR, 8} },
|
||||||
|
{ OutputAttributes::TEXCOORD0_U, { OutputRegisterInfo::TEXCOORD0, 1} },
|
||||||
|
{ OutputAttributes::TEXCOORD0_V, { OutputRegisterInfo::TEXCOORD0, 2} },
|
||||||
|
{ OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} },
|
||||||
|
{ OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} },
|
||||||
|
{ OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} },
|
||||||
|
{ OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} }
|
||||||
|
};
|
||||||
|
|
||||||
|
for (const auto& semantic : std::vector<OutputAttributes::Semantic>{
|
||||||
|
output_attributes[i].map_x,
|
||||||
|
output_attributes[i].map_y,
|
||||||
|
output_attributes[i].map_z,
|
||||||
|
output_attributes[i].map_w }) {
|
||||||
|
if (semantic == OutputAttributes::INVALID)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
try {
|
||||||
|
OutputRegisterInfo::Type type = map.at(semantic).first;
|
||||||
|
u32 component_mask = map.at(semantic).second;
|
||||||
|
|
||||||
|
auto it = std::find_if(output_info_table.begin(), output_info_table.end(),
|
||||||
|
[&i, &type](const OutputRegisterInfo& info) {
|
||||||
|
return info.id == i && info.type == type;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (it == output_info_table.end()) {
|
||||||
|
output_info_table.push_back({});
|
||||||
|
output_info_table.back().type = type;
|
||||||
|
output_info_table.back().component_mask = component_mask;
|
||||||
|
output_info_table.back().id = i;
|
||||||
|
} else {
|
||||||
|
it->component_mask = it->component_mask | component_mask;
|
||||||
|
}
|
||||||
|
} catch (const std::out_of_range& oor) {
|
||||||
|
_dbg_assert_msg_(GPU, 0, "Unknown output attribute mapping");
|
||||||
|
ERROR_LOG(GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x",
|
||||||
|
(int)output_attributes[i].map_x.Value(),
|
||||||
|
(int)output_attributes[i].map_y.Value(),
|
||||||
|
(int)output_attributes[i].map_z.Value(),
|
||||||
|
(int)output_attributes[i].map_w.Value());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct {
|
||||||
|
DVLBHeader header;
|
||||||
|
u32 dvle_offset;
|
||||||
|
} dvlb{ {DVLBHeader::MAGIC_WORD, 1 } }; // 1 DVLE
|
||||||
|
|
||||||
|
DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD };
|
||||||
|
DVLEHeader dvle{ DVLEHeader::MAGIC_WORD };
|
||||||
|
|
||||||
|
QueueForWriting((u8*)&dvlb, sizeof(dvlb));
|
||||||
|
u32 dvlp_offset = QueueForWriting((u8*)&dvlp, sizeof(dvlp));
|
||||||
|
dvlb.dvle_offset = QueueForWriting((u8*)&dvle, sizeof(dvle));
|
||||||
|
|
||||||
|
// TODO: Reduce the amount of binary code written to relevant portions
|
||||||
|
dvlp.binary_offset = write_offset - dvlp_offset;
|
||||||
|
dvlp.binary_size_words = binary_size;
|
||||||
|
QueueForWriting((u8*)binary_data, binary_size * sizeof(u32));
|
||||||
|
|
||||||
|
dvlp.swizzle_patterns_offset = write_offset - dvlp_offset;
|
||||||
|
dvlp.swizzle_patterns_num_entries = swizzle_size;
|
||||||
|
u32 dummy = 0;
|
||||||
|
for (int i = 0; i < swizzle_size; ++i) {
|
||||||
|
QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i]));
|
||||||
|
QueueForWriting((u8*)&dummy, sizeof(dummy));
|
||||||
|
}
|
||||||
|
|
||||||
|
dvle.main_offset_words = main_offset;
|
||||||
|
dvle.output_register_table_offset = write_offset - dvlb.dvle_offset;
|
||||||
|
dvle.output_register_table_size = output_info_table.size();
|
||||||
|
QueueForWriting((u8*)output_info_table.data(), output_info_table.size() * sizeof(OutputRegisterInfo));
|
||||||
|
|
||||||
|
// TODO: Create a label table for "main"
|
||||||
|
|
||||||
|
|
||||||
|
// Write data to file
|
||||||
|
static int dump_index = 0;
|
||||||
|
std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin");
|
||||||
|
std::ofstream file(filename, std::ios_base::out | std::ios_base::binary);
|
||||||
|
|
||||||
|
for (auto& chunk : writing_queue) {
|
||||||
|
file.write((char*)chunk.pointer, chunk.size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::unique_ptr<PicaTrace> pica_trace;
|
||||||
|
static std::mutex pica_trace_mutex;
|
||||||
|
static int is_pica_tracing = false;
|
||||||
|
|
||||||
|
void StartPicaTracing()
|
||||||
|
{
|
||||||
|
if (is_pica_tracing) {
|
||||||
|
ERROR_LOG(GPU, "StartPicaTracing called even though tracing already running!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
pica_trace_mutex.lock();
|
||||||
|
pica_trace = std::unique_ptr<PicaTrace>(new PicaTrace);
|
||||||
|
|
||||||
|
is_pica_tracing = true;
|
||||||
|
pica_trace_mutex.unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsPicaTracing()
|
||||||
|
{
|
||||||
|
return is_pica_tracing;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Appends the register write (id, value) to the trace being recorded.
// Does nothing when no trace is in progress.
void OnPicaRegWrite(u32 id, u32 value)
{
    // Cheap check without the lock first, so that the common "not tracing"
    // case does not pay for locking.
    if (is_pica_tracing) {
        std::lock_guard<std::mutex> guard(pica_trace_mutex);

        // Tracing may have been finished while waiting for the lock.
        if (is_pica_tracing)
            pica_trace->writes.emplace_back(id, value);
    }
}
|
||||||
|
|
||||||
|
std::unique_ptr<PicaTrace> FinishPicaTracing()
|
||||||
|
{
|
||||||
|
if (!is_pica_tracing) {
|
||||||
|
ERROR_LOG(GPU, "FinishPicaTracing called even though tracing already running!");
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// signalize that no further tracing should be performed
|
||||||
|
is_pica_tracing = false;
|
||||||
|
|
||||||
|
// Wait until running tracing is finished
|
||||||
|
pica_trace_mutex.lock();
|
||||||
|
std::unique_ptr<PicaTrace> ret(std::move(pica_trace));
|
||||||
|
pica_trace_mutex.unlock();
|
||||||
|
return std::move(ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writes the given texture to "texture_dump<N>.png" (N increases with every
// dump) as an 8-bit RGB image.
//
// texture_config: provides the width and height of the texture
// data:           texel data, read three bytes per texel in 8x8 tiles; the
//                 three bytes of each texel are written in reversed order.
//                 Nothing is written if this is null.
//
// Without HAVE_PNG the function does nothing.
//
// NOTE: The function currently returns immediately; everything below the
//       early return is kept for when dumping gets re-enabled.
void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
    // NOTE: Permanently enabling this just trashes hard disks for no reason.
    //       Hence, this is currently disabled.
    return;

#ifndef HAVE_PNG
    return;
#else
    if (!data)
        return;

    // Write data to file
    static int dump_index = 0;
    std::string filename = std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png");

    const u32 width = texture_config.width;
    const u32 height = texture_config.height;
    const u32 row_stride = width * 3;

    // Convert the tiled texture into a linear buffer *before* libpng's error
    // handler is armed via setjmp: that way no local is modified between
    // setjmp and a potential longjmp, and the buffer is released on every
    // path out of the function.
    std::vector<u8> buf(row_stride * height);
    for (u32 y = 0; y < height; ++y) {
        for (u32 x = 0; x < width; ++x) {
            // Cf. rasterizer code for an explanation of this algorithm.
            int texel_index_within_tile = 0;
            for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
                int sub_tile_width = 1 << block_size_index;
                int sub_tile_height = 1 << block_size_index;

                int sub_tile_index = (x & sub_tile_width) << block_size_index;
                sub_tile_index += 2 * ((y & sub_tile_height) << block_size_index);
                texel_index_within_tile += sub_tile_index;
            }

            const int block_width = 8;
            const int block_height = 8;

            int coarse_x = (x / block_width) * block_width;
            int coarse_y = (y / block_height) * block_height;

            u8* source_ptr = (u8*)data + coarse_x * block_height * 3 + coarse_y * row_stride + texel_index_within_tile * 3;
            buf[3 * x + y * row_stride    ] = source_ptr[2];
            buf[3 * x + y * row_stride + 1] = source_ptr[1];
            buf[3 * x + y * row_stride + 2] = source_ptr[0];
        }
    }

    char title[] = "Citra texture dump";
    char title_key[] = "Title";
    png_text title_text = {};
    title_text.compression = PNG_TEXT_COMPRESSION_NONE;
    title_text.key = title_key;
    title_text.text = title;

    png_structp png_ptr = nullptr;
    png_infop info_ptr = nullptr;

    // Open file for writing (binary mode)
    // NOTE(review): the result of opening the file is not checked here.
    File::IOFile fp(filename, "wb");

    // Initialize write structure
    png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
    if (png_ptr == nullptr) {
        ERROR_LOG(GPU, "Could not allocate write struct\n");
        goto finalise;
    }

    // Initialize info structure
    info_ptr = png_create_info_struct(png_ptr);
    if (info_ptr == nullptr) {
        ERROR_LOG(GPU, "Could not allocate info struct\n");
        goto finalise;
    }

    // Setup Exception handling: libpng jumps back here when it hits an error
    if (setjmp(png_jmpbuf(png_ptr))) {
        ERROR_LOG(GPU, "Error during png creation\n");
        goto finalise;
    }

    png_init_io(png_ptr, fp.GetHandle());

    // Write header (8 bit colour depth)
    png_set_IHDR(png_ptr, info_ptr, width, height,
                 8, PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE,
                 PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);

    png_set_text(png_ptr, info_ptr, &title_text, 1);

    png_write_info(png_ptr, info_ptr);

    // Write image data
    for (u32 y = 0; y < height; ++y)
        png_write_row(png_ptr, &buf[y * row_stride]);

    // End write
    png_write_end(png_ptr, nullptr);

finalise:
    // Destroys the write struct together with the info struct (if any),
    // including all data attached to the latter.
    if (png_ptr != nullptr) png_destroy_write_struct(&png_ptr, &info_ptr);
#endif
}
|
||||||
|
|
||||||
|
// Logs (via DEBUG_LOG) a textual description of the given TEV stages: one line
// per stage showing its color combiner followed by its alpha combiner.
// Sources, modifiers and operations without a known name are printed using
// "Unknown" / "????" placeholders.
void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
{
    using Source = Pica::Regs::TevStageConfig::Source;
    using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier;
    using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier;
    using Operation = Pica::Regs::TevStageConfig::Operation;

    // The name tables and formatting helpers do not depend on the stage being
    // printed, so they are set up once rather than once per stage.
    const std::map<Source, std::string> source_map = {
        { Source::PrimaryColor, "PrimaryColor" },
        { Source::Texture0, "Texture0" },
        { Source::Constant, "Constant" },
        { Source::Previous, "Previous" },
    };

    const std::map<ColorModifier, std::string> color_modifier_map = {
        { ColorModifier::SourceColor, "%source.rgb" }
    };
    const std::map<AlphaModifier, std::string> alpha_modifier_map = {
        { AlphaModifier::SourceAlpha, "%source.a" }
    };

    const std::map<Operation, std::string> combiner_map = {
        { Operation::Replace, "%source1" },
        { Operation::Modulate, "(%source1 * %source2) / 255" },
    };

    // Returns a copy of input in which the first occurrence of pattern (if
    // any) has been replaced.
    auto ReplacePattern =
            [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string {
                size_t start = input.find(pattern);
                if (start == std::string::npos)
                    return input;

                std::string ret = input;
                ret.replace(start, pattern.length(), replacement);
                return ret;
            };

    // Formats a color source with its modifier applied, e.g. "Texture0.rgb".
    auto GetColorSourceStr =
            [&source_map,&color_modifier_map,&ReplacePattern](const Source& src, const ColorModifier& modifier) -> std::string {
                auto src_it = source_map.find(src);
                std::string src_str = "Unknown";
                if (src_it != source_map.end())
                    src_str = src_it->second;

                auto modifier_it = color_modifier_map.find(modifier);
                std::string modifier_str = "%source.????";
                if (modifier_it != color_modifier_map.end())
                    modifier_str = modifier_it->second;

                return ReplacePattern(modifier_str, "%source", src_str);
            };

    // Formats the color combiner of a stage with all three sources filled in.
    auto GetColorCombinerStr =
            [&](const Regs::TevStageConfig& tev_stage) -> std::string {
                auto op_it = combiner_map.find(tev_stage.color_op);
                std::string op_str = "Unknown op (%source1, %source2, %source3)";
                if (op_it != combiner_map.end())
                    op_str = op_it->second;

                op_str = ReplacePattern(op_str, "%source1", GetColorSourceStr(tev_stage.color_source1, tev_stage.color_modifier1));
                op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2));
                return   ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3));
            };

    // Formats an alpha source with its modifier applied, e.g. "Texture0.a".
    auto GetAlphaSourceStr =
            [&source_map,&alpha_modifier_map,&ReplacePattern](const Source& src, const AlphaModifier& modifier) -> std::string {
                auto src_it = source_map.find(src);
                std::string src_str = "Unknown";
                if (src_it != source_map.end())
                    src_str = src_it->second;

                auto modifier_it = alpha_modifier_map.find(modifier);
                std::string modifier_str = "%source.????";
                if (modifier_it != alpha_modifier_map.end())
                    modifier_str = modifier_it->second;

                return ReplacePattern(modifier_str, "%source", src_str);
            };

    // Formats the alpha combiner of a stage with all three sources filled in.
    auto GetAlphaCombinerStr =
            [&](const Regs::TevStageConfig& tev_stage) -> std::string {
                auto op_it = combiner_map.find(tev_stage.alpha_op);
                std::string op_str = "Unknown op (%source1, %source2, %source3)";
                if (op_it != combiner_map.end())
                    op_str = op_it->second;

                op_str = ReplacePattern(op_str, "%source1", GetAlphaSourceStr(tev_stage.alpha_source1, tev_stage.alpha_modifier1));
                op_str = ReplacePattern(op_str, "%source2", GetAlphaSourceStr(tev_stage.alpha_source2, tev_stage.alpha_modifier2));
                return   ReplacePattern(op_str, "%source3", GetAlphaSourceStr(tev_stage.alpha_source3, tev_stage.alpha_modifier3));
            };

    std::string stage_info = "Tev setup:\n";
    const int num_stages = static_cast<int>(stages.size());
    for (int index = 0; index < num_stages; ++index) {
        const auto& tev_stage = stages[index];

        stage_info += "Stage " + std::to_string(index) + ": " + GetColorCombinerStr(tev_stage) + " " + GetAlphaCombinerStr(tev_stage) + "\n";
    }

    DEBUG_LOG(GPU, "%s", stage_info.c_str());
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
} // namespace
|
|
@ -0,0 +1,66 @@
|
||||||
|
// Copyright 2014 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "video_core/pica.h"
|
||||||
|
|
||||||
|
namespace Pica {
|
||||||
|
|
||||||
|
namespace DebugUtils {
|
||||||
|
|
||||||
|
// Collects triangles and writes them out as an OBJ file on request.
class GeometryDumper {
public:
    // A single vertex; only its three position components are recorded.
    struct Vertex {
        std::array<float,3> pos;
    };

    // Adds the triangle formed by the three given vertices.
    void AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2);

    // Writes everything collected so far to an OBJ file.
    void Dump();

private:
    // A triangle, stored as three indices into `vertices`.
    struct Face {
        int index[3];
    };

    std::vector<Vertex> vertices;
    std::vector<Face> faces;
};
|
||||||
|
|
||||||
|
void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size,
|
||||||
|
u32 main_offset, const Regs::VSOutputAttributes* output_attributes);
|
||||||
|
|
||||||
|
|
||||||
|
// Utility class to log Pica commands.
|
||||||
|
struct PicaTrace {
|
||||||
|
struct Write : public std::pair<u32,u32> {
|
||||||
|
Write(u32 id, u32 value) : std::pair<u32,u32>(id, value) {}
|
||||||
|
|
||||||
|
u32& Id() { return first; }
|
||||||
|
const u32& Id() const { return first; }
|
||||||
|
|
||||||
|
u32& Value() { return second; }
|
||||||
|
const u32& Value() const { return second; }
|
||||||
|
};
|
||||||
|
std::vector<Write> writes;
|
||||||
|
};
|
||||||
|
|
||||||
|
void StartPicaTracing();
|
||||||
|
bool IsPicaTracing();
|
||||||
|
void OnPicaRegWrite(u32 id, u32 value);
|
||||||
|
std::unique_ptr<PicaTrace> FinishPicaTracing();
|
||||||
|
|
||||||
|
void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
|
||||||
|
|
||||||
|
void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
} // namespace
|
|
@ -18,19 +18,6 @@
|
||||||
class GraphicsDebugger
|
class GraphicsDebugger
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
// A few utility structs used to expose data
|
|
||||||
// A vector of commands represented by their raw byte sequence
|
|
||||||
struct PicaCommand : public std::vector<u32>
|
|
||||||
{
|
|
||||||
const Pica::CommandProcessor::CommandHeader& GetHeader() const
|
|
||||||
{
|
|
||||||
const u32& val = at(1);
|
|
||||||
return *(Pica::CommandProcessor::CommandHeader*)&val;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef std::vector<PicaCommand> PicaCommandList;
|
|
||||||
|
|
||||||
// Base class for all objects which need to be notified about GPU events
|
// Base class for all objects which need to be notified about GPU events
|
||||||
class DebuggerObserver
|
class DebuggerObserver
|
||||||
{
|
{
|
||||||
|
@ -55,16 +42,6 @@ public:
|
||||||
ERROR_LOG(GSP, "Received command: id=%x", (int)cmd.id.Value());
|
ERROR_LOG(GSP, "Received command: id=%x", (int)cmd.id.Value());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @param lst command list which triggered this call
|
|
||||||
* @param is_new true if the command list was called for the first time
|
|
||||||
* @todo figure out how to make sure called functions don't keep references around beyond their life time
|
|
||||||
*/
|
|
||||||
virtual void OnCommandListCalled(const PicaCommandList& lst, bool is_new)
|
|
||||||
{
|
|
||||||
ERROR_LOG(GSP, "Command list called: %d", (int)is_new);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const GraphicsDebugger* GetDebugger() const
|
const GraphicsDebugger* GetDebugger() const
|
||||||
{
|
{
|
||||||
|
@ -93,49 +70,12 @@ public:
|
||||||
} );
|
} );
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandListCalled(u32 address, u32* command_list, u32 size_in_words)
|
|
||||||
{
|
|
||||||
if (observers.empty())
|
|
||||||
return;
|
|
||||||
|
|
||||||
PicaCommandList cmdlist;
|
|
||||||
for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;)
|
|
||||||
{
|
|
||||||
const Pica::CommandProcessor::CommandHeader& header = *(Pica::CommandProcessor::CommandHeader*)(&parse_pointer[1]);
|
|
||||||
|
|
||||||
cmdlist.push_back(PicaCommand());
|
|
||||||
auto& cmd = cmdlist.back();
|
|
||||||
|
|
||||||
size_t size = 2 + header.extra_data_length;
|
|
||||||
size = (size + 1) / 2 * 2; // align to 8 bytes
|
|
||||||
cmd.reserve(size);
|
|
||||||
std::copy(parse_pointer, parse_pointer + size, std::back_inserter(cmd));
|
|
||||||
|
|
||||||
parse_pointer += size;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto obj = std::pair<u32,PicaCommandList>(address, cmdlist);
|
|
||||||
auto it = std::find(command_lists.begin(), command_lists.end(), obj);
|
|
||||||
bool is_new = (it == command_lists.end());
|
|
||||||
if (is_new)
|
|
||||||
command_lists.push_back(obj);
|
|
||||||
|
|
||||||
ForEachObserver([&](DebuggerObserver* observer) {
|
|
||||||
observer->OnCommandListCalled(obj.second, is_new);
|
|
||||||
} );
|
|
||||||
}
|
|
||||||
|
|
||||||
const GSP_GPU::Command& ReadGXCommandHistory(int index) const
|
const GSP_GPU::Command& ReadGXCommandHistory(int index) const
|
||||||
{
|
{
|
||||||
// TODO: Is this thread-safe?
|
// TODO: Is this thread-safe?
|
||||||
return gx_command_history[index];
|
return gx_command_history[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<std::pair<u32,PicaCommandList>>& GetCommandLists() const
|
|
||||||
{
|
|
||||||
return command_lists;
|
|
||||||
}
|
|
||||||
|
|
||||||
void RegisterObserver(DebuggerObserver* observer)
|
void RegisterObserver(DebuggerObserver* observer)
|
||||||
{
|
{
|
||||||
// TODO: Check for duplicates
|
// TODO: Check for duplicates
|
||||||
|
@ -158,7 +98,4 @@ private:
|
||||||
std::vector<DebuggerObserver*> observers;
|
std::vector<DebuggerObserver*> observers;
|
||||||
|
|
||||||
std::vector<GSP_GPU::Command> gx_command_history;
|
std::vector<GSP_GPU::Command> gx_command_history;
|
||||||
|
|
||||||
// vector of pairs of command lists and their storage address
|
|
||||||
std::vector<std::pair<u32,PicaCommandList>> command_lists;
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -39,13 +39,19 @@ template<typename T> class Vec2;
|
||||||
template<typename T> class Vec3;
|
template<typename T> class Vec3;
|
||||||
template<typename T> class Vec4;
|
template<typename T> class Vec4;
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static inline Vec2<T> MakeVec(const T& x, const T& y);
|
||||||
|
template<typename T>
|
||||||
|
static inline Vec3<T> MakeVec(const T& x, const T& y, const T& z);
|
||||||
|
template<typename T>
|
||||||
|
static inline Vec4<T> MakeVec(const T& x, const T& y, const T& z, const T& w);
|
||||||
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
class Vec2 {
|
class Vec2 {
|
||||||
public:
|
public:
|
||||||
struct {
|
T x;
|
||||||
T x,y;
|
T y;
|
||||||
};
|
|
||||||
|
|
||||||
T* AsArray() { return &x; }
|
T* AsArray() { return &x; }
|
||||||
|
|
||||||
|
@ -68,34 +74,34 @@ public:
|
||||||
a[0] = x; a[1] = y;
|
a[0] = x; a[1] = y;
|
||||||
}
|
}
|
||||||
|
|
||||||
Vec2 operator +(const Vec2& other) const
|
Vec2<decltype(T{}+T{})> operator +(const Vec2& other) const
|
||||||
{
|
{
|
||||||
return Vec2(x+other.x, y+other.y);
|
return MakeVec(x+other.x, y+other.y);
|
||||||
}
|
}
|
||||||
void operator += (const Vec2 &other)
|
void operator += (const Vec2 &other)
|
||||||
{
|
{
|
||||||
x+=other.x; y+=other.y;
|
x+=other.x; y+=other.y;
|
||||||
}
|
}
|
||||||
Vec2 operator -(const Vec2& other) const
|
Vec2<decltype(T{}-T{})> operator -(const Vec2& other) const
|
||||||
{
|
{
|
||||||
return Vec2(x-other.x, y-other.y);
|
return MakeVec(x-other.x, y-other.y);
|
||||||
}
|
}
|
||||||
void operator -= (const Vec2& other)
|
void operator -= (const Vec2& other)
|
||||||
{
|
{
|
||||||
x-=other.x; y-=other.y;
|
x-=other.x; y-=other.y;
|
||||||
}
|
}
|
||||||
Vec2 operator -() const
|
Vec2<decltype(-T{})> operator -() const
|
||||||
{
|
{
|
||||||
return Vec2(-x,-y);
|
return MakeVec(-x,-y);
|
||||||
}
|
}
|
||||||
Vec2 operator * (const Vec2& other) const
|
Vec2<decltype(T{}*T{})> operator * (const Vec2& other) const
|
||||||
{
|
{
|
||||||
return Vec2(x*other.x, y*other.y);
|
return MakeVec(x*other.x, y*other.y);
|
||||||
}
|
}
|
||||||
template<typename V>
|
template<typename V>
|
||||||
Vec2 operator * (const V& f) const
|
Vec2<decltype(T{}*V{})> operator * (const V& f) const
|
||||||
{
|
{
|
||||||
return Vec2(x*f,y*f);
|
return MakeVec(x*f,y*f);
|
||||||
}
|
}
|
||||||
template<typename V>
|
template<typename V>
|
||||||
void operator *= (const V& f)
|
void operator *= (const V& f)
|
||||||
|
@ -103,9 +109,9 @@ public:
|
||||||
x*=f; y*=f;
|
x*=f; y*=f;
|
||||||
}
|
}
|
||||||
template<typename V>
|
template<typename V>
|
||||||
Vec2 operator / (const V& f) const
|
Vec2<decltype(T{}/V{})> operator / (const V& f) const
|
||||||
{
|
{
|
||||||
return Vec2(x/f,y/f);
|
return MakeVec(x/f,y/f);
|
||||||
}
|
}
|
||||||
template<typename V>
|
template<typename V>
|
||||||
void operator /= (const V& f)
|
void operator /= (const V& f)
|
||||||
|
@ -152,20 +158,9 @@ public:
|
||||||
const T& t() const { return y; }
|
const T& t() const { return y; }
|
||||||
|
|
||||||
// swizzlers - create a subvector of specific components
|
// swizzlers - create a subvector of specific components
|
||||||
Vec2 yx() const { return Vec2(y, x); }
|
const Vec2 yx() const { return Vec2(y, x); }
|
||||||
Vec2 vu() const { return Vec2(y, x); }
|
const Vec2 vu() const { return Vec2(y, x); }
|
||||||
Vec2 ts() const { return Vec2(y, x); }
|
const Vec2 ts() const { return Vec2(y, x); }
|
||||||
|
|
||||||
// Inserters to add new elements to effectively create larger vectors containing this Vec2
|
|
||||||
Vec3<T> InsertBeforeX(const T& value) {
|
|
||||||
return Vec3<T>(value, x, y);
|
|
||||||
}
|
|
||||||
Vec3<T> InsertBeforeY(const T& value) {
|
|
||||||
return Vec3<T>(x, value, y);
|
|
||||||
}
|
|
||||||
Vec3<T> Append(const T& value) {
|
|
||||||
return Vec3<T>(x, y, value);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T, typename V>
|
template<typename T, typename V>
|
||||||
|
@ -180,10 +175,9 @@ template<typename T>
|
||||||
class Vec3
|
class Vec3
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
struct
|
T x;
|
||||||
{
|
T y;
|
||||||
T x,y,z;
|
T z;
|
||||||
};
|
|
||||||
|
|
||||||
T* AsArray() { return &x; }
|
T* AsArray() { return &x; }
|
||||||
|
|
||||||
|
@ -193,7 +187,7 @@ public:
|
||||||
|
|
||||||
template<typename T2>
|
template<typename T2>
|
||||||
Vec3<T2> Cast() const {
|
Vec3<T2> Cast() const {
|
||||||
return Vec3<T2>((T2)x, (T2)y, (T2)z);
|
return MakeVec<T2>((T2)x, (T2)y, (T2)z);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only implemented for T=int and T=float
|
// Only implemented for T=int and T=float
|
||||||
|
@ -202,7 +196,7 @@ public:
|
||||||
|
|
||||||
static Vec3 AssignToAll(const T& f)
|
static Vec3 AssignToAll(const T& f)
|
||||||
{
|
{
|
||||||
return Vec3<T>(f, f, f);
|
return MakeVec(f, f, f);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Write(T a[3])
|
void Write(T a[3])
|
||||||
|
@ -210,34 +204,34 @@ public:
|
||||||
a[0] = x; a[1] = y; a[2] = z;
|
a[0] = x; a[1] = y; a[2] = z;
|
||||||
}
|
}
|
||||||
|
|
||||||
Vec3 operator +(const Vec3 &other) const
|
Vec3<decltype(T{}+T{})> operator +(const Vec3 &other) const
|
||||||
{
|
{
|
||||||
return Vec3(x+other.x, y+other.y, z+other.z);
|
return MakeVec(x+other.x, y+other.y, z+other.z);
|
||||||
}
|
}
|
||||||
void operator += (const Vec3 &other)
|
void operator += (const Vec3 &other)
|
||||||
{
|
{
|
||||||
x+=other.x; y+=other.y; z+=other.z;
|
x+=other.x; y+=other.y; z+=other.z;
|
||||||
}
|
}
|
||||||
Vec3 operator -(const Vec3 &other) const
|
Vec3<decltype(T{}-T{})> operator -(const Vec3 &other) const
|
||||||
{
|
{
|
||||||
return Vec3(x-other.x, y-other.y, z-other.z);
|
return MakeVec(x-other.x, y-other.y, z-other.z);
|
||||||
}
|
}
|
||||||
void operator -= (const Vec3 &other)
|
void operator -= (const Vec3 &other)
|
||||||
{
|
{
|
||||||
x-=other.x; y-=other.y; z-=other.z;
|
x-=other.x; y-=other.y; z-=other.z;
|
||||||
}
|
}
|
||||||
Vec3 operator -() const
|
Vec3<decltype(-T{})> operator -() const
|
||||||
{
|
{
|
||||||
return Vec3(-x,-y,-z);
|
return MakeVec(-x,-y,-z);
|
||||||
}
|
}
|
||||||
Vec3 operator * (const Vec3 &other) const
|
Vec3<decltype(T{}*T{})> operator * (const Vec3 &other) const
|
||||||
{
|
{
|
||||||
return Vec3(x*other.x, y*other.y, z*other.z);
|
return MakeVec(x*other.x, y*other.y, z*other.z);
|
||||||
}
|
}
|
||||||
template<typename V>
|
template<typename V>
|
||||||
Vec3 operator * (const V& f) const
|
Vec3<decltype(T{}*V{})> operator * (const V& f) const
|
||||||
{
|
{
|
||||||
return Vec3(x*f,y*f,z*f);
|
return MakeVec(x*f,y*f,z*f);
|
||||||
}
|
}
|
||||||
template<typename V>
|
template<typename V>
|
||||||
void operator *= (const V& f)
|
void operator *= (const V& f)
|
||||||
|
@ -245,9 +239,9 @@ public:
|
||||||
x*=f; y*=f; z*=f;
|
x*=f; y*=f; z*=f;
|
||||||
}
|
}
|
||||||
template<typename V>
|
template<typename V>
|
||||||
Vec3 operator / (const V& f) const
|
Vec3<decltype(T{}/V{})> operator / (const V& f) const
|
||||||
{
|
{
|
||||||
return Vec3(x/f,y/f,z/f);
|
return MakeVec(x/f,y/f,z/f);
|
||||||
}
|
}
|
||||||
template<typename V>
|
template<typename V>
|
||||||
void operator /= (const V& f)
|
void operator /= (const V& f)
|
||||||
|
@ -310,7 +304,7 @@ public:
|
||||||
// swizzlers - create a subvector of specific components
|
// swizzlers - create a subvector of specific components
|
||||||
// e.g. Vec2 uv() { return Vec2(x,y); }
|
// e.g. Vec2 uv() { return Vec2(x,y); }
|
||||||
// _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
|
// _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
|
||||||
#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); }
|
#define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); }
|
||||||
#define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \
|
#define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \
|
||||||
_DEFINE_SWIZZLER2(a, b, a##b); \
|
_DEFINE_SWIZZLER2(a, b, a##b); \
|
||||||
_DEFINE_SWIZZLER2(a, b, a2##b2); \
|
_DEFINE_SWIZZLER2(a, b, a2##b2); \
|
||||||
|
@ -319,27 +313,13 @@ public:
|
||||||
_DEFINE_SWIZZLER2(b, a, b##a); \
|
_DEFINE_SWIZZLER2(b, a, b##a); \
|
||||||
_DEFINE_SWIZZLER2(b, a, b2##a2); \
|
_DEFINE_SWIZZLER2(b, a, b2##a2); \
|
||||||
_DEFINE_SWIZZLER2(b, a, b3##a3); \
|
_DEFINE_SWIZZLER2(b, a, b3##a3); \
|
||||||
_DEFINE_SWIZZLER2(b, a, b4##a4);
|
_DEFINE_SWIZZLER2(b, a, b4##a4)
|
||||||
|
|
||||||
DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t);
|
DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t);
|
||||||
DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q);
|
DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q);
|
||||||
DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q);
|
DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q);
|
||||||
#undef DEFINE_SWIZZLER2
|
#undef DEFINE_SWIZZLER2
|
||||||
#undef _DEFINE_SWIZZLER2
|
#undef _DEFINE_SWIZZLER2
|
||||||
|
|
||||||
// Inserters to add new elements to effectively create larger vectors containing this Vec2
|
|
||||||
Vec4<T> InsertBeforeX(const T& value) {
|
|
||||||
return Vec4<T>(value, x, y, z);
|
|
||||||
}
|
|
||||||
Vec4<T> InsertBeforeY(const T& value) {
|
|
||||||
return Vec4<T>(x, value, y, z);
|
|
||||||
}
|
|
||||||
Vec4<T> InsertBeforeZ(const T& value) {
|
|
||||||
return Vec4<T>(x, y, value, z);
|
|
||||||
}
|
|
||||||
Vec4<T> Append(const T& value) {
|
|
||||||
return Vec4<T>(x, y, z, value);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T, typename V>
|
template<typename T, typename V>
|
||||||
|
@ -348,16 +328,27 @@ Vec3<T> operator * (const V& f, const Vec3<T>& vec)
|
||||||
return Vec3<T>(f*vec.x,f*vec.y,f*vec.z);
|
return Vec3<T>(f*vec.x,f*vec.y,f*vec.z);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline float Vec3<float>::Length() const {
|
||||||
|
return std::sqrt(x * x + y * y + z * z);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline Vec3<float> Vec3<float>::Normalized() const {
|
||||||
|
return *this / Length();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
typedef Vec3<float> Vec3f;
|
typedef Vec3<float> Vec3f;
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
class Vec4
|
class Vec4
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
struct
|
T x;
|
||||||
{
|
T y;
|
||||||
T x,y,z,w;
|
T z;
|
||||||
};
|
T w;
|
||||||
|
|
||||||
T* AsArray() { return &x; }
|
T* AsArray() { return &x; }
|
||||||
|
|
||||||
|
@ -383,34 +374,34 @@ public:
|
||||||
a[0] = x; a[1] = y; a[2] = z; a[3] = w;
|
a[0] = x; a[1] = y; a[2] = z; a[3] = w;
|
||||||
}
|
}
|
||||||
|
|
||||||
Vec4 operator +(const Vec4& other) const
|
Vec4<decltype(T{}+T{})> operator +(const Vec4& other) const
|
||||||
{
|
{
|
||||||
return Vec4(x+other.x, y+other.y, z+other.z, w+other.w);
|
return MakeVec(x+other.x, y+other.y, z+other.z, w+other.w);
|
||||||
}
|
}
|
||||||
void operator += (const Vec4& other)
|
void operator += (const Vec4& other)
|
||||||
{
|
{
|
||||||
x+=other.x; y+=other.y; z+=other.z; w+=other.w;
|
x+=other.x; y+=other.y; z+=other.z; w+=other.w;
|
||||||
}
|
}
|
||||||
Vec4 operator -(const Vec4 &other) const
|
Vec4<decltype(T{}-T{})> operator -(const Vec4 &other) const
|
||||||
{
|
{
|
||||||
return Vec4(x-other.x, y-other.y, z-other.z, w-other.w);
|
return MakeVec(x-other.x, y-other.y, z-other.z, w-other.w);
|
||||||
}
|
}
|
||||||
void operator -= (const Vec4 &other)
|
void operator -= (const Vec4 &other)
|
||||||
{
|
{
|
||||||
x-=other.x; y-=other.y; z-=other.z; w-=other.w;
|
x-=other.x; y-=other.y; z-=other.z; w-=other.w;
|
||||||
}
|
}
|
||||||
Vec4 operator -() const
|
Vec4<decltype(-T{})> operator -() const
|
||||||
{
|
{
|
||||||
return Vec4(-x,-y,-z,-w);
|
return MakeVec(-x,-y,-z,-w);
|
||||||
}
|
}
|
||||||
Vec4 operator * (const Vec4 &other) const
|
Vec4<decltype(T{}*T{})> operator * (const Vec4 &other) const
|
||||||
{
|
{
|
||||||
return Vec4(x*other.x, y*other.y, z*other.z, w*other.w);
|
return MakeVec(x*other.x, y*other.y, z*other.z, w*other.w);
|
||||||
}
|
}
|
||||||
template<typename V>
|
template<typename V>
|
||||||
Vec4 operator * (const V& f) const
|
Vec4<decltype(T{}*V{})> operator * (const V& f) const
|
||||||
{
|
{
|
||||||
return Vec4(x*f,y*f,z*f,w*f);
|
return MakeVec(x*f,y*f,z*f,w*f);
|
||||||
}
|
}
|
||||||
template<typename V>
|
template<typename V>
|
||||||
void operator *= (const V& f)
|
void operator *= (const V& f)
|
||||||
|
@ -418,9 +409,9 @@ public:
|
||||||
x*=f; y*=f; z*=f; w*=f;
|
x*=f; y*=f; z*=f; w*=f;
|
||||||
}
|
}
|
||||||
template<typename V>
|
template<typename V>
|
||||||
Vec4 operator / (const V& f) const
|
Vec4<decltype(T{}/V{})> operator / (const V& f) const
|
||||||
{
|
{
|
||||||
return Vec4(x/f,y/f,z/f,w/f);
|
return MakeVec(x/f,y/f,z/f,w/f);
|
||||||
}
|
}
|
||||||
template<typename V>
|
template<typename V>
|
||||||
void operator /= (const V& f)
|
void operator /= (const V& f)
|
||||||
|
@ -469,12 +460,12 @@ public:
|
||||||
// swizzlers - create a subvector of specific components
|
// swizzlers - create a subvector of specific components
|
||||||
// e.g. Vec2 uv() { return Vec2(x,y); }
|
// e.g. Vec2 uv() { return Vec2(x,y); }
|
||||||
// _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
|
// _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
|
||||||
#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); }
|
#define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); }
|
||||||
#define DEFINE_SWIZZLER2(a, b, a2, b2) \
|
#define DEFINE_SWIZZLER2(a, b, a2, b2) \
|
||||||
_DEFINE_SWIZZLER2(a, b, a##b); \
|
_DEFINE_SWIZZLER2(a, b, a##b); \
|
||||||
_DEFINE_SWIZZLER2(a, b, a2##b2); \
|
_DEFINE_SWIZZLER2(a, b, a2##b2); \
|
||||||
_DEFINE_SWIZZLER2(b, a, b##a); \
|
_DEFINE_SWIZZLER2(b, a, b##a); \
|
||||||
_DEFINE_SWIZZLER2(b, a, b2##a2);
|
_DEFINE_SWIZZLER2(b, a, b2##a2)
|
||||||
|
|
||||||
DEFINE_SWIZZLER2(x, y, r, g);
|
DEFINE_SWIZZLER2(x, y, r, g);
|
||||||
DEFINE_SWIZZLER2(x, z, r, b);
|
DEFINE_SWIZZLER2(x, z, r, b);
|
||||||
|
@ -485,7 +476,7 @@ public:
|
||||||
#undef DEFINE_SWIZZLER2
|
#undef DEFINE_SWIZZLER2
|
||||||
#undef _DEFINE_SWIZZLER2
|
#undef _DEFINE_SWIZZLER2
|
||||||
|
|
||||||
#define _DEFINE_SWIZZLER3(a, b, c, name) Vec3<T> name() const { return Vec3<T>(a, b, c); }
|
#define _DEFINE_SWIZZLER3(a, b, c, name) const Vec3<T> name() const { return Vec3<T>(a, b, c); }
|
||||||
#define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \
|
#define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \
|
||||||
_DEFINE_SWIZZLER3(a, b, c, a##b##c); \
|
_DEFINE_SWIZZLER3(a, b, c, a##b##c); \
|
||||||
_DEFINE_SWIZZLER3(a, c, b, a##c##b); \
|
_DEFINE_SWIZZLER3(a, c, b, a##c##b); \
|
||||||
|
@ -498,7 +489,7 @@ public:
|
||||||
_DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \
|
_DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \
|
||||||
_DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \
|
_DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \
|
||||||
_DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \
|
_DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \
|
||||||
_DEFINE_SWIZZLER3(c, b, a, c2##b2##a2);
|
_DEFINE_SWIZZLER3(c, b, a, c2##b2##a2)
|
||||||
|
|
||||||
DEFINE_SWIZZLER3(x, y, z, r, g, b);
|
DEFINE_SWIZZLER3(x, y, z, r, g, b);
|
||||||
DEFINE_SWIZZLER3(x, y, w, r, g, a);
|
DEFINE_SWIZZLER3(x, y, w, r, g, a);
|
||||||
|
@ -510,69 +501,121 @@ public:
|
||||||
|
|
||||||
|
|
||||||
template<typename T, typename V>
|
template<typename T, typename V>
|
||||||
Vec4<T> operator * (const V& f, const Vec4<T>& vec)
|
Vec4<decltype(V{}*T{})> operator * (const V& f, const Vec4<T>& vec)
|
||||||
{
|
{
|
||||||
return Vec4<T>(f*vec.x,f*vec.y,f*vec.z,f*vec.w);
|
return MakeVec(f*vec.x,f*vec.y,f*vec.z,f*vec.w);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef Vec4<float> Vec4f;
|
typedef Vec4<float> Vec4f;
|
||||||
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static inline T Dot(const Vec2<T>& a, const Vec2<T>& b)
|
static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec2<T>& a, const Vec2<T>& b)
|
||||||
{
|
{
|
||||||
return a.x*b.x + a.y*b.y;
|
return a.x*b.x + a.y*b.y;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static inline T Dot(const Vec3<T>& a, const Vec3<T>& b)
|
static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec3<T>& a, const Vec3<T>& b)
|
||||||
{
|
{
|
||||||
return a.x*b.x + a.y*b.y + a.z*b.z;
|
return a.x*b.x + a.y*b.y + a.z*b.z;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static inline T Dot(const Vec4<T>& a, const Vec4<T>& b)
|
static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec4<T>& a, const Vec4<T>& b)
|
||||||
{
|
{
|
||||||
return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
|
return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static inline Vec3<T> Cross(const Vec3<T>& a, const Vec3<T>& b)
|
static inline Vec3<decltype(T{}*T{}-T{}*T{})> Cross(const Vec3<T>& a, const Vec3<T>& b)
|
||||||
{
|
{
|
||||||
return Vec3<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
|
return MakeVec(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
|
||||||
}
|
}
|
||||||
|
|
||||||
// linear interpolation via float: 0.0=begin, 1.0=end
|
// linear interpolation via float: 0.0=begin, 1.0=end
|
||||||
template<typename X>
|
template<typename X>
|
||||||
static inline X Lerp(const X& begin, const X& end, const float t)
|
static inline decltype(X{}*float{}+X{}*float{}) Lerp(const X& begin, const X& end, const float t)
|
||||||
{
|
{
|
||||||
return begin*(1.f-t) + end*t;
|
return begin*(1.f-t) + end*t;
|
||||||
}
|
}
|
||||||
|
|
||||||
// linear interpolation via int: 0=begin, base=end
|
// linear interpolation via int: 0=begin, base=end
|
||||||
template<typename X, int base>
|
template<typename X, int base>
|
||||||
static inline X LerpInt(const X& begin, const X& end, const int t)
|
static inline decltype((X{}*int{}+X{}*int{}) / base) LerpInt(const X& begin, const X& end, const int t)
|
||||||
{
|
{
|
||||||
return (begin*(base-t) + end*t) / base;
|
return (begin*(base-t) + end*t) / base;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Utility vector factories
|
// Utility vector factories
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static inline Vec2<T> MakeVec2(const T& x, const T& y)
|
static inline Vec2<T> MakeVec(const T& x, const T& y)
|
||||||
{
|
{
|
||||||
return Vec2<T>{x, y};
|
return Vec2<T>{x, y};
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static inline Vec3<T> MakeVec3(const T& x, const T& y, const T& z)
|
static inline Vec3<T> MakeVec(const T& x, const T& y, const T& z)
|
||||||
{
|
{
|
||||||
return Vec3<T>{x, y, z};
|
return Vec3<T>{x, y, z};
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static inline Vec4<T> MakeVec4(const T& x, const T& y, const T& z, const T& w)
|
static inline Vec4<T> MakeVec(const T& x, const T& y, const Vec2<T>& zw)
|
||||||
|
{
|
||||||
|
return MakeVec(x, y, zw[0], zw[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static inline Vec3<T> MakeVec(const Vec2<T>& xy, const T& z)
|
||||||
|
{
|
||||||
|
return MakeVec(xy[0], xy[1], z);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static inline Vec3<T> MakeVec(const T& x, const Vec2<T>& yz)
|
||||||
|
{
|
||||||
|
return MakeVec(x, yz[0], yz[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static inline Vec4<T> MakeVec(const T& x, const T& y, const T& z, const T& w)
|
||||||
{
|
{
|
||||||
return Vec4<T>{x, y, z, w};
|
return Vec4<T>{x, y, z, w};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static inline Vec4<T> MakeVec(const Vec2<T>& xy, const T& z, const T& w)
|
||||||
|
{
|
||||||
|
return MakeVec(xy[0], xy[1], z, w);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static inline Vec4<T> MakeVec(const T& x, const Vec2<T>& yz, const T& w)
|
||||||
|
{
|
||||||
|
return MakeVec(x, yz[0], yz[1], w);
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: This has priority over "Vec2<Vec2<T>> MakeVec(const Vec2<T>& x, const Vec2<T>& y)".
|
||||||
|
// Even if someone wanted to use an odd object like Vec2<Vec2<T>>, the compiler would error
|
||||||
|
// out soon enough due to misuse of the returned structure.
|
||||||
|
template<typename T>
|
||||||
|
static inline Vec4<T> MakeVec(const Vec2<T>& xy, const Vec2<T>& zw)
|
||||||
|
{
|
||||||
|
return MakeVec(xy[0], xy[1], zw[0], zw[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static inline Vec4<T> MakeVec(const Vec3<T>& xyz, const T& w)
|
||||||
|
{
|
||||||
|
return MakeVec(xyz[0], xyz[1], xyz[2], w);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static inline Vec4<T> MakeVec(const T& x, const Vec2<T>& yzw)
|
||||||
|
{
|
||||||
|
return MakeVec(x, yzw[0], yzw[1], yzw[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <initializer_list>
|
#include <initializer_list>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
@ -57,7 +58,7 @@ struct Regs {
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x1);
|
INSERT_PADDING_WORDS(0x1);
|
||||||
|
|
||||||
union {
|
union VSOutputAttributes {
|
||||||
// Maps components of output vertex attributes to semantics
|
// Maps components of output vertex attributes to semantics
|
||||||
enum Semantic : u32
|
enum Semantic : u32
|
||||||
{
|
{
|
||||||
|
@ -94,7 +95,137 @@ struct Regs {
|
||||||
BitField<16, 16, u32> y;
|
BitField<16, 16, u32> y;
|
||||||
} viewport_corner;
|
} viewport_corner;
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0xa7);
|
INSERT_PADDING_WORDS(0x17);
|
||||||
|
|
||||||
|
struct TextureConfig {
|
||||||
|
INSERT_PADDING_WORDS(0x1);
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField< 0, 16, u32> height;
|
||||||
|
BitField<16, 16, u32> width;
|
||||||
|
};
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0x2);
|
||||||
|
|
||||||
|
u32 address;
|
||||||
|
|
||||||
|
u32 GetPhysicalAddress() {
|
||||||
|
return DecodeAddressRegister(address) - Memory::FCRAM_PADDR + Memory::HEAP_GSP_VADDR;
|
||||||
|
}
|
||||||
|
|
||||||
|
// texture1 and texture2 store the texture format directly after the address
|
||||||
|
// whereas texture0 inserts some additional flags inbetween.
|
||||||
|
// Hence, we store the format separately so that all other parameters can be described
|
||||||
|
// in a single structure.
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class TextureFormat : u32 {
|
||||||
|
RGBA8 = 0,
|
||||||
|
RGB8 = 1,
|
||||||
|
RGBA5551 = 2,
|
||||||
|
RGB565 = 3,
|
||||||
|
RGBA4 = 4,
|
||||||
|
|
||||||
|
// TODO: Support for the other formats is not implemented, yet.
|
||||||
|
// Seems like they are luminance formats and compressed textures.
|
||||||
|
};
|
||||||
|
|
||||||
|
BitField<0, 1, u32> texturing_enable;
|
||||||
|
TextureConfig texture0;
|
||||||
|
INSERT_PADDING_WORDS(0x8);
|
||||||
|
BitField<0, 4, TextureFormat> texture0_format;
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0x31);
|
||||||
|
|
||||||
|
// 0xc0-0xff: Texture Combiner (akin to glTexEnv)
|
||||||
|
struct TevStageConfig {
|
||||||
|
enum class Source : u32 {
|
||||||
|
PrimaryColor = 0x0,
|
||||||
|
Texture0 = 0x3,
|
||||||
|
Texture1 = 0x4,
|
||||||
|
Texture2 = 0x5,
|
||||||
|
Texture3 = 0x6,
|
||||||
|
// 0x7-0xc = primary color??
|
||||||
|
Constant = 0xe,
|
||||||
|
Previous = 0xf,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class ColorModifier : u32 {
|
||||||
|
SourceColor = 0,
|
||||||
|
OneMinusSourceColor = 1,
|
||||||
|
SourceAlpha = 2,
|
||||||
|
OneMinusSourceAlpha = 3,
|
||||||
|
|
||||||
|
// Other values seem to be non-standard extensions
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class AlphaModifier : u32 {
|
||||||
|
SourceAlpha = 0,
|
||||||
|
OneMinusSourceAlpha = 1,
|
||||||
|
|
||||||
|
// Other values seem to be non-standard extensions
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class Operation : u32 {
|
||||||
|
Replace = 0,
|
||||||
|
Modulate = 1,
|
||||||
|
Add = 2,
|
||||||
|
AddSigned = 3,
|
||||||
|
Lerp = 4,
|
||||||
|
Subtract = 5,
|
||||||
|
};
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField< 0, 4, Source> color_source1;
|
||||||
|
BitField< 4, 4, Source> color_source2;
|
||||||
|
BitField< 8, 4, Source> color_source3;
|
||||||
|
BitField<16, 4, Source> alpha_source1;
|
||||||
|
BitField<20, 4, Source> alpha_source2;
|
||||||
|
BitField<24, 4, Source> alpha_source3;
|
||||||
|
};
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField< 0, 4, ColorModifier> color_modifier1;
|
||||||
|
BitField< 4, 4, ColorModifier> color_modifier2;
|
||||||
|
BitField< 8, 4, ColorModifier> color_modifier3;
|
||||||
|
BitField<12, 3, AlphaModifier> alpha_modifier1;
|
||||||
|
BitField<16, 3, AlphaModifier> alpha_modifier2;
|
||||||
|
BitField<20, 3, AlphaModifier> alpha_modifier3;
|
||||||
|
};
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField< 0, 4, Operation> color_op;
|
||||||
|
BitField<16, 4, Operation> alpha_op;
|
||||||
|
};
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField< 0, 8, u32> const_r;
|
||||||
|
BitField< 8, 8, u32> const_g;
|
||||||
|
BitField<16, 8, u32> const_b;
|
||||||
|
BitField<24, 8, u32> const_a;
|
||||||
|
};
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0x1);
|
||||||
|
};
|
||||||
|
|
||||||
|
TevStageConfig tev_stage0;
|
||||||
|
INSERT_PADDING_WORDS(0x3);
|
||||||
|
TevStageConfig tev_stage1;
|
||||||
|
INSERT_PADDING_WORDS(0x3);
|
||||||
|
TevStageConfig tev_stage2;
|
||||||
|
INSERT_PADDING_WORDS(0x3);
|
||||||
|
TevStageConfig tev_stage3;
|
||||||
|
INSERT_PADDING_WORDS(0x13);
|
||||||
|
TevStageConfig tev_stage4;
|
||||||
|
INSERT_PADDING_WORDS(0x3);
|
||||||
|
TevStageConfig tev_stage5;
|
||||||
|
INSERT_PADDING_WORDS(0x13);
|
||||||
|
|
||||||
|
const std::array<Regs::TevStageConfig,6> GetTevStages() const {
|
||||||
|
return { tev_stage0, tev_stage1,
|
||||||
|
tev_stage2, tev_stage3,
|
||||||
|
tev_stage4, tev_stage5 };
|
||||||
|
};
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
enum ColorFormat : u32 {
|
enum ColorFormat : u32 {
|
||||||
|
@ -403,6 +534,15 @@ struct Regs {
|
||||||
ADD_FIELD(viewport_depth_range);
|
ADD_FIELD(viewport_depth_range);
|
||||||
ADD_FIELD(viewport_depth_far_plane);
|
ADD_FIELD(viewport_depth_far_plane);
|
||||||
ADD_FIELD(viewport_corner);
|
ADD_FIELD(viewport_corner);
|
||||||
|
ADD_FIELD(texturing_enable);
|
||||||
|
ADD_FIELD(texture0);
|
||||||
|
ADD_FIELD(texture0_format);
|
||||||
|
ADD_FIELD(tev_stage0);
|
||||||
|
ADD_FIELD(tev_stage1);
|
||||||
|
ADD_FIELD(tev_stage2);
|
||||||
|
ADD_FIELD(tev_stage3);
|
||||||
|
ADD_FIELD(tev_stage4);
|
||||||
|
ADD_FIELD(tev_stage5);
|
||||||
ADD_FIELD(framebuffer);
|
ADD_FIELD(framebuffer);
|
||||||
ADD_FIELD(vertex_attributes);
|
ADD_FIELD(vertex_attributes);
|
||||||
ADD_FIELD(index_array);
|
ADD_FIELD(index_array);
|
||||||
|
@ -460,6 +600,15 @@ ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e);
|
||||||
ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
|
ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
|
||||||
ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
|
ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
|
||||||
ASSERT_REG_POSITION(viewport_corner, 0x68);
|
ASSERT_REG_POSITION(viewport_corner, 0x68);
|
||||||
|
ASSERT_REG_POSITION(texturing_enable, 0x80);
|
||||||
|
ASSERT_REG_POSITION(texture0, 0x81);
|
||||||
|
ASSERT_REG_POSITION(texture0_format, 0x8e);
|
||||||
|
ASSERT_REG_POSITION(tev_stage0, 0xc0);
|
||||||
|
ASSERT_REG_POSITION(tev_stage1, 0xc8);
|
||||||
|
ASSERT_REG_POSITION(tev_stage2, 0xd0);
|
||||||
|
ASSERT_REG_POSITION(tev_stage3, 0xd8);
|
||||||
|
ASSERT_REG_POSITION(tev_stage4, 0xf0);
|
||||||
|
ASSERT_REG_POSITION(tev_stage5, 0xf8);
|
||||||
ASSERT_REG_POSITION(framebuffer, 0x110);
|
ASSERT_REG_POSITION(framebuffer, 0x110);
|
||||||
ASSERT_REG_POSITION(vertex_attributes, 0x200);
|
ASSERT_REG_POSITION(vertex_attributes, 0x200);
|
||||||
ASSERT_REG_POSITION(index_array, 0x227);
|
ASSERT_REG_POSITION(index_array, 0x227);
|
||||||
|
|
|
@ -2,21 +2,23 @@
|
||||||
// Licensed under GPLv2
|
// Licensed under GPLv2
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include "clipper.h"
|
|
||||||
#include "pica.h"
|
#include "pica.h"
|
||||||
#include "primitive_assembly.h"
|
#include "primitive_assembly.h"
|
||||||
#include "vertex_shader.h"
|
#include "vertex_shader.h"
|
||||||
|
|
||||||
|
#include "video_core/debug_utils/debug_utils.h"
|
||||||
|
|
||||||
namespace Pica {
|
namespace Pica {
|
||||||
|
|
||||||
namespace PrimitiveAssembly {
|
template<typename VertexType>
|
||||||
|
PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topology)
|
||||||
|
: topology(topology), buffer_index(0) {
|
||||||
|
}
|
||||||
|
|
||||||
static OutputVertex buffer[2];
|
template<typename VertexType>
|
||||||
static int buffer_index = 0; // TODO: reset this on emulation restart
|
void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler)
|
||||||
|
|
||||||
void SubmitVertex(OutputVertex& vtx)
|
|
||||||
{
|
{
|
||||||
switch (registers.triangle_topology) {
|
switch (topology) {
|
||||||
case Regs::TriangleTopology::List:
|
case Regs::TriangleTopology::List:
|
||||||
case Regs::TriangleTopology::ListIndexed:
|
case Regs::TriangleTopology::ListIndexed:
|
||||||
if (buffer_index < 2) {
|
if (buffer_index < 2) {
|
||||||
|
@ -24,7 +26,7 @@ void SubmitVertex(OutputVertex& vtx)
|
||||||
} else {
|
} else {
|
||||||
buffer_index = 0;
|
buffer_index = 0;
|
||||||
|
|
||||||
Clipper::ProcessTriangle(buffer[0], buffer[1], vtx);
|
triangle_handler(buffer[0], buffer[1], vtx);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -32,7 +34,7 @@ void SubmitVertex(OutputVertex& vtx)
|
||||||
if (buffer_index == 2) {
|
if (buffer_index == 2) {
|
||||||
buffer_index = 0;
|
buffer_index = 0;
|
||||||
|
|
||||||
Clipper::ProcessTriangle(buffer[0], buffer[1], vtx);
|
triangle_handler(buffer[0], buffer[1], vtx);
|
||||||
|
|
||||||
buffer[1] = vtx;
|
buffer[1] = vtx;
|
||||||
} else {
|
} else {
|
||||||
|
@ -41,11 +43,15 @@ void SubmitVertex(OutputVertex& vtx)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
ERROR_LOG(GPU, "Unknown triangle mode %x:", (int)registers.triangle_topology.Value());
|
ERROR_LOG(GPU, "Unknown triangle topology %x:", (int)topology);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
// explicitly instantiate use cases
|
||||||
|
template
|
||||||
|
struct PrimitiveAssembler<VertexShader::OutputVertex>;
|
||||||
|
template
|
||||||
|
struct PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex>;
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -4,18 +4,40 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
|
#include "video_core/pica.h"
|
||||||
|
|
||||||
|
#include "video_core/vertex_shader.h"
|
||||||
|
|
||||||
namespace Pica {
|
namespace Pica {
|
||||||
|
|
||||||
namespace VertexShader {
|
/*
|
||||||
struct OutputVertex;
|
* Utility class to build triangles from a series of vertices,
|
||||||
}
|
* according to a given triangle topology.
|
||||||
|
*/
|
||||||
|
template<typename VertexType>
|
||||||
|
struct PrimitiveAssembler {
|
||||||
|
using TriangleHandler = std::function<void(VertexType& v0,
|
||||||
|
VertexType& v1,
|
||||||
|
VertexType& v2)>;
|
||||||
|
|
||||||
namespace PrimitiveAssembly {
|
PrimitiveAssembler(Regs::TriangleTopology topology);
|
||||||
|
|
||||||
using VertexShader::OutputVertex;
|
/*
|
||||||
|
* Queues a vertex, builds primitives from the vertex queue according to the given
|
||||||
|
* triangle topology, and calls triangle_handler for each generated primitive.
|
||||||
|
* NOTE: We could specify the triangle handler in the constructor, but this way we can
|
||||||
|
* keep event and handler code next to each other.
|
||||||
|
*/
|
||||||
|
void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler);
|
||||||
|
|
||||||
|
private:
|
||||||
|
Regs::TriangleTopology topology;
|
||||||
|
|
||||||
|
int buffer_index;
|
||||||
|
VertexType buffer[2];
|
||||||
|
};
|
||||||
|
|
||||||
void SubmitVertex(OutputVertex& vtx);
|
|
||||||
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -11,6 +11,8 @@
|
||||||
#include "rasterizer.h"
|
#include "rasterizer.h"
|
||||||
#include "vertex_shader.h"
|
#include "vertex_shader.h"
|
||||||
|
|
||||||
|
#include "debug_utils/debug_utils.h"
|
||||||
|
|
||||||
namespace Pica {
|
namespace Pica {
|
||||||
|
|
||||||
namespace Rasterizer {
|
namespace Rasterizer {
|
||||||
|
@ -78,10 +80,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
||||||
u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
|
u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
|
||||||
u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
|
u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
|
||||||
|
|
||||||
min_x = min_x & Fix12P4::IntMask();
|
min_x &= Fix12P4::IntMask();
|
||||||
min_y = min_y & Fix12P4::IntMask();
|
min_y &= Fix12P4::IntMask();
|
||||||
max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask();
|
max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask());
|
||||||
max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask();
|
max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask());
|
||||||
|
|
||||||
// Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not
|
// Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not
|
||||||
// drawn. Pixels on any other triangle border are drawn. This is implemented with three bias
|
// drawn. Pixels on any other triangle border are drawn. This is implemented with three bias
|
||||||
|
@ -112,10 +114,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
||||||
auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
|
auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
|
||||||
const Math::Vec2<Fix12P4>& vtx2,
|
const Math::Vec2<Fix12P4>& vtx2,
|
||||||
const Math::Vec2<Fix12P4>& vtx3) {
|
const Math::Vec2<Fix12P4>& vtx3) {
|
||||||
const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0);
|
const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
|
||||||
const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0);
|
const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
|
||||||
// TODO: There is a very small chance this will overflow for sizeof(int) == 4
|
// TODO: There is a very small chance this will overflow for sizeof(int) == 4
|
||||||
return Cross(vec1, vec2).z;
|
return Math::Cross(vec1, vec2).z;
|
||||||
};
|
};
|
||||||
|
|
||||||
int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
|
int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
|
||||||
|
@ -143,13 +145,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
||||||
//
|
//
|
||||||
// The generalization to three vertices is straightforward in baricentric coordinates.
|
// The generalization to three vertices is straightforward in baricentric coordinates.
|
||||||
auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
|
auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
|
||||||
auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w,
|
auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w,
|
||||||
attr1 / v1.pos.w,
|
attr1 / v1.pos.w,
|
||||||
attr2 / v2.pos.w);
|
attr2 / v2.pos.w);
|
||||||
auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w,
|
auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w,
|
||||||
float24::FromFloat32(1.f) / v1.pos.w,
|
float24::FromFloat32(1.f) / v1.pos.w,
|
||||||
float24::FromFloat32(1.f) / v2.pos.w);
|
float24::FromFloat32(1.f) / v2.pos.w);
|
||||||
auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0),
|
auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(w0),
|
||||||
float24::FromFloat32(w1),
|
float24::FromFloat32(w1),
|
||||||
float24::FromFloat32(w2));
|
float24::FromFloat32(w2));
|
||||||
|
|
||||||
|
@ -165,12 +167,196 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
||||||
(u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
|
(u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Math::Vec4<u8> texture_color{};
|
||||||
|
float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
|
||||||
|
float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
|
||||||
|
if (registers.texturing_enable) {
|
||||||
|
// Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
|
||||||
|
// of which is composed of four 2x2 subtiles each of which is composed of four texels.
|
||||||
|
// Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
|
||||||
|
// texels are laid out in a 2x2 subtile like this:
|
||||||
|
// 2 3
|
||||||
|
// 0 1
|
||||||
|
//
|
||||||
|
// The full 8x8 tile has the texels arranged like this:
|
||||||
|
//
|
||||||
|
// 42 43 46 47 58 59 62 63
|
||||||
|
// 40 41 44 45 56 57 60 61
|
||||||
|
// 34 35 38 39 50 51 54 55
|
||||||
|
// 32 33 36 37 48 49 52 53
|
||||||
|
// 10 11 14 15 26 27 30 31
|
||||||
|
// 08 09 12 13 24 25 28 29
|
||||||
|
// 02 03 06 07 18 19 22 23
|
||||||
|
// 00 01 04 05 16 17 20 21
|
||||||
|
|
||||||
|
// TODO: This is currently hardcoded for RGB8
|
||||||
|
u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress());
|
||||||
|
|
||||||
|
// TODO(neobrain): Not sure if this swizzling pattern is used for all textures.
|
||||||
|
// To be flexible in case different but similar patterns are used, we keep this
|
||||||
|
// somewhat inefficient code around for now.
|
||||||
|
int s = (int)(u * float24::FromFloat32(registers.texture0.width)).ToFloat32();
|
||||||
|
int t = (int)(v * float24::FromFloat32(registers.texture0.height)).ToFloat32();
|
||||||
|
int texel_index_within_tile = 0;
|
||||||
|
for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
|
||||||
|
int sub_tile_width = 1 << block_size_index;
|
||||||
|
int sub_tile_height = 1 << block_size_index;
|
||||||
|
|
||||||
|
int sub_tile_index = (s & sub_tile_width) << block_size_index;
|
||||||
|
sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index);
|
||||||
|
texel_index_within_tile += sub_tile_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int block_width = 8;
|
||||||
|
const int block_height = 8;
|
||||||
|
|
||||||
|
int coarse_s = (s / block_width) * block_width;
|
||||||
|
int coarse_t = (t / block_height) * block_height;
|
||||||
|
|
||||||
|
const int row_stride = registers.texture0.width * 3;
|
||||||
|
u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3;
|
||||||
|
texture_color.r() = source_ptr[2];
|
||||||
|
texture_color.g() = source_ptr[1];
|
||||||
|
texture_color.b() = source_ptr[0];
|
||||||
|
texture_color.a() = 0xFF;
|
||||||
|
|
||||||
|
DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Texture environment - consists of 6 stages of color and alpha combining.
|
||||||
|
//
|
||||||
|
// Color combiners take three input color values from some source (e.g. interpolated
|
||||||
|
// vertex color, texture color, previous stage, etc), perform some very simple
|
||||||
|
// operations on each of them (e.g. inversion) and then calculate the output color
|
||||||
|
// with some basic arithmetic. Alpha combiners can be configured separately but work
|
||||||
|
// analogously.
|
||||||
|
Math::Vec4<u8> combiner_output;
|
||||||
|
for (auto tev_stage : registers.GetTevStages()) {
|
||||||
|
using Source = Regs::TevStageConfig::Source;
|
||||||
|
using ColorModifier = Regs::TevStageConfig::ColorModifier;
|
||||||
|
using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
|
||||||
|
using Operation = Regs::TevStageConfig::Operation;
|
||||||
|
|
||||||
|
auto GetColorSource = [&](Source source) -> Math::Vec3<u8> {
|
||||||
|
switch (source) {
|
||||||
|
case Source::PrimaryColor:
|
||||||
|
return primary_color.rgb();
|
||||||
|
|
||||||
|
case Source::Texture0:
|
||||||
|
return texture_color.rgb();
|
||||||
|
|
||||||
|
case Source::Constant:
|
||||||
|
return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b};
|
||||||
|
|
||||||
|
case Source::Previous:
|
||||||
|
return combiner_output.rgb();
|
||||||
|
|
||||||
|
default:
|
||||||
|
ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
auto GetAlphaSource = [&](Source source) -> u8 {
|
||||||
|
switch (source) {
|
||||||
|
case Source::PrimaryColor:
|
||||||
|
return primary_color.a();
|
||||||
|
|
||||||
|
case Source::Texture0:
|
||||||
|
return texture_color.a();
|
||||||
|
|
||||||
|
case Source::Constant:
|
||||||
|
return tev_stage.const_a;
|
||||||
|
|
||||||
|
case Source::Previous:
|
||||||
|
return combiner_output.a();
|
||||||
|
|
||||||
|
default:
|
||||||
|
ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> {
|
||||||
|
switch (factor)
|
||||||
|
{
|
||||||
|
case ColorModifier::SourceColor:
|
||||||
|
return values;
|
||||||
|
default:
|
||||||
|
ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
|
||||||
|
switch (factor) {
|
||||||
|
case AlphaModifier::SourceAlpha:
|
||||||
|
return value;
|
||||||
|
default:
|
||||||
|
ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
|
||||||
|
switch (op) {
|
||||||
|
case Operation::Replace:
|
||||||
|
return input[0];
|
||||||
|
|
||||||
|
case Operation::Modulate:
|
||||||
|
return ((input[0] * input[1]) / 255).Cast<u8>();
|
||||||
|
|
||||||
|
default:
|
||||||
|
ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
|
||||||
|
switch (op) {
|
||||||
|
case Operation::Replace:
|
||||||
|
return input[0];
|
||||||
|
|
||||||
|
case Operation::Modulate:
|
||||||
|
return input[0] * input[1] / 255;
|
||||||
|
|
||||||
|
default:
|
||||||
|
ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// color combiner
|
||||||
|
// NOTE: Not sure if the alpha combiner might use the color output of the previous
|
||||||
|
// stage as input. Hence, we currently don't directly write the result to
|
||||||
|
// combiner_output.rgb(), but instead store it in a temporary variable until
|
||||||
|
// alpha combining has been done.
|
||||||
|
Math::Vec3<u8> color_result[3] = {
|
||||||
|
GetColorModifier(tev_stage.color_modifier1, GetColorSource(tev_stage.color_source1)),
|
||||||
|
GetColorModifier(tev_stage.color_modifier2, GetColorSource(tev_stage.color_source2)),
|
||||||
|
GetColorModifier(tev_stage.color_modifier3, GetColorSource(tev_stage.color_source3))
|
||||||
|
};
|
||||||
|
auto color_output = ColorCombine(tev_stage.color_op, color_result);
|
||||||
|
|
||||||
|
// alpha combiner
|
||||||
|
std::array<u8,3> alpha_result = {
|
||||||
|
GetAlphaModifier(tev_stage.alpha_modifier1, GetAlphaSource(tev_stage.alpha_source1)),
|
||||||
|
GetAlphaModifier(tev_stage.alpha_modifier2, GetAlphaSource(tev_stage.alpha_source2)),
|
||||||
|
GetAlphaModifier(tev_stage.alpha_modifier3, GetAlphaSource(tev_stage.alpha_source3))
|
||||||
|
};
|
||||||
|
auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
|
||||||
|
|
||||||
|
combiner_output = Math::MakeVec(color_output, alpha_output);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Not sure if the multiplication by 65535 has already been taken care
|
||||||
|
// of when transforming to screen coordinates or not.
|
||||||
u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
|
u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
|
||||||
(float)v1.screenpos[2].ToFloat32() * w1 +
|
(float)v1.screenpos[2].ToFloat32() * w1 +
|
||||||
(float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536?
|
(float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
|
||||||
SetDepth(x >> 4, y >> 4, z);
|
SetDepth(x >> 4, y >> 4, z);
|
||||||
|
|
||||||
DrawPixel(x >> 4, y >> 4, primary_color);
|
DrawPixel(x >> 4, y >> 4, combiner_output);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#include "pica.h"
|
#include "pica.h"
|
||||||
#include "vertex_shader.h"
|
#include "vertex_shader.h"
|
||||||
|
#include "debug_utils/debug_utils.h"
|
||||||
#include <core/mem_map.h>
|
#include <core/mem_map.h>
|
||||||
#include <common/file_util.h>
|
#include <common/file_util.h>
|
||||||
|
|
||||||
|
@ -50,6 +51,11 @@ struct VertexShaderState {
|
||||||
};
|
};
|
||||||
u32 call_stack[8]; // TODO: What is the maximal call stack depth?
|
u32 call_stack[8]; // TODO: What is the maximal call stack depth?
|
||||||
u32* call_stack_pointer;
|
u32* call_stack_pointer;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
u32 max_offset; // maximum program counter ever reached
|
||||||
|
u32 max_opdesc_id; // maximum swizzle pattern index ever used
|
||||||
|
} debug;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void ProcessShaderCode(VertexShaderState& state) {
|
static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
|
@ -57,27 +63,34 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
bool increment_pc = true;
|
bool increment_pc = true;
|
||||||
bool exit_loop = false;
|
bool exit_loop = false;
|
||||||
const Instruction& instr = *(const Instruction*)state.program_counter;
|
const Instruction& instr = *(const Instruction*)state.program_counter;
|
||||||
|
state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory));
|
||||||
|
|
||||||
const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1]
|
const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()]
|
||||||
: (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x
|
: (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x
|
||||||
: (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x
|
: (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x
|
||||||
: nullptr;
|
: nullptr;
|
||||||
const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2]
|
const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()]
|
||||||
: &state.temporary_registers[instr.common.src2-0x10].x;
|
: &state.temporary_registers[instr.common.src2.GetIndex()].x;
|
||||||
// TODO: Unsure about the limit values
|
float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()]
|
||||||
float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest]
|
: (instr.common.dest < 0x10) ? nullptr
|
||||||
: (instr.common.dest <= 0x3C) ? nullptr
|
: (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0]
|
||||||
: (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4]
|
|
||||||
: nullptr;
|
: nullptr;
|
||||||
|
|
||||||
const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
|
const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
|
||||||
|
const bool negate_src1 = swizzle.negate;
|
||||||
|
|
||||||
const float24 src1[4] = {
|
float24 src1[4] = {
|
||||||
src1_[(int)swizzle.GetSelectorSrc1(0)],
|
src1_[(int)swizzle.GetSelectorSrc1(0)],
|
||||||
src1_[(int)swizzle.GetSelectorSrc1(1)],
|
src1_[(int)swizzle.GetSelectorSrc1(1)],
|
||||||
src1_[(int)swizzle.GetSelectorSrc1(2)],
|
src1_[(int)swizzle.GetSelectorSrc1(2)],
|
||||||
src1_[(int)swizzle.GetSelectorSrc1(3)],
|
src1_[(int)swizzle.GetSelectorSrc1(3)],
|
||||||
};
|
};
|
||||||
|
if (negate_src1) {
|
||||||
|
src1[0] = src1[0] * float24::FromFloat32(-1);
|
||||||
|
src1[1] = src1[1] * float24::FromFloat32(-1);
|
||||||
|
src1[2] = src1[2] * float24::FromFloat32(-1);
|
||||||
|
src1[3] = src1[3] * float24::FromFloat32(-1);
|
||||||
|
}
|
||||||
const float24 src2[4] = {
|
const float24 src2[4] = {
|
||||||
src2_[(int)swizzle.GetSelectorSrc2(0)],
|
src2_[(int)swizzle.GetSelectorSrc2(0)],
|
||||||
src2_[(int)swizzle.GetSelectorSrc2(1)],
|
src2_[(int)swizzle.GetSelectorSrc2(1)],
|
||||||
|
@ -88,6 +101,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
switch (instr.opcode) {
|
switch (instr.opcode) {
|
||||||
case Instruction::OpCode::ADD:
|
case Instruction::OpCode::ADD:
|
||||||
{
|
{
|
||||||
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
if (!swizzle.DestComponentEnabled(i))
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
continue;
|
continue;
|
||||||
|
@ -100,6 +114,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
|
|
||||||
case Instruction::OpCode::MUL:
|
case Instruction::OpCode::MUL:
|
||||||
{
|
{
|
||||||
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
if (!swizzle.DestComponentEnabled(i))
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
continue;
|
continue;
|
||||||
|
@ -113,6 +128,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
case Instruction::OpCode::DP3:
|
case Instruction::OpCode::DP3:
|
||||||
case Instruction::OpCode::DP4:
|
case Instruction::OpCode::DP4:
|
||||||
{
|
{
|
||||||
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
float24 dot = float24::FromFloat32(0.f);
|
float24 dot = float24::FromFloat32(0.f);
|
||||||
int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
|
int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
|
||||||
for (int i = 0; i < num_components; ++i)
|
for (int i = 0; i < num_components; ++i)
|
||||||
|
@ -130,6 +146,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
// Reciprocal
|
// Reciprocal
|
||||||
case Instruction::OpCode::RCP:
|
case Instruction::OpCode::RCP:
|
||||||
{
|
{
|
||||||
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
if (!swizzle.DestComponentEnabled(i))
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
continue;
|
continue;
|
||||||
|
@ -145,6 +162,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
// Reciprocal Square Root
|
// Reciprocal Square Root
|
||||||
case Instruction::OpCode::RSQ:
|
case Instruction::OpCode::RSQ:
|
||||||
{
|
{
|
||||||
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
if (!swizzle.DestComponentEnabled(i))
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
continue;
|
continue;
|
||||||
|
@ -159,6 +177,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
|
|
||||||
case Instruction::OpCode::MOV:
|
case Instruction::OpCode::MOV:
|
||||||
{
|
{
|
||||||
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
if (!swizzle.DestComponentEnabled(i))
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
continue;
|
continue;
|
||||||
|
@ -172,8 +191,9 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) {
|
if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) {
|
||||||
exit_loop = true;
|
exit_loop = true;
|
||||||
} else {
|
} else {
|
||||||
state.program_counter = &shader_memory[*state.call_stack_pointer--];
|
// Jump back to call stack position, invalidate call stack entry, move up call stack pointer
|
||||||
*state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS;
|
state.program_counter = &shader_memory[*state.call_stack_pointer];
|
||||||
|
*state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -212,6 +232,8 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
|
||||||
|
|
||||||
const u32* main = &shader_memory[registers.vs_main_offset];
|
const u32* main = &shader_memory[registers.vs_main_offset];
|
||||||
state.program_counter = (u32*)main;
|
state.program_counter = (u32*)main;
|
||||||
|
state.debug.max_offset = 0;
|
||||||
|
state.debug.max_opdesc_id = 0;
|
||||||
|
|
||||||
// Setup input register table
|
// Setup input register table
|
||||||
const auto& attribute_register_map = registers.vs_input_register_map;
|
const auto& attribute_register_map = registers.vs_input_register_map;
|
||||||
|
@ -255,6 +277,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
|
||||||
state.call_stack_pointer = &state.call_stack[0];
|
state.call_stack_pointer = &state.call_stack[0];
|
||||||
|
|
||||||
ProcessShaderCode(state);
|
ProcessShaderCode(state);
|
||||||
|
DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data,
|
||||||
|
state.debug.max_opdesc_id, registers.vs_main_offset,
|
||||||
|
registers.vs_output_attributes);
|
||||||
|
|
||||||
DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
|
DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
|
||||||
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
|
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
|
||||||
|
|
|
@ -27,7 +27,6 @@ struct OutputVertex {
|
||||||
Math::Vec4<float24> dummy; // quaternions (not implemented, yet)
|
Math::Vec4<float24> dummy; // quaternions (not implemented, yet)
|
||||||
Math::Vec4<float24> color;
|
Math::Vec4<float24> color;
|
||||||
Math::Vec2<float24> tc0;
|
Math::Vec2<float24> tc0;
|
||||||
float24 tc0_v;
|
|
||||||
|
|
||||||
// Padding for optimal alignment
|
// Padding for optimal alignment
|
||||||
float24 pad[14];
|
float24 pad[14];
|
||||||
|
@ -36,6 +35,7 @@ struct OutputVertex {
|
||||||
|
|
||||||
// position after perspective divide
|
// position after perspective divide
|
||||||
Math::Vec3<float24> screenpos;
|
Math::Vec3<float24> screenpos;
|
||||||
|
float24 pad2;
|
||||||
|
|
||||||
// Linear interpolation
|
// Linear interpolation
|
||||||
// factor: 0=this, 1=vtx
|
// factor: 0=this, 1=vtx
|
||||||
|
@ -59,6 +59,7 @@ struct OutputVertex {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
|
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
|
||||||
|
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
|
||||||
|
|
||||||
union Instruction {
|
union Instruction {
|
||||||
enum class OpCode : u32 {
|
enum class OpCode : u32 {
|
||||||
|
@ -117,9 +118,78 @@ union Instruction {
|
||||||
// while "dest" addresses individual floats.
|
// while "dest" addresses individual floats.
|
||||||
union {
|
union {
|
||||||
BitField<0x00, 0x5, u32> operand_desc_id;
|
BitField<0x00, 0x5, u32> operand_desc_id;
|
||||||
BitField<0x07, 0x5, u32> src2;
|
|
||||||
BitField<0x0c, 0x7, u32> src1;
|
template<class BitFieldType>
|
||||||
BitField<0x13, 0x7, u32> dest;
|
struct SourceRegister : BitFieldType {
|
||||||
|
enum RegisterType {
|
||||||
|
Input,
|
||||||
|
Temporary,
|
||||||
|
FloatUniform
|
||||||
|
};
|
||||||
|
|
||||||
|
RegisterType GetRegisterType() const {
|
||||||
|
if (BitFieldType::Value() < 0x10)
|
||||||
|
return Input;
|
||||||
|
else if (BitFieldType::Value() < 0x20)
|
||||||
|
return Temporary;
|
||||||
|
else
|
||||||
|
return FloatUniform;
|
||||||
|
}
|
||||||
|
|
||||||
|
int GetIndex() const {
|
||||||
|
if (GetRegisterType() == Input)
|
||||||
|
return BitFieldType::Value();
|
||||||
|
else if (GetRegisterType() == Temporary)
|
||||||
|
return BitFieldType::Value() - 0x10;
|
||||||
|
else if (GetRegisterType() == FloatUniform)
|
||||||
|
return BitFieldType::Value() - 0x20;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string GetRegisterName() const {
|
||||||
|
std::map<RegisterType, std::string> type = {
|
||||||
|
{ Input, "i" },
|
||||||
|
{ Temporary, "t" },
|
||||||
|
{ FloatUniform, "f" },
|
||||||
|
};
|
||||||
|
return type[GetRegisterType()] + std::to_string(GetIndex());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
SourceRegister<BitField<0x07, 0x5, u32>> src2;
|
||||||
|
SourceRegister<BitField<0x0c, 0x7, u32>> src1;
|
||||||
|
|
||||||
|
struct : BitField<0x15, 0x5, u32>
|
||||||
|
{
|
||||||
|
enum RegisterType {
|
||||||
|
Output,
|
||||||
|
Temporary,
|
||||||
|
Unknown
|
||||||
|
};
|
||||||
|
RegisterType GetRegisterType() const {
|
||||||
|
if (Value() < 0x8)
|
||||||
|
return Output;
|
||||||
|
else if (Value() < 0x10)
|
||||||
|
return Unknown;
|
||||||
|
else
|
||||||
|
return Temporary;
|
||||||
|
}
|
||||||
|
int GetIndex() const {
|
||||||
|
if (GetRegisterType() == Output)
|
||||||
|
return Value();
|
||||||
|
else if (GetRegisterType() == Temporary)
|
||||||
|
return Value() - 0x10;
|
||||||
|
else
|
||||||
|
return Value();
|
||||||
|
}
|
||||||
|
std::string GetRegisterName() const {
|
||||||
|
std::map<RegisterType, std::string> type = {
|
||||||
|
{ Output, "o" },
|
||||||
|
{ Temporary, "t" },
|
||||||
|
{ Unknown, "u" }
|
||||||
|
};
|
||||||
|
return type[GetRegisterType()] + std::to_string(GetIndex());
|
||||||
|
}
|
||||||
|
} dest;
|
||||||
} common;
|
} common;
|
||||||
|
|
||||||
// Format used for flow control instructions ("if")
|
// Format used for flow control instructions ("if")
|
||||||
|
@ -128,6 +198,7 @@ union Instruction {
|
||||||
BitField<0x0a, 0xc, u32> offset_words;
|
BitField<0x0a, 0xc, u32> offset_words;
|
||||||
} flow_control;
|
} flow_control;
|
||||||
};
|
};
|
||||||
|
static_assert(std::is_standard_layout<Instruction>::value, "Structure is not using standard layout!");
|
||||||
|
|
||||||
union SwizzlePattern {
|
union SwizzlePattern {
|
||||||
u32 hex;
|
u32 hex;
|
||||||
|
@ -185,6 +256,8 @@ union SwizzlePattern {
|
||||||
// Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x
|
// Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x
|
||||||
BitField< 0, 4, u32> dest_mask;
|
BitField< 0, 4, u32> dest_mask;
|
||||||
|
|
||||||
|
BitField< 4, 1, u32> negate; // negates src1
|
||||||
|
|
||||||
BitField< 5, 2, Selector> src1_selector_3;
|
BitField< 5, 2, Selector> src1_selector_3;
|
||||||
BitField< 7, 2, Selector> src1_selector_2;
|
BitField< 7, 2, Selector> src1_selector_2;
|
||||||
BitField< 9, 2, Selector> src1_selector_1;
|
BitField< 9, 2, Selector> src1_selector_1;
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
</ProjectConfiguration>
|
</ProjectConfiguration>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<ClCompile Include="debug_utils\debug_utils.cpp" />
|
||||||
<ClCompile Include="renderer_opengl\renderer_opengl.cpp" />
|
<ClCompile Include="renderer_opengl\renderer_opengl.cpp" />
|
||||||
<ClCompile Include="clipper.cpp" />
|
<ClCompile Include="clipper.cpp" />
|
||||||
<ClCompile Include="command_processor.cpp" />
|
<ClCompile Include="command_processor.cpp" />
|
||||||
|
@ -40,6 +41,7 @@
|
||||||
<ClInclude Include="utils.h" />
|
<ClInclude Include="utils.h" />
|
||||||
<ClInclude Include="vertex_shader.h" />
|
<ClInclude Include="vertex_shader.h" />
|
||||||
<ClInclude Include="video_core.h" />
|
<ClInclude Include="video_core.h" />
|
||||||
|
<ClInclude Include="debug_utils\debug_utils.h" />
|
||||||
<ClInclude Include="renderer_opengl\renderer_opengl.h" />
|
<ClInclude Include="renderer_opengl\renderer_opengl.h" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
|
|
@ -4,6 +4,9 @@
|
||||||
<Filter Include="renderer_opengl">
|
<Filter Include="renderer_opengl">
|
||||||
<UniqueIdentifier>{e0245557-dbd4-423e-9399-513d5e99f1e4}</UniqueIdentifier>
|
<UniqueIdentifier>{e0245557-dbd4-423e-9399-513d5e99f1e4}</UniqueIdentifier>
|
||||||
</Filter>
|
</Filter>
|
||||||
|
<Filter Include="debug_utils">
|
||||||
|
<UniqueIdentifier>{0ac498e6-bbd8-46e3-9d5f-e816546ab90e}</UniqueIdentifier>
|
||||||
|
</Filter>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="renderer_opengl\renderer_opengl.cpp">
|
<ClCompile Include="renderer_opengl\renderer_opengl.cpp">
|
||||||
|
@ -16,11 +19,11 @@
|
||||||
<ClCompile Include="utils.cpp" />
|
<ClCompile Include="utils.cpp" />
|
||||||
<ClCompile Include="vertex_shader.cpp" />
|
<ClCompile Include="vertex_shader.cpp" />
|
||||||
<ClCompile Include="video_core.cpp" />
|
<ClCompile Include="video_core.cpp" />
|
||||||
|
<ClCompile Include="debug_utils\debug_utils.cpp">
|
||||||
|
<Filter>debug_utils</Filter>
|
||||||
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="renderer_opengl\renderer_opengl.h">
|
|
||||||
<Filter>renderer_opengl</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="clipper.h" />
|
<ClInclude Include="clipper.h" />
|
||||||
<ClInclude Include="command_processor.h" />
|
<ClInclude Include="command_processor.h" />
|
||||||
<ClInclude Include="gpu_debugger.h" />
|
<ClInclude Include="gpu_debugger.h" />
|
||||||
|
@ -32,6 +35,10 @@
|
||||||
<ClInclude Include="utils.h" />
|
<ClInclude Include="utils.h" />
|
||||||
<ClInclude Include="vertex_shader.h" />
|
<ClInclude Include="vertex_shader.h" />
|
||||||
<ClInclude Include="video_core.h" />
|
<ClInclude Include="video_core.h" />
|
||||||
|
<ClInclude Include="renderer_opengl\renderer_opengl.h" />
|
||||||
|
<ClInclude Include="debug_utils\debug_utils.h">
|
||||||
|
<Filter>debug_utils</Filter>
|
||||||
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Text Include="CMakeLists.txt" />
|
<Text Include="CMakeLists.txt" />
|
||||||
|
|
Loading…
Reference in New Issue