mirror of https://github.com/stenzek/duckstation
CPU/CodeCache: Rewrite using new-rec's block management
parent
f82d08e223
commit
79e1ae3e54
@ -0,0 +1,198 @@
|
||||
|
||||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>, PCSX2 Team
|
||||
// SPDX-License-Identifier: GPL-3.0
|
||||
|
||||
#include "perf_scope.h"
|
||||
#include "assert.h"
|
||||
#include "string_util.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <atomic>
|
||||
#include <ctime>
|
||||
#include <elf.h>
|
||||
#include <mutex>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// #define ProfileWithPerf
|
||||
// #define ProfileWithPerfJitDump
|
||||
|
||||
// Perf is only supported on linux
|
||||
#if defined(__linux__) && defined(ProfileWithPerf)
|
||||
|
||||
static std::FILE* s_map_file = nullptr;
|
||||
static bool s_map_file_opened = false;
|
||||
static std::mutex s_mutex;
|
||||
static void RegisterMethod(const void* ptr, size_t size, const char* symbol)
|
||||
{
|
||||
std::unique_lock lock(s_mutex);
|
||||
|
||||
if (!s_map_file)
|
||||
{
|
||||
if (s_map_file_opened)
|
||||
return;
|
||||
|
||||
char file[256];
|
||||
snprintf(file, std::size(file), "/tmp/perf-%d.map", getpid());
|
||||
s_map_file = std::fopen(file, "wb");
|
||||
s_map_file_opened = true;
|
||||
if (!s_map_file)
|
||||
return;
|
||||
}
|
||||
|
||||
std::fprintf(s_map_file, "%" PRIx64 " %zx %s\n", static_cast<u64>(reinterpret_cast<uintptr_t>(ptr)), size, symbol);
|
||||
std::fflush(s_map_file);
|
||||
}
|
||||
|
||||
#elif defined(__linux__) && defined(ProfileWithPerfJitDump)
|
||||
// Record type identifiers stored in JITDUMP_RECORD_HEADER::id.
// Only JIT_CODE_LOAD is written by this file; the remaining values are kept
// for completeness (presumably mirroring the perf jitdump format - confirm
// against the specification before changing any value).
enum : u32
{
  JIT_CODE_LOAD = 0,
  JIT_CODE_MOVE = 1,
  JIT_CODE_DEBUG_INFO = 2,
  JIT_CODE_CLOSE = 3,
  JIT_CODE_UNWINDING_INFO = 4
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
struct JITDUMP_HEADER
|
||||
{
|
||||
u32 magic = 0x4A695444; // JiTD
|
||||
u32 version = 1;
|
||||
u32 header_size = sizeof(JITDUMP_HEADER);
|
||||
u32 elf_mach;
|
||||
u32 pad1 = 0;
|
||||
u32 pid;
|
||||
u64 timestamp;
|
||||
u64 flags = 0;
|
||||
};
|
||||
struct JITDUMP_RECORD_HEADER
|
||||
{
|
||||
u32 id;
|
||||
u32 total_size;
|
||||
u64 timestamp;
|
||||
};
|
||||
struct JITDUMP_CODE_LOAD
|
||||
{
|
||||
JITDUMP_RECORD_HEADER header;
|
||||
u32 pid;
|
||||
u32 tid;
|
||||
u64 vma;
|
||||
u64 code_addr;
|
||||
u64 code_size;
|
||||
u64 code_index;
|
||||
// name
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
// Returns the current CLOCK_MONOTONIC time expressed in nanoseconds.
static u64 JitDumpTimestamp()
{
  constexpr u64 NANOSECONDS_PER_SECOND = 1000000000ULL;

  struct timespec now = {};
  clock_gettime(CLOCK_MONOTONIC, &now);

  const u64 whole_seconds = static_cast<u64>(now.tv_sec);
  const u64 extra_nanoseconds = static_cast<u64>(now.tv_nsec);
  return (whole_seconds * NANOSECONDS_PER_SECOND) + extra_nanoseconds;
}
|
||||
|
||||
static FILE* s_jitdump_file = nullptr;
|
||||
static bool s_jitdump_file_opened = false;
|
||||
static std::mutex s_jitdump_mutex;
|
||||
static u32 s_jitdump_record_id;
|
||||
|
||||
static void RegisterMethod(const void* ptr, size_t size, const char* symbol)
|
||||
{
|
||||
const u32 namelen = std::strlen(symbol) + 1;
|
||||
|
||||
std::unique_lock lock(s_jitdump_mutex);
|
||||
if (!s_jitdump_file)
|
||||
{
|
||||
if (!s_jitdump_file_opened)
|
||||
{
|
||||
char file[256];
|
||||
snprintf(file, std::size(file), "jit-%d.dump", getpid());
|
||||
s_jitdump_file = fopen(file, "w+b");
|
||||
s_jitdump_file_opened = true;
|
||||
if (!s_jitdump_file)
|
||||
return;
|
||||
}
|
||||
|
||||
void* perf_marker = mmap(nullptr, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, fileno(s_jitdump_file), 0);
|
||||
AssertMsg(perf_marker != MAP_FAILED, "Map perf marker");
|
||||
|
||||
JITDUMP_HEADER jh = {};
|
||||
#if defined(__aarch64__)
|
||||
jh.elf_mach = EM_AARCH64;
|
||||
#else
|
||||
jh.elf_mach = EM_X86_64;
|
||||
#endif
|
||||
jh.pid = getpid();
|
||||
jh.timestamp = JitDumpTimestamp();
|
||||
std::fwrite(&jh, sizeof(jh), 1, s_jitdump_file);
|
||||
}
|
||||
|
||||
JITDUMP_CODE_LOAD cl = {};
|
||||
cl.header.id = JIT_CODE_LOAD;
|
||||
cl.header.total_size = sizeof(cl) + namelen + static_cast<u32>(size);
|
||||
cl.header.timestamp = JitDumpTimestamp();
|
||||
cl.pid = getpid();
|
||||
cl.tid = syscall(SYS_gettid);
|
||||
cl.vma = 0;
|
||||
cl.code_addr = static_cast<u64>(reinterpret_cast<uintptr_t>(ptr));
|
||||
cl.code_size = static_cast<u64>(size);
|
||||
cl.code_index = s_jitdump_record_id++;
|
||||
std::fwrite(&cl, sizeof(cl), 1, s_jitdump_file);
|
||||
std::fwrite(symbol, namelen, 1, s_jitdump_file);
|
||||
std::fwrite(ptr, size, 1, s_jitdump_file);
|
||||
std::fflush(s_jitdump_file);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) && (defined(ProfileWithPerf) || defined(ProfileWithPerfJitDump))
|
||||
|
||||
void PerfScope::Register(const void* ptr, size_t size, const char* symbol)
|
||||
{
|
||||
char full_symbol[128];
|
||||
if (HasPrefix())
|
||||
std::snprintf(full_symbol, std::size(full_symbol), "%s_%s", m_prefix, symbol);
|
||||
else
|
||||
StringUtil::Strlcpy(full_symbol, symbol, std::size(full_symbol));
|
||||
RegisterMethod(ptr, size, full_symbol);
|
||||
}
|
||||
|
||||
void PerfScope::RegisterPC(const void* ptr, size_t size, u32 pc)
|
||||
{
|
||||
char full_symbol[128];
|
||||
if (HasPrefix())
|
||||
std::snprintf(full_symbol, std::size(full_symbol), "%s_%08X", m_prefix, pc);
|
||||
else
|
||||
std::snprintf(full_symbol, std::size(full_symbol), "%08X", pc);
|
||||
RegisterMethod(ptr, size, full_symbol);
|
||||
}
|
||||
|
||||
void PerfScope::RegisterKey(const void* ptr, size_t size, const char* prefix, u64 key)
|
||||
{
|
||||
char full_symbol[128];
|
||||
if (HasPrefix())
|
||||
std::snprintf(full_symbol, std::size(full_symbol), "%s_%s%016" PRIX64, m_prefix, prefix, key);
|
||||
else
|
||||
std::snprintf(full_symbol, std::size(full_symbol), "%s%016" PRIX64, prefix, key);
|
||||
RegisterMethod(ptr, size, full_symbol);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void PerfScope::Register(const void* ptr, size_t size, const char* symbol)
|
||||
{
|
||||
}
|
||||
void PerfScope::RegisterPC(const void* ptr, size_t size, u32 pc)
|
||||
{
|
||||
}
|
||||
void PerfScope::RegisterKey(const void* ptr, size_t size, const char* prefix, u64 key)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -0,0 +1,20 @@
|
||||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>, PCSX2 Team
|
||||
// SPDX-License-Identifier: GPL-3.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
|
||||
class PerfScope
|
||||
{
|
||||
public:
|
||||
constexpr PerfScope(const char* prefix) : m_prefix(prefix) {}
|
||||
bool HasPrefix() const { return (m_prefix && m_prefix[0]); }
|
||||
|
||||
void Register(const void* ptr, size_t size, const char* symbol);
|
||||
void RegisterPC(const void* ptr, size_t size, u32 pc);
|
||||
void RegisterKey(const void* ptr, size_t size, const char* prefix, u64 key);
|
||||
|
||||
private:
|
||||
const char* m_prefix;
|
||||
};
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,279 @@
|
||||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bus.h"
|
||||
#include "common/bitfield.h"
|
||||
#include "common/perf_scope.h"
|
||||
#include "cpu_code_cache.h"
|
||||
#include "cpu_core_private.h"
|
||||
#include "cpu_types.h"
|
||||
|
||||
#include "util/jit_code_buffer.h"
|
||||
#include "util/page_fault_handler.h"
|
||||
|
||||
#include <array>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
// #include "cpu_recompiler_types.h"
|
||||
#endif
|
||||
|
||||
namespace CPU::CodeCache {
|
||||
|
||||
// Sizing constants for the code lookup tables and for block linking.
enum : u32
{
  // Number of entries in the top-level lookup arrays (CodeLUTArray, BlockLUTArray).
  LUT_TABLE_COUNT = 0x10000,

  // 16384, one for each PC
  LUT_TABLE_SIZE = 0x10000 / sizeof(u32),

  // NOTE(review): presumably the shift applied to a PC to select the
  // top-level table - confirm against the lookup code.
  LUT_TABLE_SHIFT = 16,

  // Capacity of Block::exit_links.
  MAX_BLOCK_EXIT_LINKS = 2,
};
|
||||
|
||||
using CodeLUT = const void**;
|
||||
using CodeLUTArray = std::array<CodeLUT, LUT_TABLE_COUNT>;
|
||||
using BlockLinkMap = std::unordered_multimap<u32, void*>; // TODO: try ordered?
|
||||
|
||||
enum RegInfoFlags : u8
|
||||
{
|
||||
RI_LIVE = (1 << 0),
|
||||
RI_USED = (1 << 1),
|
||||
RI_LASTUSE = (1 << 2),
|
||||
};
|
||||
|
||||
struct InstructionInfo
|
||||
{
|
||||
u32 pc; // TODO: Remove this, old recs still depend on it.
|
||||
|
||||
bool is_branch_instruction : 1;
|
||||
bool is_direct_branch_instruction : 1;
|
||||
bool is_unconditional_branch_instruction : 1;
|
||||
bool is_branch_delay_slot : 1;
|
||||
bool is_load_instruction : 1;
|
||||
bool is_store_instruction : 1;
|
||||
bool is_load_delay_slot : 1;
|
||||
bool is_last_instruction : 1;
|
||||
bool has_load_delay : 1;
|
||||
bool can_trap : 1;
|
||||
|
||||
u8 reg_flags[static_cast<u8>(Reg::count)];
|
||||
// Reg write_reg[3];
|
||||
Reg read_reg[3];
|
||||
|
||||
// If unset, values which are not live will not be written back to memory.
|
||||
// Tends to break stuff at the moment.
|
||||
static constexpr bool WRITE_DEAD_VALUES = true;
|
||||
|
||||
/// Returns true if the register is used later in the block, and this isn't the last instruction to use it.
|
||||
/// In other words, the register is worth keeping in a host register/caching it.
|
||||
inline bool UsedTest(Reg reg) const { return (reg_flags[static_cast<u8>(reg)] & (RI_USED | RI_LASTUSE)) == RI_USED; }
|
||||
|
||||
/// Returns true if the value should be computed/written back.
|
||||
/// Basically, this means it's either used before it's overwritten, or not overwritten by the end of the block.
|
||||
inline bool LiveTest(Reg reg) const
|
||||
{
|
||||
return WRITE_DEAD_VALUES || ((reg_flags[static_cast<u8>(reg)] & RI_LIVE) != 0);
|
||||
}
|
||||
|
||||
/// Returns true if the register can be renamed into another.
|
||||
inline bool RenameTest(Reg reg) const { return (reg == Reg::zero || !UsedTest(reg) || !LiveTest(reg)); }
|
||||
|
||||
/// Returns true if this instruction reads this register.
|
||||
inline bool ReadsReg(Reg reg) const { return (read_reg[0] == reg || read_reg[1] == reg || read_reg[2] == reg); }
|
||||
};
|
||||
|
||||
enum class BlockState : u8
|
||||
{
|
||||
Valid,
|
||||
Invalidated,
|
||||
NeedsRecompile,
|
||||
FallbackToInterpreter
|
||||
};
|
||||
|
||||
enum class BlockFlags : u8
|
||||
{
|
||||
None = 0,
|
||||
ContainsLoadStoreInstructions = (1 << 0),
|
||||
SpansPages = (1 << 1),
|
||||
BranchDelaySpansPages = (1 << 2),
|
||||
};
|
||||
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(BlockFlags);
|
||||
|
||||
enum class PageProtectionMode : u8
|
||||
{
|
||||
WriteProtected,
|
||||
ManualCheck,
|
||||
Unprotected,
|
||||
};
|
||||
|
||||
struct BlockMetadata
|
||||
{
|
||||
TickCount uncached_fetch_ticks;
|
||||
u32 icache_line_count;
|
||||
BlockFlags flags;
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4324) // C4324: 'CPU::CodeCache::Block': structure was padded due to alignment specifier)
|
||||
#endif
|
||||
|
||||
struct alignas(16) Block
|
||||
{
|
||||
u32 pc;
|
||||
u32 size; // in guest instructions
|
||||
const void* host_code;
|
||||
|
||||
// links to previous/next block within page
|
||||
Block* next_block_in_page;
|
||||
|
||||
BlockLinkMap::iterator exit_links[MAX_BLOCK_EXIT_LINKS];
|
||||
u8 num_exit_links;
|
||||
|
||||
// TODO: Move up so it's part of the same cache line
|
||||
BlockState state;
|
||||
BlockFlags flags;
|
||||
PageProtectionMode protection;
|
||||
|
||||
TickCount uncached_fetch_ticks;
|
||||
u32 icache_line_count;
|
||||
|
||||
u32 compile_frame;
|
||||
u8 compile_count;
|
||||
|
||||
// followed by Instruction * size, InstructionRegInfo * size
|
||||
ALWAYS_INLINE const Instruction* Instructions() const { return reinterpret_cast<const Instruction*>(this + 1); }
|
||||
ALWAYS_INLINE Instruction* Instructions() { return reinterpret_cast<Instruction*>(this + 1); }
|
||||
|
||||
ALWAYS_INLINE const InstructionInfo* InstructionsInfo() const
|
||||
{
|
||||
return reinterpret_cast<const InstructionInfo*>(Instructions() + size);
|
||||
}
|
||||
ALWAYS_INLINE InstructionInfo* InstructionsInfo()
|
||||
{
|
||||
return reinterpret_cast<InstructionInfo*>(Instructions() + size);
|
||||
}
|
||||
|
||||
// returns true if the block has a given flag
|
||||
ALWAYS_INLINE bool HasFlag(BlockFlags flag) const { return ((flags & flag) != BlockFlags::None); }
|
||||
|
||||
// returns the page index for the start of the block
|
||||
ALWAYS_INLINE u32 StartPageIndex() const { return Bus::GetRAMCodePageIndex(pc); }
|
||||
|
||||
// returns the page index for the last instruction in the block (inclusive)
|
||||
ALWAYS_INLINE u32 EndPageIndex() const { return Bus::GetRAMCodePageIndex(pc + ((size - 1) * sizeof(Instruction))); }
|
||||
|
||||
// returns true if the block spans multiple pages
|
||||
ALWAYS_INLINE bool SpansPages() const { return StartPageIndex() != EndPageIndex(); }
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
using BlockLUTArray = std::array<Block**, LUT_TABLE_COUNT>;
|
||||
|
||||
struct LoadstoreBackpatchInfo
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 gpr_bitmask;
|
||||
u16 cycles;
|
||||
u16 address_register : 5;
|
||||
u16 data_register : 5;
|
||||
u16 size : 2;
|
||||
u16 is_signed : 1;
|
||||
u16 is_load : 1;
|
||||
};
|
||||
|
||||
const void* thunk_address; // only needed for oldrec
|
||||
};
|
||||
|
||||
u32 guest_pc;
|
||||
u8 code_size;
|
||||
|
||||
MemoryAccessSize AccessSize() const { return static_cast<MemoryAccessSize>(size); }
|
||||
u32 AccessSizeInBytes() const { return 1u << size; }
|
||||
};
|
||||
static_assert(sizeof(LoadstoreBackpatchInfo) == 16);
|
||||
|
||||
// Returns true if the given virtual address, once converted to a physical
// address, lies below Bus::g_ram_size.
static inline bool AddressInRAM(VirtualMemoryAddress pc)
{
  const auto physical_address = VirtualAddressToPhysical(pc);
  return physical_address < Bus::g_ram_size;
}
|
||||
|
||||
struct PageProtectionInfo
|
||||
{
|
||||
Block* first_block_in_page;
|
||||
Block* last_block_in_page;
|
||||
|
||||
PageProtectionMode mode;
|
||||
u16 invalidate_count;
|
||||
u32 invalidate_frame;
|
||||
};
|
||||
static_assert(sizeof(PageProtectionInfo) == (sizeof(Block*) * 2 + 8));
|
||||
|
||||
template<PGXPMode pgxp_mode>
|
||||
void InterpretCachedBlock(const Block* block);
|
||||
|
||||
template<PGXPMode pgxp_mode>
|
||||
void InterpretUncachedBlock();
|
||||
|
||||
void LogCurrentState();
|
||||
|
||||
#if defined(ENABLE_RECOMPILER)
|
||||
#define ENABLE_RECOMPILER_SUPPORT 1
|
||||
|
||||
#if defined(_DEBUG) || false
|
||||
// Enable disassembly of host assembly code.
|
||||
#define ENABLE_HOST_DISASSEMBLY 1
|
||||
#endif
|
||||
|
||||
#if false
|
||||
// Enable profiling of JIT blocks.
|
||||
#define ENABLE_RECOMPILER_PROFILING 1
|
||||
#endif
|
||||
|
||||
JitCodeBuffer& GetCodeBuffer();
|
||||
const void* GetInterpretUncachedBlockFunction();
|
||||
|
||||
void CompileOrRevalidateBlock(u32 start_pc);
|
||||
void DiscardAndRecompileBlock(u32 start_pc);
|
||||
const void* CreateBlockLink(Block* from_block, void* code, u32 newpc);
|
||||
|
||||
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, const void* thunk_address);
|
||||
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles, u32 gpr_bitmask,
|
||||
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, bool is_load);
|
||||
|
||||
u32 EmitASMFunctions(void* code, u32 code_size);
|
||||
u32 EmitJump(void* code, const void* dst, bool flush_icache);
|
||||
|
||||
void DisassembleAndLogHostCode(const void* start, u32 size);
|
||||
u32 GetHostInstructionCount(const void* start, u32 size);
|
||||
|
||||
extern CodeLUTArray g_code_lut;
|
||||
|
||||
extern NORETURN_FUNCTION_POINTER void (*g_enter_recompiler)();
|
||||
extern const void* g_compile_or_revalidate_block;
|
||||
extern const void* g_check_events_and_dispatch;
|
||||
extern const void* g_run_events_and_dispatch;
|
||||
extern const void* g_dispatcher;
|
||||
extern const void* g_block_dispatcher;
|
||||
extern const void* g_interpret_block;
|
||||
extern const void* g_discard_and_recompile_block;
|
||||
|
||||
#ifdef ENABLE_RECOMPILER_PROFILING
|
||||
|
||||
extern PerfScope MIPSPerfScope;
|
||||
|
||||
#endif // ENABLE_RECOMPILER_PROFILING
|
||||
|
||||
#endif // ENABLE_RECOMPILER
|
||||
|
||||
} // namespace CPU::CodeCache
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue