@ -1,4 +1,5 @@
# include "common/align.h"
# include "common/assert.h"
# include "common/log.h"
# include "cpu_core.h"
# include "cpu_core_private.h"
@ -12,6 +13,7 @@ namespace CPU::Recompiler {
# if defined(ABI_WIN64)
constexpr HostReg RCPUPTR = Xbyak : : Operand : : RBP ;
constexpr HostReg RMEMBASEPTR = Xbyak : : Operand : : RBX ;
constexpr HostReg RRETURN = Xbyak : : Operand : : RAX ;
constexpr HostReg RARG1 = Xbyak : : Operand : : RCX ;
constexpr HostReg RARG2 = Xbyak : : Operand : : RDX ;
@ -21,6 +23,7 @@ constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 32;
constexpr u64 FUNCTION_CALL_STACK_ALIGNMENT = 16 ;
# elif defined(ABI_SYSV)
constexpr HostReg RCPUPTR = Xbyak : : Operand : : RBP ;
constexpr HostReg RMEMBASEPTR = Xbyak : : Operand : : RBX ;
constexpr HostReg RRETURN = Xbyak : : Operand : : RAX ;
constexpr HostReg RARG1 = Xbyak : : Operand : : RDI ;
constexpr HostReg RARG2 = Xbyak : : Operand : : RSI ;
@ -79,6 +82,11 @@ static const Xbyak::Reg64 GetCPUPtrReg()
return GetHostReg64 ( RCPUPTR ) ;
}
/// Returns the host register that is reserved to hold the fastmem base pointer
/// (RMEMBASEPTR) while a block containing load/store instructions executes.
static const Xbyak::Reg64 GetFastmemBasePtrReg()
{
  return GetHostReg64(RMEMBASEPTR);
}
CodeGenerator : : CodeGenerator ( JitCodeBuffer * code_buffer )
: m_code_buffer ( code_buffer ) , m_register_cache ( * this ) ,
m_near_emitter ( code_buffer - > GetFreeCodeSpace ( ) , code_buffer - > GetFreeCodePointer ( ) ) ,
@ -140,7 +148,6 @@ void CodeGenerator::InitHostRegs()
m_register_cache . SetCalleeSavedHostRegs ( { Xbyak : : Operand : : RBX , Xbyak : : Operand : : RBP , Xbyak : : Operand : : RDI ,
Xbyak : : Operand : : RSI , Xbyak : : Operand : : RSP , Xbyak : : Operand : : R12 ,
Xbyak : : Operand : : R13 , Xbyak : : Operand : : R14 , Xbyak : : Operand : : R15 } ) ;
m_register_cache . SetCPUPtrHostReg ( RCPUPTR ) ;
# elif defined(ABI_SYSV)
m_register_cache . SetHostRegAllocationOrder (
{ Xbyak : : Operand : : RBX , /*Xbyak::Operand::RSP, */ Xbyak : : Operand : : RBP , Xbyak : : Operand : : R12 , Xbyak : : Operand : : R13 ,
@ -154,8 +161,9 @@ void CodeGenerator::InitHostRegs()
m_register_cache . SetCalleeSavedHostRegs ( { Xbyak : : Operand : : RBX , Xbyak : : Operand : : RSP , Xbyak : : Operand : : RBP ,
Xbyak : : Operand : : R12 , Xbyak : : Operand : : R13 , Xbyak : : Operand : : R14 ,
Xbyak : : Operand : : R15 } ) ;
m_register_cache . SetCPUPtrHostReg ( RCPUPTR ) ;
# endif
m_register_cache . SetCPUPtrHostReg ( RCPUPTR ) ;
}
void CodeGenerator : : SwitchToFarCode ( )
@ -196,11 +204,22 @@ void CodeGenerator::EmitBeginBlock()
const bool cpu_reg_allocated = m_register_cache . AllocateHostReg ( RCPUPTR ) ;
DebugAssert ( cpu_reg_allocated ) ;
// m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
// If there's loadstore instructions, preload the fastmem base.
if ( m_block - > contains_loadstore_instructions )
{
const bool fastmem_reg_allocated = m_register_cache . AllocateHostReg ( RMEMBASEPTR ) ;
Assert ( fastmem_reg_allocated ) ;
m_emit - > mov ( GetFastmemBasePtrReg ( ) , m_emit - > qword [ GetCPUPtrReg ( ) + offsetof ( CPU : : State , fastmem_base ) ] ) ;
}
}
// Emits the common block epilogue: releases the pinned CPU-state pointer
// register, releases the fastmem base register when it was allocated (it is
// only pinned for blocks that contain load/store instructions — see
// EmitBeginBlock), restores callee-saved registers, and emits the return.
void CodeGenerator : : EmitEndBlock ( )
{
m_register_cache . FreeHostReg ( RCPUPTR ) ;
// RMEMBASEPTR is only held for blocks with loads/stores, so only free it then.
if ( m_block - > contains_loadstore_instructions )
m_register_cache . FreeHostReg ( RMEMBASEPTR ) ;
m_register_cache . PopCalleeSavedRegisters ( true ) ;
m_emit - > ret ( ) ;
@ -1747,12 +1766,140 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
AddPendingCycles ( true ) ;
if ( g_settings . cpu_recompiler_memory_exceptions )
Value result = m_register_cache . AllocateScratch ( RegSize_64 ) ;
if ( g_settings . IsUsingFastmem ( ) )
{
EmitLoadGuestMemoryFastmem ( cbi , address , size , result ) ;
}
else
{
// We need to use the full 64 bits here since we test the sign bit result.
Value result = m_register_cache . AllocateScratch ( RegSize_64 ) ;
m_register_cache . FlushCallerSavedGuestRegisters ( true , true ) ;
EmitLoadGuestMemorySlowmem ( cbi , address , size , result , false ) ;
}
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch ( size )
{
case RegSize_8 :
ConvertValueSizeInPlace ( & result , RegSize_8 , false ) ;
break ;
case RegSize_16 :
ConvertValueSizeInPlace ( & result , RegSize_16 , false ) ;
break ;
case RegSize_32 :
ConvertValueSizeInPlace ( & result , RegSize_32 , false ) ;
break ;
default :
UnreachableCode ( ) ;
break ;
}
return result ;
}
void CodeGenerator : : EmitLoadGuestMemoryFastmem ( const CodeBlockInstruction & cbi , const Value & address , RegSize size ,
Value & result )
{
// fastmem
LoadStoreBackpatchInfo bpi ;
bpi . host_pc = GetCurrentNearCodePointer ( ) ;
bpi . address_host_reg = HostReg_Invalid ;
bpi . value_host_reg = result . host_reg ;
bpi . guest_pc = m_current_instruction - > pc ;
// can't store displacements > 0x80000000 in-line
const Value * actual_address = & address ;
if ( address . IsConstant ( ) & & address . constant_value > = 0x80000000 )
{
actual_address = & result ;
m_emit - > mov ( GetHostReg32 ( result . host_reg ) , address . constant_value ) ;
bpi . host_pc = GetCurrentNearCodePointer ( ) ;
}
// TODO: movsx/zx inline here
switch ( size )
{
case RegSize_8 :
{
if ( actual_address - > IsConstant ( ) )
{
m_emit - > mov ( GetHostReg8 ( result . host_reg ) ,
m_emit - > byte [ GetFastmemBasePtrReg ( ) + actual_address - > constant_value ] ) ;
}
else
{
m_emit - > mov ( GetHostReg8 ( result . host_reg ) ,
m_emit - > byte [ GetFastmemBasePtrReg ( ) + GetHostReg64 ( actual_address - > host_reg ) ] ) ;
}
}
break ;
case RegSize_16 :
{
if ( actual_address - > IsConstant ( ) )
{
m_emit - > mov ( GetHostReg16 ( result . host_reg ) ,
m_emit - > word [ GetFastmemBasePtrReg ( ) + actual_address - > constant_value ] ) ;
}
else
{
m_emit - > mov ( GetHostReg16 ( result . host_reg ) ,
m_emit - > word [ GetFastmemBasePtrReg ( ) + GetHostReg64 ( actual_address - > host_reg ) ] ) ;
}
}
break ;
case RegSize_32 :
{
if ( actual_address - > IsConstant ( ) )
{
m_emit - > mov ( GetHostReg32 ( result . host_reg ) ,
m_emit - > dword [ GetFastmemBasePtrReg ( ) + actual_address - > constant_value ] ) ;
}
else
{
m_emit - > mov ( GetHostReg32 ( result . host_reg ) ,
m_emit - > dword [ GetFastmemBasePtrReg ( ) + GetHostReg64 ( actual_address - > host_reg ) ] ) ;
}
}
break ;
}
// TODO: BIOS reads...
EmitAddCPUStructField ( offsetof ( CPU : : State , pending_ticks ) , Value : : FromConstantU32 ( Bus : : RAM_READ_TICKS ) ) ;
// insert nops, we need at least 5 bytes for a relative jump
const u32 fastmem_size =
static_cast < u32 > ( static_cast < u8 * > ( GetCurrentNearCodePointer ( ) ) - static_cast < u8 * > ( bpi . host_pc ) ) ;
const u32 nops = ( fastmem_size < 5 ? 5 - fastmem_size : 0 ) ;
for ( u32 i = 0 ; i < nops ; i + + )
m_emit - > nop ( ) ;
bpi . host_code_size = static_cast < u32 > (
static_cast < ptrdiff_t > ( static_cast < u8 * > ( GetCurrentNearCodePointer ( ) ) - static_cast < u8 * > ( bpi . host_pc ) ) ) ;
// generate slowmem fallback
m_far_emitter . align ( 16 ) ;
bpi . host_slowmem_pc = GetCurrentFarCodePointer ( ) ;
SwitchToFarCode ( ) ;
EmitLoadGuestMemorySlowmem ( cbi , address , size , result , true ) ;
// return to the block code
m_emit - > jmp ( GetCurrentNearCodePointer ( ) ) ;
SwitchToNearCode ( ) ;
m_block - > loadstore_backpatch_info . push_back ( bpi ) ;
}
void CodeGenerator : : EmitLoadGuestMemorySlowmem ( const CodeBlockInstruction & cbi , const Value & address , RegSize size ,
Value & result , bool in_far_code )
{
if ( g_settings . cpu_recompiler_memory_exceptions )
{
// NOTE: This can leave junk in the upper bits
switch ( size )
{
@ -1779,7 +1926,8 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
m_register_cache . PushState ( ) ;
// load exception path
SwitchToFarCode ( ) ;
if ( ! in_far_code )
SwitchToFarCode ( ) ;
// cause_bits = (-result << 2) | BD | cop_n
m_emit - > neg ( GetHostReg32 ( result . host_reg ) ) ;
@ -1790,37 +1938,14 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
EmitFunctionCall ( nullptr , static_cast < void ( * ) ( u32 , u32 ) > ( & CPU : : RaiseException ) , result , GetCurrentInstructionPC ( ) ) ;
EmitExceptionExit ( ) ;
SwitchToNearCode ( ) ;
m_register_cache . PopState ( ) ;
if ( ! in_far_code )
SwitchToNearCode ( ) ;
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch ( size )
{
case RegSize_8 :
ConvertValueSizeInPlace ( & result , RegSize_8 , false ) ;
break ;
case RegSize_16 :
ConvertValueSizeInPlace ( & result , RegSize_16 , false ) ;
break ;
case RegSize_32 :
ConvertValueSizeInPlace ( & result , RegSize_32 , false ) ;
break ;
default :
UnreachableCode ( ) ;
break ;
}
return result ;
m_register_cache . PopState ( ) ;
}
else
{
Value result = m_register_cache . AllocateScratch ( RegSize_32 ) ;
m_register_cache . FlushCallerSavedGuestRegisters ( true , true ) ;
switch ( size )
{
case RegSize_8 :
@ -1839,27 +1964,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
UnreachableCode ( ) ;
break ;
}
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch ( size )
{
case RegSize_8 :
ConvertValueSizeInPlace ( & result , RegSize_8 , false ) ;
break ;
case RegSize_16 :
ConvertValueSizeInPlace ( & result , RegSize_16 , false ) ;
break ;
case RegSize_32 :
break ;
default :
UnreachableCode ( ) ;
break ;
}
return result ;
}
}
@ -1880,11 +1984,164 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
AddPendingCycles ( true ) ;
if ( g_settings . cpu_recompiler_memory_exceptions )
if ( g_settings . IsUsingFastmem ( ) )
{
EmitStoreGuestMemoryFastmem ( cbi , address , value ) ;
}
else
{
Value result = m_register_cache . AllocateScratch ( RegSize_32 ) ;
m_register_cache . FlushCallerSavedGuestRegisters ( true , true ) ;
EmitStoreGuestMemorySlowmem ( cbi , address , value , false ) ;
}
}
void CodeGenerator : : EmitStoreGuestMemoryFastmem ( const CodeBlockInstruction & cbi , const Value & address ,
const Value & value )
{
// fastmem
LoadStoreBackpatchInfo bpi ;
bpi . host_pc = GetCurrentNearCodePointer ( ) ;
bpi . address_host_reg = HostReg_Invalid ;
bpi . value_host_reg = value . host_reg ;
bpi . guest_pc = m_current_instruction - > pc ;
// can't store displacements > 0x80000000 in-line
const Value * actual_address = & address ;
Value temp_address ;
if ( address . IsConstant ( ) & & address . constant_value > = 0x80000000 )
{
temp_address . SetHostReg ( & m_register_cache , RRETURN , RegSize_32 ) ;
actual_address = & temp_address ;
m_emit - > mov ( GetHostReg32 ( temp_address ) , address . constant_value ) ;
bpi . host_pc = GetCurrentNearCodePointer ( ) ;
}
switch ( value . size )
{
case RegSize_8 :
{
if ( actual_address - > IsConstant ( ) )
{
if ( value . IsConstant ( ) )
{
m_emit - > mov ( m_emit - > byte [ GetFastmemBasePtrReg ( ) + actual_address - > constant_value ] , value . constant_value ) ;
}
else
{
m_emit - > mov ( m_emit - > byte [ GetFastmemBasePtrReg ( ) + actual_address - > constant_value ] ,
GetHostReg8 ( value . host_reg ) ) ;
}
}
else
{
if ( value . IsConstant ( ) )
{
m_emit - > mov ( m_emit - > byte [ GetFastmemBasePtrReg ( ) + GetHostReg64 ( actual_address - > host_reg ) ] ,
value . constant_value ) ;
}
else
{
m_emit - > mov ( m_emit - > byte [ GetFastmemBasePtrReg ( ) + GetHostReg64 ( actual_address - > host_reg ) ] ,
GetHostReg8 ( value . host_reg ) ) ;
}
}
}
break ;
case RegSize_16 :
{
if ( actual_address - > IsConstant ( ) )
{
if ( value . IsConstant ( ) )
{
m_emit - > mov ( m_emit - > word [ GetFastmemBasePtrReg ( ) + actual_address - > constant_value ] , value . constant_value ) ;
}
else
{
m_emit - > mov ( m_emit - > word [ GetFastmemBasePtrReg ( ) + actual_address - > constant_value ] ,
GetHostReg16 ( value . host_reg ) ) ;
}
}
else
{
if ( value . IsConstant ( ) )
{
m_emit - > mov ( m_emit - > word [ GetFastmemBasePtrReg ( ) + GetHostReg64 ( actual_address - > host_reg ) ] ,
value . constant_value ) ;
}
else
{
m_emit - > mov ( m_emit - > word [ GetFastmemBasePtrReg ( ) + GetHostReg64 ( actual_address - > host_reg ) ] ,
GetHostReg16 ( value . host_reg ) ) ;
}
}
}
break ;
case RegSize_32 :
{
if ( actual_address - > IsConstant ( ) )
{
if ( value . IsConstant ( ) )
{
m_emit - > mov ( m_emit - > dword [ GetFastmemBasePtrReg ( ) + actual_address - > constant_value ] , value . constant_value ) ;
}
else
{
m_emit - > mov ( m_emit - > dword [ GetFastmemBasePtrReg ( ) + actual_address - > constant_value ] ,
GetHostReg32 ( value . host_reg ) ) ;
}
}
else
{
if ( value . IsConstant ( ) )
{
m_emit - > mov ( m_emit - > dword [ GetFastmemBasePtrReg ( ) + GetHostReg64 ( actual_address - > host_reg ) ] ,
value . constant_value ) ;
}
else
{
m_emit - > mov ( m_emit - > dword [ GetFastmemBasePtrReg ( ) + GetHostReg64 ( actual_address - > host_reg ) ] ,
GetHostReg32 ( value . host_reg ) ) ;
}
}
}
break ;
}
// insert nops, we need at least 5 bytes for a relative jump
const u32 fastmem_size =
static_cast < u32 > ( static_cast < u8 * > ( GetCurrentNearCodePointer ( ) ) - static_cast < u8 * > ( bpi . host_pc ) ) ;
const u32 nops = ( fastmem_size < 5 ? 5 - fastmem_size : 0 ) ;
for ( u32 i = 0 ; i < nops ; i + + )
m_emit - > nop ( ) ;
bpi . host_code_size = static_cast < u32 > (
static_cast < ptrdiff_t > ( static_cast < u8 * > ( GetCurrentNearCodePointer ( ) ) - static_cast < u8 * > ( bpi . host_pc ) ) ) ;
// generate slowmem fallback
m_far_emitter . align ( ) ;
bpi . host_slowmem_pc = GetCurrentFarCodePointer ( ) ;
SwitchToFarCode ( ) ;
EmitStoreGuestMemorySlowmem ( cbi , address , value , true ) ;
// return to the block code
m_emit - > jmp ( GetCurrentNearCodePointer ( ) ) ;
SwitchToNearCode ( ) ;
m_block - > loadstore_backpatch_info . push_back ( bpi ) ;
}
void CodeGenerator : : EmitStoreGuestMemorySlowmem ( const CodeBlockInstruction & cbi , const Value & address ,
const Value & value , bool in_far_code )
{
if ( g_settings . cpu_recompiler_memory_exceptions )
{
Assert ( ! in_far_code ) ;
Value result = m_register_cache . AllocateScratch ( RegSize_32 ) ;
switch ( value . size )
{
case RegSize_8 :
@ -1910,24 +2167,24 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
m_emit - > jnz ( GetCurrentFarCodePointer ( ) ) ;
// store exception path
SwitchToFarCode ( ) ;
if ( ! in_far_code )
SwitchToFarCode ( ) ;
// cause_bits = (result << 2) | BD | cop_n
m_emit - > shl ( GetHostReg32 ( result .host_reg ), 2 ) ;
m_emit - > or_ ( GetHostReg32 ( result .host_reg ),
m_emit - > shl ( GetHostReg32 ( result ), 2 ) ;
m_emit - > or_ ( GetHostReg32 ( result ),
Cop0Registers : : CAUSE : : MakeValueForException ( static_cast < Exception > ( 0 ) , cbi . is_branch_delay_slot , false ,
cbi . instruction . cop . cop_n ) ) ;
EmitFunctionCall ( nullptr , static_cast < void ( * ) ( u32 , u32 ) > ( & CPU : : RaiseException ) , result , GetCurrentInstructionPC ( ) ) ;
EmitExceptionExit ( ) ;
SwitchToNearCode ( ) ;
if ( ! in_far_code )
SwitchToNearCode ( ) ;
m_register_cache . PopState ( ) ;
}
else
{
m_register_cache . FlushCallerSavedGuestRegisters ( true , true ) ;
switch ( value . size )
{
case RegSize_8 :
@ -1949,6 +2206,24 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
}
}
bool CodeGenerator : : BackpatchLoadStore ( const LoadStoreBackpatchInfo & lbi )
{
Log_DevPrintf ( " Backpatching %p (guest PC 0x%08X) to slowmem " , lbi . host_pc , lbi . guest_pc ) ;
// turn it into a jump to the slowmem handler
Xbyak : : CodeGenerator cg ( lbi . host_code_size , lbi . host_pc ) ;
cg . jmp ( lbi . host_slowmem_pc ) ;
const s32 nops = static_cast < s32 > ( lbi . host_code_size ) -
static_cast < s32 > ( static_cast < ptrdiff_t > ( cg . getCurr ( ) - static_cast < u8 * > ( lbi . host_pc ) ) ) ;
Assert ( nops > = 0 ) ;
for ( s32 i = 0 ; i < nops ; i + + )
cg . nop ( ) ;
JitCodeBuffer : : FlushInstructionCache ( lbi . host_pc , lbi . host_code_size ) ;
return true ;
}
void CodeGenerator : : EmitLoadGlobal ( HostReg host_reg , RegSize size , const void * ptr )
{
const s64 displacement =