mirror of
https://github.com/PabloMK7/citra.git
synced 2025-01-18 00:17:59 +00:00
x64: Proper stack alignment in shader JIT function calls
Import Dolphin stack handling and register saving routines. Also removes the x86 parts from abi files
This commit is contained in:
parent
0b6c0afeb7
commit
179ad35c2e
5 changed files with 111 additions and 455 deletions
|
@ -22,247 +22,69 @@ using namespace Gen;
|
||||||
|
|
||||||
// Shared code between Win64 and Unix64
|
// Shared code between Win64 and Unix64
|
||||||
|
|
||||||
// Sets up a __cdecl function.
|
void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
|
||||||
void XEmitter::ABI_EmitPrologue(int maxCallParams)
|
size_t shadow = 0;
|
||||||
{
|
#if defined(_WIN32)
|
||||||
#ifdef _M_IX86
|
shadow = 0x20;
|
||||||
// Don't really need to do anything
|
|
||||||
#elif defined(ARCHITECTURE_x86_64)
|
|
||||||
#if _WIN32
|
|
||||||
int stacksize = ((maxCallParams + 1) & ~1) * 8 + 8;
|
|
||||||
// Set up a stack frame so that we can call functions
|
|
||||||
// TODO: use maxCallParams
|
|
||||||
SUB(64, R(RSP), Imm8(stacksize));
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#error Arch not supported
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
int count = (mask & ABI_ALL_GPRS).Count();
|
||||||
|
rsp_alignment -= count * 8;
|
||||||
|
size_t subtraction = 0;
|
||||||
|
int fpr_count = (mask & ABI_ALL_FPRS).Count();
|
||||||
|
if (fpr_count) {
|
||||||
|
// If we have any XMMs to save, we must align the stack here.
|
||||||
|
subtraction = rsp_alignment & 0xf;
|
||||||
|
}
|
||||||
|
subtraction += 16 * fpr_count;
|
||||||
|
size_t xmm_base_subtraction = subtraction;
|
||||||
|
subtraction += needed_frame_size;
|
||||||
|
subtraction += shadow;
|
||||||
|
// Final alignment.
|
||||||
|
rsp_alignment -= subtraction;
|
||||||
|
subtraction += rsp_alignment & 0xf;
|
||||||
|
|
||||||
|
*shadowp = shadow;
|
||||||
|
*subtractionp = subtraction;
|
||||||
|
*xmm_offsetp = subtraction - xmm_base_subtraction;
|
||||||
}
|
}
|
||||||
|
|
||||||
void XEmitter::ABI_EmitEpilogue(int maxCallParams)
|
size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
|
||||||
{
|
size_t shadow, subtraction, xmm_offset;
|
||||||
#ifdef _M_IX86
|
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
|
||||||
RET();
|
|
||||||
#elif defined(ARCHITECTURE_x86_64)
|
|
||||||
#ifdef _WIN32
|
|
||||||
int stacksize = ((maxCallParams+1)&~1)*8 + 8;
|
|
||||||
ADD(64, R(RSP), Imm8(stacksize));
|
|
||||||
#endif
|
|
||||||
RET();
|
|
||||||
#else
|
|
||||||
#error Arch not supported
|
|
||||||
|
|
||||||
|
for (int r : mask & ABI_ALL_GPRS)
|
||||||
|
PUSH((X64Reg)r);
|
||||||
|
|
||||||
#endif
|
if (subtraction)
|
||||||
|
SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
|
||||||
|
|
||||||
|
for (int x : mask & ABI_ALL_FPRS) {
|
||||||
|
MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16));
|
||||||
|
xmm_offset += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _M_IX86 // All32
|
return shadow;
|
||||||
|
|
||||||
// Shared code between Win32 and Unix32
|
|
||||||
void XEmitter::ABI_CallFunction(const void *func) {
|
|
||||||
ABI_AlignStack(0);
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
|
void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
|
||||||
ABI_AlignStack(1 * 2);
|
size_t shadow, subtraction, xmm_offset;
|
||||||
PUSH(16, Imm16(param1));
|
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(1 * 2);
|
for (int x : mask & ABI_ALL_FPRS) {
|
||||||
|
MOVAPD((X64Reg) (x - 16), MDisp(RSP, (int)xmm_offset));
|
||||||
|
xmm_offset += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) {
|
if (subtraction)
|
||||||
ABI_AlignStack(1 * 2 + 1 * 4);
|
ADD(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
|
||||||
PUSH(16, Imm16(param2));
|
|
||||||
PUSH(32, Imm32(param1));
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(1 * 2 + 1 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
|
for (int r = 15; r >= 0; r--) {
|
||||||
ABI_AlignStack(1 * 4);
|
if (mask[r])
|
||||||
PUSH(32, Imm32(param1));
|
POP((X64Reg)r);
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(1 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
|
|
||||||
ABI_AlignStack(2 * 4);
|
|
||||||
PUSH(32, Imm32(param2));
|
|
||||||
PUSH(32, Imm32(param1));
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(2 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) {
|
|
||||||
ABI_AlignStack(3 * 4);
|
|
||||||
PUSH(32, Imm32(param3));
|
|
||||||
PUSH(32, Imm32(param2));
|
|
||||||
PUSH(32, Imm32(param1));
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(3 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) {
|
|
||||||
ABI_AlignStack(3 * 4);
|
|
||||||
PUSH(32, ImmPtr(param3));
|
|
||||||
PUSH(32, Imm32(param2));
|
|
||||||
PUSH(32, Imm32(param1));
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(3 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2,u32 param3, void *param4) {
|
|
||||||
ABI_AlignStack(4 * 4);
|
|
||||||
PUSH(32, ImmPtr(param4));
|
|
||||||
PUSH(32, Imm32(param3));
|
|
||||||
PUSH(32, Imm32(param2));
|
|
||||||
PUSH(32, Imm32(param1));
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(4 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
|
|
||||||
ABI_AlignStack(1 * 4);
|
|
||||||
PUSH(32, ImmPtr(param1));
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(1 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) {
|
|
||||||
ABI_AlignStack(2 * 4);
|
|
||||||
PUSH(32, arg2);
|
|
||||||
PUSH(32, ImmPtr(param1));
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(2 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) {
|
|
||||||
ABI_AlignStack(3 * 4);
|
|
||||||
PUSH(32, arg3);
|
|
||||||
PUSH(32, arg2);
|
|
||||||
PUSH(32, ImmPtr(param1));
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(3 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) {
|
|
||||||
ABI_AlignStack(3 * 4);
|
|
||||||
PUSH(32, Imm32(param3));
|
|
||||||
PUSH(32, ImmPtr(param2));
|
|
||||||
PUSH(32, ImmPtr(param1));
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(3 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pass a register as a parameter.
|
|
||||||
void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
|
|
||||||
ABI_AlignStack(1 * 4);
|
|
||||||
PUSH(32, R(reg1));
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(1 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pass two registers as parameters.
|
|
||||||
void XEmitter::ABI_CallFunctionRR(const void *func, Gen::X64Reg reg1, Gen::X64Reg reg2)
|
|
||||||
{
|
|
||||||
ABI_AlignStack(2 * 4);
|
|
||||||
PUSH(32, R(reg2));
|
|
||||||
PUSH(32, R(reg1));
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(2 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2)
|
|
||||||
{
|
|
||||||
ABI_AlignStack(2 * 4);
|
|
||||||
PUSH(32, Imm32(param2));
|
|
||||||
PUSH(32, arg1);
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(2 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3)
|
|
||||||
{
|
|
||||||
ABI_AlignStack(3 * 4);
|
|
||||||
PUSH(32, Imm32(param3));
|
|
||||||
PUSH(32, Imm32(param2));
|
|
||||||
PUSH(32, arg1);
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(3 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
|
|
||||||
{
|
|
||||||
ABI_AlignStack(1 * 4);
|
|
||||||
PUSH(32, arg1);
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(1 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2)
|
|
||||||
{
|
|
||||||
ABI_AlignStack(2 * 4);
|
|
||||||
PUSH(32, arg2);
|
|
||||||
PUSH(32, arg1);
|
|
||||||
CALL(func);
|
|
||||||
ABI_RestoreStack(2 * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
|
|
||||||
// Note: 4 * 4 = 16 bytes, so alignment is preserved.
|
|
||||||
PUSH(EBP);
|
|
||||||
PUSH(EBX);
|
|
||||||
PUSH(ESI);
|
|
||||||
PUSH(EDI);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
|
|
||||||
POP(EDI);
|
|
||||||
POP(ESI);
|
|
||||||
POP(EBX);
|
|
||||||
POP(EBP);
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) {
|
|
||||||
frameSize += 4; // reserve space for return address
|
|
||||||
unsigned int alignedSize =
|
|
||||||
#ifdef __GNUC__
|
|
||||||
(frameSize + 15) & -16;
|
|
||||||
#else
|
|
||||||
(frameSize + 3) & -4;
|
|
||||||
#endif
|
|
||||||
return alignedSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void XEmitter::ABI_AlignStack(unsigned int frameSize) {
|
|
||||||
// Mac OS X requires the stack to be 16-byte aligned before every call.
|
|
||||||
// Linux requires the stack to be 16-byte aligned before calls that put SSE
|
|
||||||
// vectors on the stack, but since we do not keep track of which calls do that,
|
|
||||||
// it is effectively every call as well.
|
|
||||||
// Windows binaries compiled with MSVC do not have such a restriction*, but I
|
|
||||||
// expect that GCC on Windows acts the same as GCC on Linux in this respect.
|
|
||||||
// It would be nice if someone could verify this.
|
|
||||||
// *However, the MSVC optimizing compiler assumes a 4-byte-aligned stack at times.
|
|
||||||
unsigned int fillSize =
|
|
||||||
ABI_GetAlignedFrameSize(frameSize) - (frameSize + 4);
|
|
||||||
if (fillSize != 0) {
|
|
||||||
SUB(32, R(ESP), Imm8(fillSize));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void XEmitter::ABI_RestoreStack(unsigned int frameSize) {
|
|
||||||
unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize);
|
|
||||||
alignedSize -= 4; // return address is POPped at end of call
|
|
||||||
if (alignedSize != 0) {
|
|
||||||
ADD(32, R(ESP), Imm8(alignedSize));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else //64bit
|
|
||||||
|
|
||||||
// Common functions
|
// Common functions
|
||||||
void XEmitter::ABI_CallFunction(const void *func) {
|
void XEmitter::ABI_CallFunction(const void *func) {
|
||||||
u64 distance = u64(func) - (u64(code) + 5);
|
u64 distance = u64(func) - (u64(code) + 5);
|
||||||
|
@ -539,142 +361,3 @@ void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, cons
|
||||||
CALL(func);
|
CALL(func);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) {
|
|
||||||
return frameSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
|
|
||||||
// The Windows x64 ABI requires XMM6 - XMM15 to be callee saved. 10 regs.
|
|
||||||
// But, not saving XMM4 and XMM5 breaks things in VS 2010, even though they are volatile regs.
|
|
||||||
// Let's just save all 16.
|
|
||||||
const int XMM_STACK_SPACE = 16 * 16;
|
|
||||||
|
|
||||||
// Win64 Specific Code
|
|
||||||
void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
|
|
||||||
//we only want to do this once
|
|
||||||
PUSH(RBX);
|
|
||||||
PUSH(RSI);
|
|
||||||
PUSH(RDI);
|
|
||||||
PUSH(RBP);
|
|
||||||
PUSH(R12);
|
|
||||||
PUSH(R13);
|
|
||||||
PUSH(R14);
|
|
||||||
PUSH(R15);
|
|
||||||
ABI_AlignStack(0);
|
|
||||||
|
|
||||||
// Do this after aligning, because before it's offset by 8.
|
|
||||||
SUB(64, R(RSP), Imm32(XMM_STACK_SPACE));
|
|
||||||
for (int i = 0; i < 16; ++i)
|
|
||||||
MOVAPS(MDisp(RSP, i * 16), (X64Reg)(XMM0 + i));
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
|
|
||||||
for (int i = 0; i < 16; ++i)
|
|
||||||
MOVAPS((X64Reg)(XMM0 + i), MDisp(RSP, i * 16));
|
|
||||||
ADD(64, R(RSP), Imm32(XMM_STACK_SPACE));
|
|
||||||
|
|
||||||
ABI_RestoreStack(0);
|
|
||||||
POP(R15);
|
|
||||||
POP(R14);
|
|
||||||
POP(R13);
|
|
||||||
POP(R12);
|
|
||||||
POP(RBP);
|
|
||||||
POP(RDI);
|
|
||||||
POP(RSI);
|
|
||||||
POP(RBX);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Win64 Specific Code
|
|
||||||
void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
|
|
||||||
PUSH(RCX);
|
|
||||||
PUSH(RDX);
|
|
||||||
PUSH(RSI);
|
|
||||||
PUSH(RDI);
|
|
||||||
PUSH(R8);
|
|
||||||
PUSH(R9);
|
|
||||||
PUSH(R10);
|
|
||||||
PUSH(R11);
|
|
||||||
// TODO: Callers preserve XMM4-5 (XMM0-3 are args.)
|
|
||||||
ABI_AlignStack(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
|
|
||||||
ABI_RestoreStack(0);
|
|
||||||
POP(R11);
|
|
||||||
POP(R10);
|
|
||||||
POP(R9);
|
|
||||||
POP(R8);
|
|
||||||
POP(RDI);
|
|
||||||
POP(RSI);
|
|
||||||
POP(RDX);
|
|
||||||
POP(RCX);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) {
|
|
||||||
SUB(64, R(RSP), Imm8(0x28));
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) {
|
|
||||||
ADD(64, R(RSP), Imm8(0x28));
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
// Unix64 Specific Code
|
|
||||||
void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
|
|
||||||
PUSH(RBX);
|
|
||||||
PUSH(RBP);
|
|
||||||
PUSH(R12);
|
|
||||||
PUSH(R13);
|
|
||||||
PUSH(R14);
|
|
||||||
PUSH(R15);
|
|
||||||
PUSH(R15); //just to align stack. duped push/pop doesn't hurt.
|
|
||||||
// TODO: XMM?
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
|
|
||||||
POP(R15);
|
|
||||||
POP(R15);
|
|
||||||
POP(R14);
|
|
||||||
POP(R13);
|
|
||||||
POP(R12);
|
|
||||||
POP(RBP);
|
|
||||||
POP(RBX);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
|
|
||||||
PUSH(RCX);
|
|
||||||
PUSH(RDX);
|
|
||||||
PUSH(RSI);
|
|
||||||
PUSH(RDI);
|
|
||||||
PUSH(R8);
|
|
||||||
PUSH(R9);
|
|
||||||
PUSH(R10);
|
|
||||||
PUSH(R11);
|
|
||||||
PUSH(R11);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
|
|
||||||
POP(R11);
|
|
||||||
POP(R11);
|
|
||||||
POP(R10);
|
|
||||||
POP(R9);
|
|
||||||
POP(R8);
|
|
||||||
POP(RDI);
|
|
||||||
POP(RSI);
|
|
||||||
POP(RDX);
|
|
||||||
POP(RCX);
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) {
|
|
||||||
SUB(64, R(RSP), Imm8(0x08));
|
|
||||||
}
|
|
||||||
|
|
||||||
void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) {
|
|
||||||
ADD(64, R(RSP), Imm8(0x08));
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // WIN32
|
|
||||||
|
|
||||||
#endif // 32bit
|
|
||||||
|
|
|
@ -1,35 +1,15 @@
|
||||||
// Copyright (C) 2003 Dolphin Project.
|
// Copyright 2008 Dolphin Emulator Project
|
||||||
|
// Licensed under GPLv2+
|
||||||
// This program is free software: you can redistribute it and/or modify
|
// Refer to the license.txt file included.
|
||||||
// it under the terms of the GNU General Public License as published by
|
|
||||||
// the Free Software Foundation, version 2.0 or later versions.
|
|
||||||
|
|
||||||
// This program is distributed in the hope that it will be useful,
|
|
||||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
// GNU General Public License 2.0 for more details.
|
|
||||||
|
|
||||||
// A copy of the GPL 2.0 should have been included with the program.
|
|
||||||
// If not, see http://www.gnu.org/licenses/
|
|
||||||
|
|
||||||
// Official SVN repository and contact information can be found at
|
|
||||||
// http://code.google.com/p/dolphin-emu/
|
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/bit_set.h"
|
||||||
|
#include "emitter.h"
|
||||||
|
|
||||||
// x86/x64 ABI:s, and helpers to help follow them when JIT-ing code.
|
// x64 ABI:s, and helpers to help follow them when JIT-ing code.
|
||||||
// All conventions return values in EAX (+ possibly EDX).
|
// All conventions return values in EAX (+ possibly EDX).
|
||||||
|
|
||||||
// Linux 32-bit, Windows 32-bit (cdecl, System V):
|
|
||||||
// * Caller pushes left to right
|
|
||||||
// * Caller fixes stack after call
|
|
||||||
// * function subtract from stack for local storage only.
|
|
||||||
// Scratch: EAX ECX EDX
|
|
||||||
// Callee-save: EBX ESI EDI EBP
|
|
||||||
// Parameters: -
|
|
||||||
|
|
||||||
// Windows 64-bit
|
// Windows 64-bit
|
||||||
// * 4-reg "fastcall" variant, very new-skool stack handling
|
// * 4-reg "fastcall" variant, very new-skool stack handling
|
||||||
// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_
|
// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_
|
||||||
|
@ -44,18 +24,8 @@
|
||||||
// Callee-save: RBX RBP R12 R13 R14 R15
|
// Callee-save: RBX RBP R12 R13 R14 R15
|
||||||
// Parameters: RDI RSI RDX RCX R8 R9
|
// Parameters: RDI RSI RDX RCX R8 R9
|
||||||
|
|
||||||
#ifdef _M_IX86 // 32 bit calling convention, shared by all
|
#define ABI_ALL_FPRS BitSet32(0xffff0000)
|
||||||
|
#define ABI_ALL_GPRS BitSet32(0x0000ffff)
|
||||||
// 32-bit don't pass parameters in regs, but these are convenient to have anyway when we have to
|
|
||||||
// choose regs to put stuff in.
|
|
||||||
#define ABI_PARAM1 RCX
|
|
||||||
#define ABI_PARAM2 RDX
|
|
||||||
|
|
||||||
// There are no ABI_PARAM* here, since args are pushed.
|
|
||||||
// 32-bit bog standard cdecl, shared between linux and windows
|
|
||||||
// MacOSX 32-bit is same as System V with a few exceptions that we probably don't care much about.
|
|
||||||
|
|
||||||
#elif ARCHITECTURE_x86_64 // 64 bit calling convention
|
|
||||||
|
|
||||||
#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
|
#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
|
||||||
|
|
||||||
|
@ -64,7 +34,11 @@
|
||||||
#define ABI_PARAM3 R8
|
#define ABI_PARAM3 R8
|
||||||
#define ABI_PARAM4 R9
|
#define ABI_PARAM4 R9
|
||||||
|
|
||||||
#else //64-bit Unix (hopefully MacOSX too)
|
// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
|
||||||
|
#define ABI_ALL_CALLER_SAVED \
|
||||||
|
(BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \
|
||||||
|
XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 })
|
||||||
|
#else //64-bit Unix / OS X
|
||||||
|
|
||||||
#define ABI_PARAM1 RDI
|
#define ABI_PARAM1 RDI
|
||||||
#define ABI_PARAM2 RSI
|
#define ABI_PARAM2 RSI
|
||||||
|
@ -73,6 +47,13 @@
|
||||||
#define ABI_PARAM5 R8
|
#define ABI_PARAM5 R8
|
||||||
#define ABI_PARAM6 R9
|
#define ABI_PARAM6 R9
|
||||||
|
|
||||||
|
// TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably
|
||||||
|
// don't actually clobber them.
|
||||||
|
#define ABI_ALL_CALLER_SAVED \
|
||||||
|
(BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \
|
||||||
|
ABI_ALL_FPRS)
|
||||||
#endif // WIN32
|
#endif // WIN32
|
||||||
|
|
||||||
#endif // X86
|
#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
|
||||||
|
|
||||||
|
#define ABI_RETURN RAX
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
#include "common/bit_set.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/code_block.h"
|
#include "common/code_block.h"
|
||||||
|
|
||||||
|
@ -356,7 +357,7 @@ private:
|
||||||
void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg);
|
void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg);
|
||||||
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
|
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
|
||||||
|
|
||||||
void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
|
void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void Write8(u8 value);
|
void Write8(u8 value);
|
||||||
|
@ -1007,25 +1008,26 @@ public:
|
||||||
ABI_CallFunctionC((const void*)func, param1);
|
ABI_CallFunctionC((const void*)func, param1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// A function that doesn't have any control over what it will do to regs,
|
/**
|
||||||
// such as the dispatcher, should be surrounded by these.
|
* Saves specified registers and adjusts the stack to be 16-byte aligned as required by the ABI
|
||||||
void ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
*
|
||||||
void ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
* @param mask Registers to push on the stack (high 16 bits are XMMs, low 16 bits are GPRs)
|
||||||
|
* @param rsp_alignment Current alignment of the stack pointer, must be 0 or 8
|
||||||
|
* @param needed_frame_size Additional space needed, e.g., for function arguments passed on the stack
|
||||||
|
* @return Size of the shadow space, i.e., offset of the frame
|
||||||
|
*/
|
||||||
|
size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
|
||||||
|
|
||||||
// A function that doesn't know anything about its surroundings, should
|
/**
|
||||||
// be surrounded by these to establish a safe environment, where it can roam free.
|
* Restores specified registers and adjusts the stack to its original alignment, i.e., the alignment before
|
||||||
// An example is a backpatch injected function.
|
* the matching PushRegistersAndAdjustStack.
|
||||||
void ABI_PushAllCallerSavedRegsAndAdjustStack();
|
*
|
||||||
void ABI_PopAllCallerSavedRegsAndAdjustStack();
|
* @param mask Registers to restore from the stack (high 16 bits are XMMs, low 16 bits are GPRs)
|
||||||
|
* @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must be 0 or 8
|
||||||
unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize);
|
* @param needed_frame_size Additional space that was needed
|
||||||
void ABI_AlignStack(unsigned int frameSize);
|
* @warning Stack must be currently 16-byte aligned
|
||||||
void ABI_RestoreStack(unsigned int frameSize);
|
*/
|
||||||
|
void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
|
||||||
// Sets up a __cdecl function.
|
|
||||||
// Only x64 really needs the parameter count.
|
|
||||||
void ABI_EmitPrologue(int maxCallParams);
|
|
||||||
void ABI_EmitEpilogue(int maxCallParams);
|
|
||||||
|
|
||||||
#ifdef _M_IX86
|
#ifdef _M_IX86
|
||||||
static int ABI_GetNumXMMRegs() { return 8; }
|
static int ABI_GetNumXMMRegs() { return 8; }
|
||||||
|
|
|
@ -122,6 +122,14 @@ static const X64Reg ONE = XMM14;
|
||||||
/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
|
/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
|
||||||
static const X64Reg NEGBIT = XMM15;
|
static const X64Reg NEGBIT = XMM15;
|
||||||
|
|
||||||
|
// State registers that must not be modified by external function calls
|
||||||
|
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
|
||||||
|
static const BitSet32 persistent_regs = {
|
||||||
|
UNIFORMS, REGISTERS, // Pointers to register blocks
|
||||||
|
ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
|
||||||
|
ONE+16, NEGBIT+16, // Constants
|
||||||
|
};
|
||||||
|
|
||||||
/// Raw constant for the source register selector that indicates no swizzling is performed
|
/// Raw constant for the source register selector that indicates no swizzling is performed
|
||||||
static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
|
static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
|
||||||
/// Raw constant for the destination register enable mask that indicates all components are enabled
|
/// Raw constant for the destination register enable mask that indicates all components are enabled
|
||||||
|
@ -295,20 +303,8 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) {
|
||||||
CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
|
CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_PushCallerSavedXMM() {
|
BitSet32 JitCompiler::PersistentCallerSavedRegs() {
|
||||||
#ifndef _WIN32
|
return persistent_regs & ABI_ALL_CALLER_SAVED;
|
||||||
SUB(64, R(RSP), Imm8(2 * 16));
|
|
||||||
MOVUPS(MDisp(RSP, 16), ONE);
|
|
||||||
MOVUPS(MDisp(RSP, 0), NEGBIT);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitCompiler::Compile_PopCallerSavedXMM() {
|
|
||||||
#ifndef _WIN32
|
|
||||||
MOVUPS(NEGBIT, MDisp(RSP, 0));
|
|
||||||
MOVUPS(ONE, MDisp(RSP, 16));
|
|
||||||
ADD(64, R(RSP), Imm8(2 * 16));
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_ADD(Instruction instr) {
|
void JitCompiler::Compile_ADD(Instruction instr) {
|
||||||
|
@ -390,12 +386,9 @@ void JitCompiler::Compile_EX2(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
MOVSS(XMM0, R(SRC1));
|
MOVSS(XMM0, R(SRC1));
|
||||||
|
|
||||||
// The following will actually break the stack alignment
|
ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
|
||||||
ABI_PushAllCallerSavedRegsAndAdjustStack();
|
|
||||||
Compile_PushCallerSavedXMM();
|
|
||||||
ABI_CallFunction(reinterpret_cast<const void*>(exp2f));
|
ABI_CallFunction(reinterpret_cast<const void*>(exp2f));
|
||||||
Compile_PopCallerSavedXMM();
|
ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
|
||||||
ABI_PopAllCallerSavedRegsAndAdjustStack();
|
|
||||||
|
|
||||||
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
|
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
MOVAPS(SRC1, R(XMM0));
|
MOVAPS(SRC1, R(XMM0));
|
||||||
|
@ -406,12 +399,9 @@ void JitCompiler::Compile_LG2(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
MOVSS(XMM0, R(SRC1));
|
MOVSS(XMM0, R(SRC1));
|
||||||
|
|
||||||
// The following will actually break the stack alignment
|
ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
|
||||||
ABI_PushAllCallerSavedRegsAndAdjustStack();
|
|
||||||
Compile_PushCallerSavedXMM();
|
|
||||||
ABI_CallFunction(reinterpret_cast<const void*>(log2f));
|
ABI_CallFunction(reinterpret_cast<const void*>(log2f));
|
||||||
Compile_PopCallerSavedXMM();
|
ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
|
||||||
ABI_PopAllCallerSavedRegsAndAdjustStack();
|
|
||||||
|
|
||||||
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
|
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
MOVAPS(SRC1, R(XMM0));
|
MOVAPS(SRC1, R(XMM0));
|
||||||
|
@ -560,7 +550,7 @@ void JitCompiler::Compile_NOP(Instruction instr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_END(Instruction instr) {
|
void JitCompiler::Compile_END(Instruction instr) {
|
||||||
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
|
||||||
RET();
|
RET();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -755,7 +745,8 @@ CompiledShader* JitCompiler::Compile() {
|
||||||
const auto& code = g_state.vs.program_code;
|
const auto& code = g_state.vs.program_code;
|
||||||
unsigned offset = g_state.regs.vs.main_offset;
|
unsigned offset = g_state.regs.vs.main_offset;
|
||||||
|
|
||||||
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
// The stack pointer is 8 modulo 16 at the entry of a procedure
|
||||||
|
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
|
||||||
|
|
||||||
MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
|
MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
|
||||||
MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
|
MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
|
||||||
|
|
|
@ -77,8 +77,7 @@ private:
|
||||||
void Compile_EvaluateCondition(Instruction instr);
|
void Compile_EvaluateCondition(Instruction instr);
|
||||||
void Compile_UniformCondition(Instruction instr);
|
void Compile_UniformCondition(Instruction instr);
|
||||||
|
|
||||||
void Compile_PushCallerSavedXMM();
|
BitSet32 PersistentCallerSavedRegs();
|
||||||
void Compile_PopCallerSavedXMM();
|
|
||||||
|
|
||||||
/// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks.
|
/// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks.
|
||||||
unsigned* offset_ptr = nullptr;
|
unsigned* offset_ptr = nullptr;
|
||||||
|
|
Loading…
Reference in a new issue