forked from Mirror/Ryujinx
Add an early TailMerge
pass (#2721)
* Add an early `TailMerge` pass. Some translations can have a lot of guest calls, and for each guest call there is a call guard which may return. This can produce a lot of epilogue code for returns. This pass merges the epilogues into a single block. ``` Using filter 'hcq'. Using metric 'code size'. Total diff: -1648111 (-7.19 %) (bytes): Base: 22913847 Diff: 21265736 Improved: 4567, regressed: 14, unchanged: 144 ``` * Set PTC version * Address feedback * Handle `void` returning functions * Actually handle `void` returning functions * Fix `RegisterToLocal` logging
This commit is contained in:
parent
d512ce122c
commit
fbf40424f4
7 changed files with 148 additions and 26 deletions
|
@ -17,7 +17,7 @@ namespace ARMeilleure.CodeGen.Optimizations
|
|||
BasicBlock lastBlock = cfg.Blocks.Last;
|
||||
|
||||
// Move cold blocks at the end of the list, so that they are emitted away from hot code.
|
||||
for (block = cfg.Blocks.First; block != lastBlock; block = nextBlock)
|
||||
for (block = cfg.Blocks.First; block != null; block = nextBlock)
|
||||
{
|
||||
nextBlock = block.ListNext;
|
||||
|
||||
|
@ -26,6 +26,11 @@ namespace ARMeilleure.CodeGen.Optimizations
|
|||
cfg.Blocks.Remove(block);
|
||||
cfg.Blocks.AddLast(block);
|
||||
}
|
||||
|
||||
if (block == lastBlock)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (block = cfg.Blocks.First; block != null; block = nextBlock)
|
||||
|
|
83
ARMeilleure/CodeGen/Optimizations/TailMerge.cs
Normal file
83
ARMeilleure/CodeGen/Optimizations/TailMerge.cs
Normal file
|
@ -0,0 +1,83 @@
|
|||
using ARMeilleure.IntermediateRepresentation;
|
||||
using ARMeilleure.Translation;
|
||||
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
|
||||
|
||||
namespace ARMeilleure.CodeGen.Optimizations
|
||||
{
|
||||
static class TailMerge
|
||||
{
|
||||
public static void RunPass(in CompilerContext cctx)
|
||||
{
|
||||
ControlFlowGraph cfg = cctx.Cfg;
|
||||
|
||||
BasicBlock mergedReturn = new(cfg.Blocks.Count);
|
||||
|
||||
Operand returnValue;
|
||||
Operation returnOp;
|
||||
|
||||
if (cctx.FuncReturnType == OperandType.None)
|
||||
{
|
||||
returnValue = default;
|
||||
returnOp = Operation(Instruction.Return, default);
|
||||
}
|
||||
else
|
||||
{
|
||||
returnValue = cfg.AllocateLocal(cctx.FuncReturnType);
|
||||
returnOp = Operation(Instruction.Return, default, returnValue);
|
||||
}
|
||||
|
||||
mergedReturn.Frequency = BasicBlockFrequency.Cold;
|
||||
mergedReturn.Operations.AddLast(returnOp);
|
||||
|
||||
for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
|
||||
{
|
||||
Operation op = block.Operations.Last;
|
||||
|
||||
if (op != default && op.Instruction == Instruction.Return)
|
||||
{
|
||||
block.Operations.Remove(op);
|
||||
|
||||
if (cctx.FuncReturnType == OperandType.None)
|
||||
{
|
||||
PrepareMerge(block, mergedReturn);
|
||||
}
|
||||
else
|
||||
{
|
||||
Operation copyOp = Operation(Instruction.Copy, returnValue, op.GetSource(0));
|
||||
|
||||
PrepareMerge(block, mergedReturn).Append(copyOp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cfg.Blocks.AddLast(mergedReturn);
|
||||
cfg.Update();
|
||||
}
|
||||
|
||||
private static BasicBlock PrepareMerge(BasicBlock from, BasicBlock to)
|
||||
{
|
||||
BasicBlock fromPred = from.Predecessors.Count == 1 ? from.Predecessors[0] : null;
|
||||
|
||||
// If the block is empty, we can try to append to the predecessor and avoid unnecessary jumps.
|
||||
if (from.Operations.Count == 0 && fromPred != null)
|
||||
{
|
||||
for (int i = 0; i < fromPred.SuccessorsCount; i++)
|
||||
{
|
||||
if (fromPred.GetSuccessor(i) == from)
|
||||
{
|
||||
fromPred.SetSuccessor(i, to);
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: `from` becomes unreachable and the call to `cfg.Update()` will remove it.
|
||||
return fromPred;
|
||||
}
|
||||
else
|
||||
{
|
||||
from.AddSuccessor(to);
|
||||
|
||||
return from;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -5,8 +5,10 @@ namespace ARMeilleure.Diagnostics
|
|||
Decoding,
|
||||
Translation,
|
||||
RegisterUsage,
|
||||
TailMerge,
|
||||
Dominance,
|
||||
SsaConstruction,
|
||||
RegisterToLocal,
|
||||
Optimization,
|
||||
PreAllocation,
|
||||
RegisterAllocation,
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
using ARMeilleure.CodeGen;
|
||||
using ARMeilleure.CodeGen.Optimizations;
|
||||
using ARMeilleure.CodeGen.X86;
|
||||
using ARMeilleure.Diagnostics;
|
||||
using ARMeilleure.IntermediateRepresentation;
|
||||
|
@ -13,31 +14,41 @@ namespace ARMeilleure.Translation
|
|||
OperandType retType,
|
||||
CompilerOptions options)
|
||||
{
|
||||
Logger.StartPass(PassName.Dominance);
|
||||
CompilerContext cctx = new(cfg, argTypes, retType, options);
|
||||
|
||||
if ((options & CompilerOptions.SsaForm) != 0)
|
||||
if (options.HasFlag(CompilerOptions.Optimize))
|
||||
{
|
||||
Dominance.FindDominators(cfg);
|
||||
Dominance.FindDominanceFrontiers(cfg);
|
||||
Logger.StartPass(PassName.TailMerge);
|
||||
|
||||
TailMerge.RunPass(cctx);
|
||||
|
||||
Logger.EndPass(PassName.TailMerge, cfg);
|
||||
}
|
||||
|
||||
Logger.EndPass(PassName.Dominance);
|
||||
|
||||
Logger.StartPass(PassName.SsaConstruction);
|
||||
|
||||
if ((options & CompilerOptions.SsaForm) != 0)
|
||||
if (options.HasFlag(CompilerOptions.SsaForm))
|
||||
{
|
||||
Logger.StartPass(PassName.Dominance);
|
||||
|
||||
Dominance.FindDominators(cfg);
|
||||
Dominance.FindDominanceFrontiers(cfg);
|
||||
|
||||
Logger.EndPass(PassName.Dominance);
|
||||
|
||||
Logger.StartPass(PassName.SsaConstruction);
|
||||
|
||||
Ssa.Construct(cfg);
|
||||
|
||||
Logger.EndPass(PassName.SsaConstruction, cfg);
|
||||
}
|
||||
else
|
||||
{
|
||||
Logger.StartPass(PassName.RegisterToLocal);
|
||||
|
||||
RegisterToLocal.Rename(cfg);
|
||||
|
||||
Logger.EndPass(PassName.RegisterToLocal, cfg);
|
||||
}
|
||||
|
||||
Logger.EndPass(PassName.SsaConstruction, cfg);
|
||||
|
||||
CompilerContext cctx = new(cfg, argTypes, retType, options);
|
||||
|
||||
return CodeGenerator.Generate(cctx);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@ namespace ARMeilleure.Translation
|
|||
private BasicBlock[] _postOrderBlocks;
|
||||
private int[] _postOrderMap;
|
||||
|
||||
public int LocalsCount { get; }
|
||||
public int LocalsCount { get; private set; }
|
||||
public BasicBlock Entry { get; }
|
||||
public IntrusiveList<BasicBlock> Blocks { get; }
|
||||
public BasicBlock[] PostOrderBlocks => _postOrderBlocks;
|
||||
|
@ -25,6 +25,15 @@ namespace ARMeilleure.Translation
|
|||
Update();
|
||||
}
|
||||
|
||||
/// <summary>
/// Creates a new local operand of the given type and assigns it the next local number.
/// </summary>
/// <param name="type">Operand type of the local to create.</param>
/// <returns>The newly created and numbered local operand.</returns>
public Operand AllocateLocal(OperandType type)
|
||||
{
|
||||
Operand result = Operand.Factory.Local(type);
|
||||
|
||||
// LocalsCount is incremented first, so the new local is numbered with the updated count.
result.NumberLocal(++LocalsCount);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public void Update()
|
||||
{
|
||||
RemoveUnreachableBlocks(Blocks);
|
||||
|
|
|
@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
|
|||
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
||||
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
||||
|
||||
private const uint InternalVersion = 2680; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
private const uint InternalVersion = 2721; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
|
||||
private const string ActualDir = "0";
|
||||
private const string BackupDir = "1";
|
||||
|
|
|
@ -203,12 +203,18 @@ namespace ARMeilleure.Translation
|
|||
// It always needs a context load as it is the first block to run.
|
||||
if (block.Predecessors.Count == 0 || hasContextLoad)
|
||||
{
|
||||
arg = Local(OperandType.I64);
|
||||
long vecMask = globalInputs[block.Index].VecMask;
|
||||
long intMask = globalInputs[block.Index].IntMask;
|
||||
|
||||
Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0)));
|
||||
if (vecMask != 0 || intMask != 0)
|
||||
{
|
||||
arg = Local(OperandType.I64);
|
||||
|
||||
LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector, mode, loadArg, arg);
|
||||
LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer, mode, loadArg, arg);
|
||||
Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0)));
|
||||
|
||||
LoadLocals(block, vecMask, RegisterType.Vector, mode, loadArg, arg);
|
||||
LoadLocals(block, intMask, RegisterType.Integer, mode, loadArg, arg);
|
||||
}
|
||||
}
|
||||
|
||||
bool hasContextStore = HasContextStore(block);
|
||||
|
@ -220,15 +226,21 @@ namespace ARMeilleure.Translation
|
|||
|
||||
if (EndsWithReturn(block) || hasContextStore)
|
||||
{
|
||||
if (arg == default)
|
||||
long vecMask = globalOutputs[block.Index].VecMask;
|
||||
long intMask = globalOutputs[block.Index].IntMask;
|
||||
|
||||
if (vecMask != 0 || intMask != 0)
|
||||
{
|
||||
arg = Local(OperandType.I64);
|
||||
if (arg == default)
|
||||
{
|
||||
arg = Local(OperandType.I64);
|
||||
|
||||
block.Append(Operation(Instruction.LoadArgument, arg, Const(0)));
|
||||
block.Append(Operation(Instruction.LoadArgument, arg, Const(0)));
|
||||
}
|
||||
|
||||
StoreLocals(block, intMask, RegisterType.Integer, mode, arg);
|
||||
StoreLocals(block, vecMask, RegisterType.Vector, mode, arg);
|
||||
}
|
||||
|
||||
StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, mode, arg);
|
||||
StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, mode, arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Reference in a new issue