diff --git a/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs b/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs
index c06ed5205f..9e243d378d 100644
--- a/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs
+++ b/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs
@@ -17,7 +17,7 @@ namespace ARMeilleure.CodeGen.Optimizations
             BasicBlock lastBlock = cfg.Blocks.Last;
 
             // Move cold blocks at the end of the list, so that they are emitted away from hot code.
-            for (block = cfg.Blocks.First; block != lastBlock; block = nextBlock)
+            for (block = cfg.Blocks.First; block != null; block = nextBlock) // Runs to the end of the list; the lastBlock check inside the body is what normally ends the loop.
             {
                 nextBlock = block.ListNext;
 
@@ -26,6 +26,11 @@ namespace ARMeilleure.CodeGen.Optimizations
                     cfg.Blocks.Remove(block);
                     cfg.Blocks.AddLast(block);
                 }
+
+                if (block == lastBlock) // lastBlock was captured before the loop, so blocks re-appended via AddLast are never visited.
+                {
+                    break;
+                }
             }
 
             for (block = cfg.Blocks.First; block != null; block = nextBlock)
diff --git a/ARMeilleure/CodeGen/Optimizations/TailMerge.cs b/ARMeilleure/CodeGen/Optimizations/TailMerge.cs
new file mode 100644
index 0000000000..f85b9c69ab
--- /dev/null
+++ b/ARMeilleure/CodeGen/Optimizations/TailMerge.cs
@@ -0,0 +1,83 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+    static class TailMerge // Pass that redirects every block ending in Return to one shared return block.
+    {
+        public static void RunPass(in CompilerContext cctx)
+        {
+            ControlFlowGraph cfg = cctx.Cfg;
+
+            BasicBlock mergedReturn = new(cfg.Blocks.Count); // NOTE(review): the argument is presumably the new block's index - confirm against BasicBlock's constructor.
+
+            Operand returnValue;
+            Operation returnOp;
+
+            if (cctx.FuncReturnType == OperandType.None) // No return type: the shared Return is built without a source operand.
+            {
+                returnValue = default;
+                returnOp = Operation(Instruction.Return, default);
+            }
+            else
+            {
+                returnValue = cfg.AllocateLocal(cctx.FuncReturnType); // Fresh local that each rewritten block copies its return value into.
+                returnOp = Operation(Instruction.Return, default, returnValue);
+            }
+
+            mergedReturn.Frequency = BasicBlockFrequency.Cold;
+            mergedReturn.Operations.AddLast(returnOp);
+
+            for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+            {
+                Operation op = block.Operations.Last;
+
+                if (op != default && op.Instruction == Instruction.Return) // Only blocks whose last operation is a Return are rewritten.
+                {
+                    block.Operations.Remove(op); // The block's own Return is dropped; mergedReturn carries the Return built earlier in this method.
+
+                    if (cctx.FuncReturnType == OperandType.None)
+                    {
+                        PrepareMerge(block, mergedReturn);
+                    }
+                    else
+                    {
+                        Operation copyOp = Operation(Instruction.Copy, returnValue, op.GetSource(0));
+
+                        PrepareMerge(block, mergedReturn).Append(copyOp); // The copy is appended to whichever block PrepareMerge returns (block or its sole predecessor).
+                    }
+                }
+            }
+
+            cfg.Blocks.AddLast(mergedReturn);
+            cfg.Update(); // Run after the edge changes; see the NOTE in PrepareMerge about unreachable blocks.
+        }
+
+        private static BasicBlock PrepareMerge(BasicBlock from, BasicBlock to) // Makes `to` a successor of `from` (or of its sole predecessor) and returns the block that was linked.
+        {
+            BasicBlock fromPred = from.Predecessors.Count == 1 ? from.Predecessors[0] : null; // Null unless `from` has exactly one predecessor.
+
+            // If the block is empty, we can try to append to the predecessor and avoid unnecessary jumps.
+            if (from.Operations.Count == 0 && fromPred != null)
+            {
+                for (int i = 0; i < fromPred.SuccessorsCount; i++)
+                {
+                    if (fromPred.GetSuccessor(i) == from)
+                    {
+                        fromPred.SetSuccessor(i, to); // Every successor slot that pointed at `from` now points at `to`.
+                    }
+                }
+
+                // NOTE: `from` becomes unreachable and the call to `cfg.Update()` will remove it.
+                return fromPred;
+            }
+            else
+            {
+                from.AddSuccessor(to);
+
+                return from;
+            }
+        }
+    }
+}
diff --git a/ARMeilleure/Diagnostics/PassName.cs b/ARMeilleure/Diagnostics/PassName.cs
index e37439855e..e34bf0d2fe 100644
--- a/ARMeilleure/Diagnostics/PassName.cs
+++ b/ARMeilleure/Diagnostics/PassName.cs
@@ -5,8 +5,10 @@ namespace ARMeilleure.Diagnostics
         Decoding,
         Translation,
         RegisterUsage,
+        TailMerge,
         Dominance,
         SsaConstruction,
+        RegisterToLocal,
         Optimization,
         PreAllocation,
         RegisterAllocation,
diff --git a/ARMeilleure/Translation/Compiler.cs b/ARMeilleure/Translation/Compiler.cs
index 9e4cdb243f..817bd487e0 100644
--- a/ARMeilleure/Translation/Compiler.cs
+++ b/ARMeilleure/Translation/Compiler.cs
@@ -1,4 +1,5 @@
 using ARMeilleure.CodeGen;
+using ARMeilleure.CodeGen.Optimizations;
 using ARMeilleure.CodeGen.X86;
 using ARMeilleure.Diagnostics;
 using ARMeilleure.IntermediateRepresentation;
@@ -13,31 +14,41 @@ namespace ARMeilleure.Translation
             OperandType retType,
             CompilerOptions options)
         {
-            Logger.StartPass(PassName.Dominance);
+            CompilerContext cctx = new(cfg, argTypes, retType, options); // Created before the passes because TailMerge.RunPass takes the context.
 
-            if ((options & CompilerOptions.SsaForm) != 0)
+            if (options.HasFlag(CompilerOptions.Optimize)) // Tail merging runs only when the Optimize flag is set.
             {
-                Dominance.FindDominators(cfg);
-                Dominance.FindDominanceFrontiers(cfg);
+                Logger.StartPass(PassName.TailMerge);
+
+                TailMerge.RunPass(cctx);
+
+                Logger.EndPass(PassName.TailMerge, cfg);
             }
 
-            Logger.EndPass(PassName.Dominance);
-
-            Logger.StartPass(PassName.SsaConstruction);
-
-            if ((options & CompilerOptions.SsaForm) != 0)
+            if (options.HasFlag(CompilerOptions.SsaForm)) // Dominance and SSA passes are now started/ended only when they actually run.
             {
+                Logger.StartPass(PassName.Dominance);
+
+                Dominance.FindDominators(cfg);
+                Dominance.FindDominanceFrontiers(cfg);
+
+                Logger.EndPass(PassName.Dominance);
+
+                Logger.StartPass(PassName.SsaConstruction);
+
                 Ssa.Construct(cfg);
+
+                Logger.EndPass(PassName.SsaConstruction, cfg);
             }
             else
             {
+                Logger.StartPass(PassName.RegisterToLocal); // The non-SSA path is logged under its own pass name.
+
                 RegisterToLocal.Rename(cfg);
+
+                Logger.EndPass(PassName.RegisterToLocal, cfg);
             }
 
-            Logger.EndPass(PassName.SsaConstruction, cfg);
-
-            CompilerContext cctx = new(cfg, argTypes, retType, options);
-
             return CodeGenerator.Generate(cctx);
         }
     }
diff --git a/ARMeilleure/Translation/ControlFlowGraph.cs b/ARMeilleure/Translation/ControlFlowGraph.cs
index 3e7ff0c955..77d2bc4653 100644
--- a/ARMeilleure/Translation/ControlFlowGraph.cs
+++ b/ARMeilleure/Translation/ControlFlowGraph.cs
@@ -10,7 +10,7 @@ namespace ARMeilleure.Translation
         private BasicBlock[] _postOrderBlocks;
         private int[] _postOrderMap;
 
-        public int LocalsCount { get; }
+        public int LocalsCount { get; private set; } // Private setter lets AllocateLocal increment the count.
         public BasicBlock Entry { get; }
         public IntrusiveList Blocks { get; }
         public BasicBlock[] PostOrderBlocks => _postOrderBlocks;
@@ -25,6 +25,15 @@ namespace ARMeilleure.Translation
             Update();
         }
 
+        public Operand AllocateLocal(OperandType type) // Creates a local of the given type and numbers it with the incremented LocalsCount.
+        {
+            Operand result = Operand.Factory.Local(type);
+
+            result.NumberLocal(++LocalsCount); // Pre-increment: the new count is the value passed to NumberLocal.
+
+            return result;
+        }
+
         public void Update()
         {
             RemoveUnreachableBlocks(Blocks);
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index ba5116a32b..8e5349e556 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
         private const string OuterHeaderMagicString = "PTCohd\0\0";
         private const string InnerHeaderMagicString = "PTCihd\0\0";
 
-        private const uint InternalVersion = 2680; //! To be incremented manually for each change to the ARMeilleure project.
+        private const uint InternalVersion = 2721; //! To be incremented manually for each change to the ARMeilleure project.
 
         private const string ActualDir = "0";
         private const string BackupDir = "1";
diff --git a/ARMeilleure/Translation/RegisterUsage.cs b/ARMeilleure/Translation/RegisterUsage.cs
index 035d45409a..775fa3abc2 100644
--- a/ARMeilleure/Translation/RegisterUsage.cs
+++ b/ARMeilleure/Translation/RegisterUsage.cs
@@ -203,12 +203,18 @@ namespace ARMeilleure.Translation
                 // It always needs a context load as it is the first block to run.
                 if (block.Predecessors.Count == 0 || hasContextLoad)
                 {
-                    arg = Local(OperandType.I64);
+                    long vecMask = globalInputs[block.Index].VecMask;
+                    long intMask = globalInputs[block.Index].IntMask;
 
-                    Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0)));
+                    if (vecMask != 0 || intMask != 0) // With both masks zero, no LoadArgument and no LoadLocals calls are emitted.
+                    {
+                        arg = Local(OperandType.I64);
 
-                    LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector, mode, loadArg, arg);
-                    LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer, mode, loadArg, arg);
+                        Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0)));
+
+                        LoadLocals(block, vecMask, RegisterType.Vector, mode, loadArg, arg);
+                        LoadLocals(block, intMask, RegisterType.Integer, mode, loadArg, arg);
+                    }
                 }
 
                 bool hasContextStore = HasContextStore(block);
@@ -220,15 +226,21 @@ namespace ARMeilleure.Translation
 
                 if (EndsWithReturn(block) || hasContextStore)
                 {
-                    if (arg == default)
+                    long vecMask = globalOutputs[block.Index].VecMask;
+                    long intMask = globalOutputs[block.Index].IntMask;
+
+                    if (vecMask != 0 || intMask != 0) // With both masks zero, no LoadArgument and no StoreLocals calls are emitted.
                     {
-                        arg = Local(OperandType.I64);
+                        if (arg == default)
+                        {
+                            arg = Local(OperandType.I64);
 
-                        block.Append(Operation(Instruction.LoadArgument, arg, Const(0)));
+                            block.Append(Operation(Instruction.LoadArgument, arg, Const(0)));
+                        }
+
+                        StoreLocals(block, intMask, RegisterType.Integer, mode, arg);
+                        StoreLocals(block, vecMask, RegisterType.Vector, mode, arg);
                     }
-
-                    StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, mode, arg);
-                    StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, mode, arg);
                 }
             }
         }