Implement speculative translation on the CPU (#515)

* Implement speculative translation on the CPU, and change the way branches to unknown or untranslated addresses work

* Port t0opt changes and other cleanups

* Change the namespace of translation-related classes to ChocolArm64.Translation, other minor tweaks

* Fix typo

* Translate higher quality code for indirect jumps as well, and in some cases that were missed when lower quality (tier 0) code was available

* Remove debug print

* Remove direct argument passing optimization, and enable tail calls for BR instructions

* Call delegates directly with Callvirt rather than calling Execute, do not emit calls for tier 0 code

* Remove unused property

* Rename argument on ArmSubroutine delegate
This commit is contained in:
gdkchan 2019-02-04 18:26:05 -03:00 committed by GitHub
parent f5b4f6ccc4
commit a694420d11
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 656 additions and 376 deletions

View file

@ -1,5 +1,6 @@
using ChocolArm64.Memory; using ChocolArm64.Memory;
using ChocolArm64.State; using ChocolArm64.State;
using ChocolArm64.Translation;
using System; using System;
using System.Threading; using System.Threading;

View file

@ -25,14 +25,53 @@ namespace ChocolArm64.Decoders
FillBlock(memory, mode, block); FillBlock(memory, mode, block);
OpCode64 lastOp = block.GetLastOp();
if (IsBranch(lastOp) && !IsCall(lastOp) && lastOp is IOpCodeBImm op)
{
//It's possible that the branch on this block lands on the middle of the block.
//This is more common on tight loops. In this case, we can improve the codegen
//a bit by changing the CFG and either making the branch point to the same block
//(which indicates that the block is a loop that jumps back to the start), and the
//other possible case is a jump somewhere on the middle of the block, which is
//also a loop, but in this case we need to split the block in half.
if (op.Imm == start)
{
block.Branch = block;
}
else if ((ulong)op.Imm > (ulong)start &&
(ulong)op.Imm < (ulong)block.EndPosition)
{
Block botBlock = new Block(op.Imm);
int botBlockIndex = 0;
long currPosition = start;
while ((ulong)currPosition < (ulong)op.Imm)
{
currPosition += block.OpCodes[botBlockIndex++].OpCodeSizeInBytes;
}
botBlock.OpCodes.AddRange(block.OpCodes);
botBlock.OpCodes.RemoveRange(0, botBlockIndex);
block.OpCodes.RemoveRange(botBlockIndex, block.OpCodes.Count - botBlockIndex);
botBlock.EndPosition = block.EndPosition;
block.EndPosition = op.Imm;
botBlock.Branch = botBlock;
block.Next = botBlock;
}
}
return block; return block;
} }
public static Block DecodeSubroutine( public static Block DecodeSubroutine(MemoryManager memory, long start, ExecutionMode mode)
TranslatorCache cache,
MemoryManager memory,
long start,
ExecutionMode mode)
{ {
Dictionary<long, Block> visited = new Dictionary<long, Block>(); Dictionary<long, Block> visited = new Dictionary<long, Block>();
Dictionary<long, Block> visitedEnd = new Dictionary<long, Block>(); Dictionary<long, Block> visitedEnd = new Dictionary<long, Block>();
@ -67,23 +106,16 @@ namespace ChocolArm64.Decoders
//(except BL/BLR that are sub calls) or end of executable, Next is null. //(except BL/BLR that are sub calls) or end of executable, Next is null.
if (current.OpCodes.Count > 0) if (current.OpCodes.Count > 0)
{ {
bool hasCachedSub = false;
OpCode64 lastOp = current.GetLastOp(); OpCode64 lastOp = current.GetLastOp();
if (lastOp is IOpCodeBImm op) bool isCall = IsCall(lastOp);
if (lastOp is IOpCodeBImm op && !isCall)
{ {
if (op.Emitter == InstEmit.Bl) current.Branch = Enqueue(op.Imm);
{
hasCachedSub = cache.HasSubroutine(op.Imm);
}
else
{
current.Branch = Enqueue(op.Imm);
}
} }
if (!IsUnconditionalBranch(lastOp) || hasCachedSub) if (!IsUnconditionalBranch(lastOp) || isCall)
{ {
current.Next = Enqueue(current.EndPosition); current.Next = Enqueue(current.EndPosition);
} }
@ -223,6 +255,13 @@ namespace ChocolArm64.Decoders
opCode is IOpCode32BReg; opCode is IOpCode32BReg;
} }
//Tells whether the opcode is a subroutine call, that is,
//whether its emitter is the one used for BL or the one used for BLR.
private static bool IsCall(OpCode64 opCode)
{
    //TODO (CQ): ARM32 support.
    if (opCode.Emitter == InstEmit.Bl)
    {
        return true;
    }

    return opCode.Emitter == InstEmit.Blr;
}
private static bool IsException(OpCode64 opCode) private static bool IsException(OpCode64 opCode)
{ {
return opCode.Emitter == InstEmit.Brk || return opCode.Emitter == InstEmit.Brk ||

View file

@ -3,6 +3,8 @@ using ChocolArm64.State;
using ChocolArm64.Translation; using ChocolArm64.Translation;
using System.Reflection.Emit; using System.Reflection.Emit;
using static ChocolArm64.Instructions.InstEmitFlowHelper;
namespace ChocolArm64.Instructions namespace ChocolArm64.Instructions
{ {
static partial class InstEmit static partial class InstEmit
@ -39,7 +41,7 @@ namespace ChocolArm64.Instructions
context.EmitStint(RegisterAlias.Lr); context.EmitStint(RegisterAlias.Lr);
context.EmitStoreState(); context.EmitStoreState();
InstEmitFlowHelper.EmitCall(context, op.Imm); EmitCall(context, op.Imm);
} }
public static void Blr(ILEmitterCtx context) public static void Blr(ILEmitterCtx context)
@ -51,7 +53,7 @@ namespace ChocolArm64.Instructions
context.EmitStint(RegisterAlias.Lr); context.EmitStint(RegisterAlias.Lr);
context.EmitStoreState(); context.EmitStoreState();
context.Emit(OpCodes.Ret); EmitVirtualCall(context);
} }
public static void Br(ILEmitterCtx context) public static void Br(ILEmitterCtx context)
@ -61,7 +63,7 @@ namespace ChocolArm64.Instructions
context.EmitStoreState(); context.EmitStoreState();
context.EmitLdintzr(op.Rn); context.EmitLdintzr(op.Rn);
context.Emit(OpCodes.Ret); EmitVirtualJump(context);
} }
public static void Cbnz(ILEmitterCtx context) => EmitCb(context, OpCodes.Bne_Un); public static void Cbnz(ILEmitterCtx context) => EmitCb(context, OpCodes.Bne_Un);
@ -106,10 +108,17 @@ namespace ChocolArm64.Instructions
{ {
OpCodeBImm64 op = (OpCodeBImm64)context.CurrOp; OpCodeBImm64 op = (OpCodeBImm64)context.CurrOp;
if (context.CurrBlock.Next != null && if (context.CurrBlock.Branch != null)
context.CurrBlock.Branch != null)
{ {
context.EmitCondBranch(context.GetLabel(op.Imm), cond); context.EmitCondBranch(context.GetLabel(op.Imm), cond);
if (context.CurrBlock.Next == null)
{
context.EmitStoreState();
context.EmitLdc_I8(op.Position + 4);
context.Emit(OpCodes.Ret);
}
} }
else else
{ {
@ -135,10 +144,17 @@ namespace ChocolArm64.Instructions
{ {
OpCodeBImm64 op = (OpCodeBImm64)context.CurrOp; OpCodeBImm64 op = (OpCodeBImm64)context.CurrOp;
if (context.CurrBlock.Next != null && if (context.CurrBlock.Branch != null)
context.CurrBlock.Branch != null)
{ {
context.Emit(ilOp, context.GetLabel(op.Imm)); context.Emit(ilOp, context.GetLabel(op.Imm));
if (context.CurrBlock.Next == null)
{
context.EmitStoreState();
context.EmitLdc_I8(op.Position + 4);
context.Emit(OpCodes.Ret);
}
} }
else else
{ {

View file

@ -1,4 +1,6 @@
using ChocolArm64.State;
using ChocolArm64.Translation; using ChocolArm64.Translation;
using System.Reflection;
using System.Reflection.Emit; using System.Reflection.Emit;
namespace ChocolArm64.Instructions namespace ChocolArm64.Instructions
@ -7,12 +9,120 @@ namespace ChocolArm64.Instructions
{ {
public static void EmitCall(ILEmitterCtx context, long imm) public static void EmitCall(ILEmitterCtx context, long imm)
{ {
if (context.TryOptEmitSubroutineCall()) if (context.Tier == TranslationTier.Tier0)
{
context.TranslateAhead(imm);
context.EmitLdc_I8(imm);
context.Emit(OpCodes.Ret);
return;
}
if (!context.TryOptEmitSubroutineCall())
{
context.TranslateAhead(imm);
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator),
BindingFlags.Instance |
BindingFlags.NonPublic));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdc_I8(imm);
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitCall(typeof(TranslatedSub), nameof(TranslatedSub.Execute));
}
EmitContinueOrReturnCheck(context);
}
//Emits an indirect call, this is EmitVirtualCallOrJump with isJump set to false.
public static void EmitVirtualCall(ILEmitterCtx context) => EmitVirtualCallOrJump(context, false);
//Emits an indirect jump, this is EmitVirtualCallOrJump with isJump set to true.
public static void EmitVirtualJump(ILEmitterCtx context) => EmitVirtualCallOrJump(context, true);
//Emits the code of an indirect call or jump. The emitted code expects
//the target address to be the value on the top of the evaluation stack.
//On tier 0, the emitted code only calls Translator.TranslateVirtualSubroutine
//for the target, and then returns the target address.
//On the other tiers, it gets the delegate of the target from
//Translator.GetOrTranslateVirtualSubroutine and invokes it.
private static void EmitVirtualCallOrJump(ILEmitterCtx context, bool isJump)
{
    bool isTier0 = context.Tier == TranslationTier.Tier0;

    FieldInfo translatorField = typeof(CpuThreadState).GetField(
        nameof(CpuThreadState.CurrentTranslator),
        BindingFlags.Instance | BindingFlags.NonPublic);

    if (isTier0)
    {
        //Keep a copy of the target address on the stack,
        //it is the value returned by the Ret emitted below.
        context.Emit(OpCodes.Dup);
    }

    //Arguments of the translator method, in order:
    //the translator instance, the thread state and the target address.
    context.EmitSttmp();
    context.EmitLdarg(TranslatedSub.StateArgIdx);

    context.EmitFieldLoad(translatorField);

    context.EmitLdarg(TranslatedSub.StateArgIdx);
    context.EmitLdtmp();

    if (isTier0)
    {
        context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine));

        context.Emit(OpCodes.Ret);

        return;
    }

    context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateVirtualSubroutine));

    //Arguments of the delegate returned by the call above.
    context.EmitLdarg(TranslatedSub.StateArgIdx);
    context.EmitLdarg(TranslatedSub.MemoryArgIdx);

    if (isJump)
    {
        //The tail prefix allows the JIT to jump to the next function,
        //while releasing the stack space used by the current one.
        //This is ideal for BR ARM instructions, which are
        //basically indirect tail calls.
        context.Emit(OpCodes.Tailcall);
    }

    context.EmitCall(typeof(ArmSubroutine).GetMethod("Invoke"), isVirtual: true);

    if (isJump)
    {
        context.Emit(OpCodes.Ret);
    }
    else
    {
        EmitContinueOrReturnCheck(context);
    }
}
private static void EmitContinueOrReturnCheck(ILEmitterCtx context)
{
//Note: The return value of the called method will be placed
//at the Stack, the return value is always a Int64 with the
//return address of the function. We check if the address is
//correct, if it isn't we keep returning until we reach the dispatcher.
if (context.CurrBlock.Next != null)
{ {
//Note: the return value of the called method will be placed
//at the Stack, the return value is always a Int64 with the
//return address of the function. We check if the address is
//correct, if it isn't we keep returning until we reach the dispatcher.
context.Emit(OpCodes.Dup); context.Emit(OpCodes.Dup);
context.EmitLdc_I8(context.CurrOp.Position + 4); context.EmitLdc_I8(context.CurrOp.Position + 4);
@ -30,8 +140,6 @@ namespace ChocolArm64.Instructions
} }
else else
{ {
context.EmitLdc_I8(imm);
context.Emit(OpCodes.Ret); context.Emit(OpCodes.Ret);
} }
} }

View file

@ -1,4 +1,5 @@
using ChocolArm64.Events; using ChocolArm64.Events;
using ChocolArm64.Translation;
using System; using System;
using System.Diagnostics; using System.Diagnostics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
@ -82,6 +83,8 @@ namespace ChocolArm64.State
private static double _hostTickFreq; private static double _hostTickFreq;
internal Translator CurrentTranslator;
static CpuThreadState() static CpuThreadState()
{ {
_hostTickFreq = 1.0 / Stopwatch.Frequency; _hostTickFreq = 1.0 / Stopwatch.Frequency;

View file

@ -1,140 +0,0 @@
using ChocolArm64.Memory;
using ChocolArm64.State;
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using System.Reflection;
using System.Reflection.Emit;
namespace ChocolArm64
{
//Holds the dynamic method generated for a subroutine, the delegate
//used to run it, and the state used to decide when it should be re-JITed.
class TranslatedSub
{
    private delegate long Aa64Subroutine(CpuThreadState register, MemoryManager memory);

    //ShouldReJit returns false for this amount of calls
    //before it starts to report a pending re-JIT.
    private const int MinCallCountForReJit = 250;

    private Aa64Subroutine _execDelegate;

    public static int StateArgIdx  { get; private set; }
    public static int MemoryArgIdx { get; private set; }

    public static Type[] FixedArgTypes { get; private set; }

    public DynamicMethod Method { get; private set; }

    public ReadOnlyCollection<Register> SubArgs { get; private set; }

    private HashSet<long> _callers;

    private TranslatedSubType _type;

    private int _callCount;

    private bool _needsReJit;

    public TranslatedSub(DynamicMethod method, List<Register> subArgs)
    {
        if (method == null)
        {
            throw new ArgumentNullException(nameof(method));
        }

        if (subArgs == null)
        {
            throw new ArgumentNullException(nameof(subArgs));
        }

        Method  = method;
        SubArgs = subArgs.AsReadOnly();

        _callers = new HashSet<long>();

        PrepareDelegate();
    }

    //Gets the fixed argument types from the Invoke method of the delegate,
    //and the indices of the thread state and memory manager arguments.
    static TranslatedSub()
    {
        ParameterInfo[] invokeParams = typeof(Aa64Subroutine).GetMethod("Invoke").GetParameters();

        Type[] argTypes = new Type[invokeParams.Length];

        int argIndex = 0;

        foreach (ParameterInfo invokeParam in invokeParams)
        {
            Type argType = invokeParam.ParameterType;

            argTypes[argIndex] = argType;

            if (argType == typeof(CpuThreadState))
            {
                StateArgIdx = argIndex;
            }
            else if (argType == typeof(MemoryManager))
            {
                MemoryArgIdx = argIndex;
            }

            argIndex++;
        }

        FixedArgTypes = argTypes;
    }

    //Builds a dispatch method that only takes the fixed arguments, loads the
    //field of each register on SubArgs from the thread state, and calls Method.
    private void PrepareDelegate()
    {
        DynamicMethod dispatcher = new DynamicMethod($"{Method.Name}_Dispatch", typeof(long), FixedArgTypes);

        ILGenerator il = dispatcher.GetILGenerator();

        il.EmitLdargSeq(FixedArgTypes.Length);

        for (int index = 0; index < SubArgs.Count; index++)
        {
            il.EmitLdarg(StateArgIdx);
            il.Emit(OpCodes.Ldfld, SubArgs[index].GetField());
        }

        il.Emit(OpCodes.Call, Method);
        il.Emit(OpCodes.Ret);

        _execDelegate = (Aa64Subroutine)dispatcher.CreateDelegate(typeof(Aa64Subroutine));
    }

    //Returns true when a re-JIT is pending and this method was already
    //called MinCallCountForReJit times while it was pending.
    public bool ShouldReJit()
    {
        if (!_needsReJit)
        {
            return false;
        }

        if (_callCount >= MinCallCountForReJit)
        {
            return true;
        }

        _callCount++;

        return false;
    }

    public long Execute(CpuThreadState threadState, MemoryManager memory) => _execDelegate(threadState, memory);

    public void AddCaller(long position)
    {
        lock (_callers)
        {
            _callers.Add(position);
        }
    }

    //Returns a copy of the registered caller positions.
    public long[] GetCallerPositions()
    {
        long[] positions;

        lock (_callers)
        {
            positions = _callers.ToArray();
        }

        return positions;
    }

    //Sets the type of the subroutine, tier 0 subroutines are also marked for re-JIT.
    public void SetType(TranslatedSubType type)
    {
        _type = type;

        if (type != TranslatedSubType.SubTier0)
        {
            return;
        }

        _needsReJit = true;
    }

    public void MarkForReJit()
    {
        _needsReJit = true;
    }
}
}

View file

@ -1,8 +0,0 @@
namespace ChocolArm64
{
//Kind of translation that produced a TranslatedSub.
enum TranslatedSubType
{
    //Set by Translator.TranslateTier0, that translates a single basic block.
    //TranslatedSub.SetType also flags the subroutine for re-JIT when this type is set.
    SubTier0,
    //Set by Translator.TranslateTier1, that translates the whole decoded subroutine.
    SubTier1
}
}

View file

@ -11,6 +11,7 @@ namespace ChocolArm64.Translation
class ILEmitterCtx class ILEmitterCtx
{ {
private TranslatorCache _cache; private TranslatorCache _cache;
private TranslatorQueue _queue;
private Dictionary<long, ILLabel> _labels; private Dictionary<long, ILLabel> _labels;
@ -23,6 +24,8 @@ namespace ChocolArm64.Translation
public Block CurrBlock => _currBlock; public Block CurrBlock => _currBlock;
public OpCode64 CurrOp => _currBlock?.OpCodes[_opcIndex]; public OpCode64 CurrOp => _currBlock?.OpCodes[_opcIndex];
public TranslationTier Tier { get; }
public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO
private Dictionary<Block, ILBlock> _visitedBlocks; private Dictionary<Block, ILBlock> _visitedBlocks;
@ -47,11 +50,14 @@ namespace ChocolArm64.Translation
private const int VecTmp1Index = -5; private const int VecTmp1Index = -5;
private const int VecTmp2Index = -6; private const int VecTmp2Index = -6;
public ILEmitterCtx(TranslatorCache cache, Block graph) public ILEmitterCtx(TranslatorCache cache, TranslatorQueue queue, TranslationTier tier, Block graph)
{ {
_cache = cache ?? throw new ArgumentNullException(nameof(cache)); _cache = cache ?? throw new ArgumentNullException(nameof(cache));
_queue = queue ?? throw new ArgumentNullException(nameof(queue));
_currBlock = graph ?? throw new ArgumentNullException(nameof(graph)); _currBlock = graph ?? throw new ArgumentNullException(nameof(graph));
Tier = tier;
_labels = new Dictionary<long, ILLabel>(); _labels = new Dictionary<long, ILLabel>();
_visitedBlocks = new Dictionary<Block, ILBlock>(); _visitedBlocks = new Dictionary<Block, ILBlock>();
@ -243,6 +249,16 @@ namespace ChocolArm64.Translation
return new ILBlock(); return new ILBlock();
} }
//Enqueues a tier 1 translation of the code at the specified position,
//unless the cache already has a translation of it that is not tier 0.
public void TranslateAhead(long position, ExecutionMode mode = ExecutionMode.Aarch64)
{
    bool isCached = _cache.TryGetSubroutine(position, out TranslatedSub sub);

    bool needsTranslation = !isCached || sub.Tier == TranslationTier.Tier0;

    if (needsTranslation)
    {
        _queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1));
    }
}
public bool TryOptEmitSubroutineCall() public bool TryOptEmitSubroutineCall()
{ {
if (_currBlock.Next == null) if (_currBlock.Next == null)
@ -265,20 +281,8 @@ namespace ChocolArm64.Translation
EmitLdarg(index); EmitLdarg(index);
} }
foreach (Register reg in subroutine.SubArgs)
{
switch (reg.Type)
{
case RegisterType.Flag: Ldloc(reg.Index, IoType.Flag); break;
case RegisterType.Int: Ldloc(reg.Index, IoType.Int); break;
case RegisterType.Vector: Ldloc(reg.Index, IoType.Vector); break;
}
}
EmitCall(subroutine.Method); EmitCall(subroutine.Method);
subroutine.AddCaller(_subPosition);
return true; return true;
} }
@ -463,7 +467,12 @@ namespace ChocolArm64.Translation
_ilBlock.Add(new ILOpCodeBranch(ilOp, label)); _ilBlock.Add(new ILOpCodeBranch(ilOp, label));
} }
public void Emit(string text) public void EmitFieldLoad(FieldInfo info)
{
_ilBlock.Add(new ILOpCodeLoadField(info));
}
public void EmitPrint(string text)
{ {
_ilBlock.Add(new ILOpCodeLog(text)); _ilBlock.Add(new ILOpCodeLog(text));
} }
@ -618,14 +627,9 @@ namespace ChocolArm64.Translation
EmitCall(objType.GetMethod(mthdName, BindingFlags.Instance | BindingFlags.NonPublic)); EmitCall(objType.GetMethod(mthdName, BindingFlags.Instance | BindingFlags.NonPublic));
} }
public void EmitCall(MethodInfo mthdInfo) public void EmitCall(MethodInfo mthdInfo, bool isVirtual = false)
{ {
if (mthdInfo == null) _ilBlock.Add(new ILOpCodeCall(mthdInfo ?? throw new ArgumentNullException(nameof(mthdInfo)), isVirtual));
{
throw new ArgumentNullException(nameof(mthdInfo));
}
_ilBlock.Add(new ILOpCodeCall(mthdInfo));
} }
public void EmitLdc_I(long value) public void EmitLdc_I(long value)

View file

@ -26,74 +26,32 @@ namespace ChocolArm64.Translation
_subName = subName; _subName = subName;
} }
public TranslatedSub GetSubroutine() public TranslatedSub GetSubroutine(TranslationTier tier)
{ {
LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]); LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]);
List<Register> subArgs = new List<Register>(); DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes);
void SetArgs(long inputs, RegisterType baseType)
{
for (int bit = 0; bit < 64; bit++)
{
long mask = 1L << bit;
if ((inputs & mask) != 0)
{
subArgs.Add(GetRegFromBit(bit, baseType));
}
}
}
SetArgs(LocalAlloc.GetIntInputs(_ilBlocks[0]), RegisterType.Int);
SetArgs(LocalAlloc.GetVecInputs(_ilBlocks[0]), RegisterType.Vector);
DynamicMethod method = new DynamicMethod(_subName, typeof(long), GetArgumentTypes(subArgs));
Generator = method.GetILGenerator(); Generator = method.GetILGenerator();
TranslatedSub subroutine = new TranslatedSub(method, subArgs); TranslatedSub subroutine = new TranslatedSub(method, tier);
int argsStart = TranslatedSub.FixedArgTypes.Length;
_locals = new Dictionary<Register, int>(); _locals = new Dictionary<Register, int>();
_localsCount = 0; _localsCount = 0;
for (int index = 0; index < subroutine.SubArgs.Count; index++) new ILOpCodeLoadState(_ilBlocks[0]).Emit(this);
{
Register reg = subroutine.SubArgs[index];
Generator.EmitLdarg(index + argsStart);
Generator.EmitStloc(GetLocalIndex(reg));
}
foreach (ILBlock ilBlock in _ilBlocks) foreach (ILBlock ilBlock in _ilBlocks)
{ {
ilBlock.Emit(this); ilBlock.Emit(this);
} }
subroutine.PrepareMethod();
return subroutine; return subroutine;
} }
private Type[] GetArgumentTypes(IList<Register> Params)
{
Type[] fixedArgs = TranslatedSub.FixedArgTypes;
Type[] output = new Type[Params.Count + fixedArgs.Length];
fixedArgs.CopyTo(output, 0);
int typeIdx = fixedArgs.Length;
for (int index = 0; index < Params.Count; index++)
{
output[typeIdx++] = GetFieldType(Params[index].Type);
}
return output;
}
public int GetLocalIndex(Register reg) public int GetLocalIndex(Register reg)
{ {
if (!_locals.TryGetValue(reg, out int index)) if (!_locals.TryGetValue(reg, out int index))

View file

@ -5,16 +5,19 @@ namespace ChocolArm64.Translation
{ {
struct ILOpCodeCall : IILEmit struct ILOpCodeCall : IILEmit
{ {
private MethodInfo _mthdInfo; public MethodInfo Info { get; private set; }
public ILOpCodeCall(MethodInfo mthdInfo) public bool IsVirtual { get; private set; }
public ILOpCodeCall(MethodInfo info, bool isVirtual)
{ {
_mthdInfo = mthdInfo; Info = info;
IsVirtual = isVirtual;
} }
public void Emit(ILMethodBuilder context) public void Emit(ILMethodBuilder context)
{ {
context.Generator.Emit(OpCodes.Call, _mthdInfo); context.Generator.Emit(IsVirtual ? OpCodes.Callvirt : OpCodes.Call, Info);
} }
} }
} }

View file

@ -0,0 +1,20 @@
using System.Reflection;
using System.Reflection.Emit;
namespace ChocolArm64.Translation
{
//IL operation that emits a Ldfld instruction for a given field.
struct ILOpCodeLoadField : IILEmit
{
    //Field that is loaded by the emitted instruction.
    public FieldInfo Info { get; private set; }

    public ILOpCodeLoadField(FieldInfo info)
    {
        Info = info;
    }

    public void Emit(ILMethodBuilder context) => context.Generator.Emit(OpCodes.Ldfld, Info);
}
}

View file

@ -0,0 +1,65 @@
using ChocolArm64.Memory;
using ChocolArm64.State;
using System;
using System.Reflection;
using System.Reflection.Emit;
namespace ChocolArm64.Translation
{
//Signature of the methods generated for translated code.
//TranslatedSub gets the fixed argument types of those methods from this delegate.
//The returned value is used by the Translator.ExecuteSubroutine loop as
//the next position to run, the loop ends when the value is zero.
delegate long ArmSubroutine(CpuThreadState state, MemoryManager memory);
//Holds the dynamic method generated for a translated subroutine,
//the tier it was translated with, and the delegate used to run it.
class TranslatedSub
{
    //Delegate for Method, it is only set after PrepareMethod is called.
    public ArmSubroutine Delegate { get; private set; }

    public static int StateArgIdx  { get; private set; }
    public static int MemoryArgIdx { get; private set; }

    public static Type[] FixedArgTypes { get; private set; }

    public DynamicMethod Method { get; private set; }

    public TranslationTier Tier { get; private set; }

    public TranslatedSub(DynamicMethod method, TranslationTier tier)
    {
        if (method == null)
        {
            throw new ArgumentNullException(nameof(method));
        }

        Method = method;
        Tier   = tier;
    }

    //Gets the fixed argument types from the Invoke method of ArmSubroutine,
    //and the indices of the thread state and memory manager arguments.
    static TranslatedSub()
    {
        ParameterInfo[] invokeParams = typeof(ArmSubroutine).GetMethod("Invoke").GetParameters();

        Type[] argTypes = new Type[invokeParams.Length];

        int argIndex = 0;

        foreach (ParameterInfo invokeParam in invokeParams)
        {
            Type type = invokeParam.ParameterType;

            argTypes[argIndex] = type;

            if (type == typeof(CpuThreadState))
            {
                StateArgIdx = argIndex;
            }
            else if (type == typeof(MemoryManager))
            {
                MemoryArgIdx = argIndex;
            }

            argIndex++;
        }

        FixedArgTypes = argTypes;
    }

    //Creates the delegate for the dynamic method.
    public void PrepareMethod()
    {
        Type delegateType = typeof(ArmSubroutine);

        Delegate = (ArmSubroutine)Method.CreateDelegate(delegateType);
    }

    //Runs the subroutine through its delegate and returns the value it returns.
    public long Execute(CpuThreadState threadState, MemoryManager memory) => Delegate(threadState, memory);
}
}

View file

@ -0,0 +1,11 @@
namespace ChocolArm64.Translation
{
//Translation tiers. The numeric value of a tier is also used by
//TranslatorQueue as the index of the stack where its items are stored.
enum TranslationTier
{
    //Tier used by Translator.TranslateLowCq, that translates a single basic block.
    Tier0,
    //Tier used by Translator.TranslateHighCq, that translates a whole decoded subroutine.
    Tier1,
    //NOTE(review): no visible code produces this tier, presumably reserved for future use.
    Tier2,
    //Number of tiers, this is not a tier. TranslatorQueue uses it to size its array of stacks.
    Count
}
}

View file

@ -0,0 +1,188 @@
using ChocolArm64.Decoders;
using ChocolArm64.Events;
using ChocolArm64.Memory;
using ChocolArm64.State;
using System;
using System.Threading;
namespace ChocolArm64.Translation
{
//Translates guest code into host methods and runs them.
//Tier 0 (low quality) translations are made on demand by the thread that
//needs the code, higher quality translations are requested through a queue,
//and are made by a background thread that lives while guest threads are running.
public class Translator
{
    private readonly MemoryManager _memory;

    //Thread state with Running set to false, only used by ForceAheadOfTimeCompilation.
    private readonly CpuThreadState _dummyThreadState;

    private readonly TranslatorCache _cache;
    private readonly TranslatorQueue _queue;

    private Thread _backgroundTranslator;

    public event EventHandler<CpuTraceEventArgs> CpuTrace;

    public bool EnableCpuTrace { get; set; }

    //Number of threads that are currently inside ExecuteSubroutine.
    private volatile int _threadCount;

    public Translator(MemoryManager memory)
    {
        _memory = memory;

        _dummyThreadState = new CpuThreadState();

        _dummyThreadState.Running = false;

        _cache = new TranslatorCache();
        _queue = new TranslatorQueue();
    }

    //Runs guest code starting at the specified position on the calling thread.
    //The first thread to enter also starts the background translation thread,
    //and the last thread to leave signals the queue so that thread can exit.
    internal void ExecuteSubroutine(CpuThread thread, long position)
    {
        if (Interlocked.Increment(ref _threadCount) == 1)
        {
            _backgroundTranslator = new Thread(TranslateQueuedSubs);
            _backgroundTranslator.Start();
        }

        try
        {
            ExecuteSubroutine(thread.ThreadState, position);
        }
        finally
        {
            //This must also run when the guest code throws, otherwise the count
            //never reaches zero, and the background thread (which is not a
            //background thread from the runtime point of view) waits forever.
            if (Interlocked.Decrement(ref _threadCount) == 0)
            {
                _queue.ForceSignal();
            }
        }
    }

    //Dispatcher loop, runs subroutines until one returns zero,
    //or until the Running flag of the thread state is cleared.
    private void ExecuteSubroutine(CpuThreadState state, long position)
    {
        state.CurrentTranslator = this;

        do
        {
            if (EnableCpuTrace)
            {
                CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
            }

            TranslatedSub subroutine = GetOrTranslateSubroutine(state, position);

            position = subroutine.Execute(state, _memory);
        }
        while (position != 0 && state.Running);

        state.CurrentTranslator = null;
    }

    //Requests a tier 1 translation of the code at the specified position,
    //unless the cache already has a translation of it that is not tier 0.
    internal void TranslateVirtualSubroutine(CpuThreadState state, long position)
    {
        if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0)
        {
            _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
        }
    }

    //Gets the delegate of the subroutine at the specified position, the code is
    //translated at tier 0 when it is not cached. When the subroutine is tier 0,
    //a tier 1 translation of it is also requested.
    internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
    {
        if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
        {
            sub = TranslateLowCq(position, state.GetExecutionMode());
        }

        if (sub.Tier == TranslationTier.Tier0)
        {
            _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
        }

        return sub.Delegate;
    }

    //Gets the cached subroutine at the specified position,
    //the code is translated at tier 0 when it is not cached.
    internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position)
    {
        if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine))
        {
            subroutine = TranslateLowCq(position, state.GetExecutionMode());
        }

        return subroutine;
    }

    //Entry point of the background thread. Translates the queued
    //items while there is at least one thread running guest code.
    private void TranslateQueuedSubs()
    {
        while (_threadCount != 0)
        {
            if (_queue.TryDequeue(out TranslatorQueueItem item))
            {
                bool isCached = _cache.TryGetSubroutine(item.Position, out TranslatedSub sub);

                //Nothing to do when the cached translation
                //is at least as good as the requested one.
                if (isCached && item.Tier <= sub.Tier)
                {
                    continue;
                }

                if (item.Tier == TranslationTier.Tier0)
                {
                    TranslateLowCq(item.Position, item.Mode);
                }
                else
                {
                    TranslateHighCq(item.Position, item.Mode);
                }
            }
            else
            {
                _queue.WaitForItems();
            }
        }
    }

    //Translates the basic block at the specified position at tier 0.
    //Returns the subroutine that ends up on the cache for that position,
    //which is not the one translated here when another one was added first.
    private TranslatedSub TranslateLowCq(long position, ExecutionMode mode)
    {
        Block block = Decoder.DecodeBasicBlock(_memory, position, mode);

        ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, TranslationTier.Tier0, block);

        string subName = GetSubroutineName(position);

        ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName);

        TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0);

        return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
    }

    //Translates the whole subroutine at the specified position at tier 1,
    //and replaces the cached translation for that position.
    private void TranslateHighCq(long position, ExecutionMode mode)
    {
        Block graph = Decoder.DecodeSubroutine(_memory, position, mode);

        ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, TranslationTier.Tier1, graph);

        ILBlock[] ilBlocks = context.GetILBlocks();

        string subName = GetSubroutineName(position);

        ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName);

        TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1);

        int ilOpCount = 0;

        foreach (ILBlock ilBlock in ilBlocks)
        {
            ilOpCount += ilBlock.Count;
        }

        _cache.AddOrUpdate(position, subroutine, ilOpCount);

        ForceAheadOfTimeCompilation(subroutine);
    }

    private string GetSubroutineName(long position)
    {
        return $"Sub{position:x16}";
    }

    //Runs the subroutine once with the dummy thread state and no memory manager.
    //NOTE(review): presumably this makes the runtime compile the method on the calling
    //thread, and relies on the generated code returning before it touches
    //memory when Running is false - confirm against the emitted code.
    private void ForceAheadOfTimeCompilation(TranslatedSub subroutine)
    {
        subroutine.Execute(_dummyThreadState, null);
    }
}
}

View file

@ -4,7 +4,7 @@ using System.Diagnostics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Threading; using System.Threading;
namespace ChocolArm64 namespace ChocolArm64.Translation
{ {
class TranslatorCache class TranslatorCache
{ {
@ -58,6 +58,31 @@ namespace ChocolArm64
_sortedCache = new LinkedList<long>(); _sortedCache = new LinkedList<long>();
} }
//Adds the subroutine to the cache, unless there is already a subroutine
//cached for the position. Returns the subroutine that is on the cache
//for the position after the call.
public TranslatedSub GetOrAdd(long position, TranslatedSub subroutine, int size)
{
    ClearCacheIfNeeded();

    lock (_sortedCache)
    {
        LinkedListNode<long> newNode = _sortedCache.AddLast(position);

        CacheBucket cached = _cache.GetOrAdd(position, new CacheBucket(subroutine, newNode, size));

        if (cached.Node != newNode)
        {
            //There was already a bucket for this position,
            //so the node added above is not needed.
            _sortedCache.Remove(newNode);

            return cached.Subroutine;
        }

        _totalSize += size;

        return cached.Subroutine;
    }
}
public void AddOrUpdate(long position, TranslatedSub subroutine, int size) public void AddOrUpdate(long position, TranslatedSub subroutine, int size)
{ {
ClearCacheIfNeeded(); ClearCacheIfNeeded();

View file

@ -0,0 +1,83 @@
using System.Collections.Concurrent;
using System.Threading;
namespace ChocolArm64.Translation
{
//Holds the translation requests, there is one stack of requests for each
//translation tier. A consumer can block on WaitForItems until a request
//is enqueued, or until ForceSignal is called.
class TranslatorQueue
{
    //This is the maximum number of functions to be translated that the queue can hold.
    //The value may need some tuning to find the sweet spot.
    private const int MaxQueueSize = 1024;

    //Stacks of pending requests, indexed by the numeric value of the tier.
    private readonly ConcurrentStack<TranslatorQueueItem>[] _translationQueue;

    private readonly ManualResetEvent _queueDataReceivedEvent;

    //Set by ForceSignal, the event is never reset again once this is true.
    private bool _signaled;

    public TranslatorQueue()
    {
        _translationQueue = new ConcurrentStack<TranslatorQueueItem>[(int)TranslationTier.Count];

        for (int prio = 0; prio < _translationQueue.Length; prio++)
        {
            _translationQueue[prio] = new ConcurrentStack<TranslatorQueueItem>();
        }

        _queueDataReceivedEvent = new ManualResetEvent(false);
    }

    //Pushes the item on the stack of its tier, and wakes up the waiters.
    //When the stack is full, the item on the top of the stack is dropped first.
    public void Enqueue(TranslatorQueueItem item)
    {
        ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)item.Tier];

        if (queue.Count >= MaxQueueSize)
        {
            queue.TryPop(out _);
        }

        queue.Push(item);

        _queueDataReceivedEvent.Set();
    }

    //Pops an item from the first non-empty stack, starting from the stack of the
    //lowest tier. Returns false when all the stacks are empty.
    public bool TryDequeue(out TranslatorQueueItem item)
    {
        for (int prio = 0; prio < _translationQueue.Length; prio++)
        {
            if (_translationQueue[prio].TryPop(out item))
            {
                return true;
            }
        }

        item = default(TranslatorQueueItem);

        return false;
    }

    //Blocks until the event is set by Enqueue or ForceSignal. The event is reset
    //before returning, unless ForceSignal was called, in which case it
    //stays set and this method does not block anymore.
    public void WaitForItems()
    {
        _queueDataReceivedEvent.WaitOne();

        lock (_queueDataReceivedEvent)
        {
            if (!_signaled)
            {
                _queueDataReceivedEvent.Reset();
            }
        }
    }

    //Permanently sets the event, which releases the current and future waiters.
    public void ForceSignal()
    {
        lock (_queueDataReceivedEvent)
        {
            _signaled = true;

            //The event is not closed here on purpose. Another thread can be about
            //to call WaitForItems or Enqueue when this runs, and WaitOne, Set and
            //Reset throw ObjectDisposedException on a closed handle.
            //The handle is released when the event is finalized.
            _queueDataReceivedEvent.Set();
        }
    }
}
}

View file

@ -0,0 +1,20 @@
using ChocolArm64.State;
namespace ChocolArm64.Translation
{
//Translation request stored on TranslatorQueue: the position of the
//code, its execution mode and the tier requested for the translation.
struct TranslatorQueueItem
{
    private readonly long            _position;
    private readonly ExecutionMode   _mode;
    private readonly TranslationTier _tier;

    public long Position => _position;

    public ExecutionMode Mode => _mode;

    public TranslationTier Tier => _tier;

    public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier)
    {
        _position = position;
        _mode     = mode;
        _tier     = tier;
    }
}
}

View file

@ -1,120 +0,0 @@
using ChocolArm64.Decoders;
using ChocolArm64.Events;
using ChocolArm64.Memory;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
namespace ChocolArm64
{
//Translates guest code into host methods and runs them. The code is first
//translated one basic block at a time (tier 0), and is translated again as
//a whole subroutine (tier 1) when TranslatedSub.ShouldReJit reports it.
public class Translator
{
    private TranslatorCache _cache;

    public event EventHandler<CpuTraceEventArgs> CpuTrace;

    public bool EnableCpuTrace { get; set; }

    public Translator()
    {
        _cache = new TranslatorCache();
    }

    internal void ExecuteSubroutine(CpuThread thread, long position)
    {
        ExecuteSubroutine(thread.ThreadState, thread.Memory, position);
    }

    //Dispatcher loop, runs subroutines until one returns zero,
    //or until the Running flag of the thread state is cleared.
    private void ExecuteSubroutine(CpuThreadState state, MemoryManager memory, long position)
    {
        do
        {
            if (EnableCpuTrace)
            {
                CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
            }

            if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
            {
                sub = TranslateTier0(memory, position, state.GetExecutionMode());
            }

            if (sub.ShouldReJit())
            {
                TranslateTier1(memory, position, state.GetExecutionMode());
            }

            position = sub.Execute(state, memory);
        }
        while (position != 0 && state.Running);
    }

    internal bool HasCachedSub(long position)
    {
        return _cache.HasSubroutine(position);
    }

    //Translates the basic block at the specified position,
    //adds the translation to the cache and returns it.
    private TranslatedSub TranslateTier0(MemoryManager memory, long position, ExecutionMode mode)
    {
        Block block = Decoder.DecodeBasicBlock(memory, position, mode);

        ILEmitterCtx context = new ILEmitterCtx(_cache, block);

        string subName = GetSubroutineName(position);

        ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName);

        TranslatedSub subroutine = ilMthdBuilder.GetSubroutine();

        subroutine.SetType(TranslatedSubType.SubTier0);

        _cache.AddOrUpdate(position, subroutine, block.OpCodes.Count);

        return subroutine;
    }

    //Translates the whole subroutine at the specified position, replaces the cached
    //translation for that position, and marks the callers of the replaced
    //translation for re-JIT.
    private void TranslateTier1(MemoryManager memory, long position, ExecutionMode mode)
    {
        //The callers are registered on the translation that is about to be replaced,
        //so it must be taken from the cache before the cache is updated below.
        //Getting it after the update returns the new translation, that has no callers.
        bool hasOldSub = _cache.TryGetSubroutine(position, out TranslatedSub oldSub);

        Block graph = Decoder.DecodeSubroutine(_cache, memory, position, mode);

        ILEmitterCtx context = new ILEmitterCtx(_cache, graph);

        ILBlock[] ilBlocks = context.GetILBlocks();

        string subName = GetSubroutineName(position);

        ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName);

        TranslatedSub subroutine = ilMthdBuilder.GetSubroutine();

        subroutine.SetType(TranslatedSubType.SubTier1);

        int ilOpCount = 0;

        foreach (ILBlock ilBlock in ilBlocks)
        {
            ilOpCount += ilBlock.Count;
        }

        _cache.AddOrUpdate(position, subroutine, ilOpCount);

        //Mark all methods that calls this method for ReJiting,
        //since we can now call it directly which is faster.
        if (hasOldSub)
        {
            foreach (long callerPos in oldSub.GetCallerPositions())
            {
                //The lookup must use the position of the caller,
                //not the position of the subroutine that was just translated.
                if (_cache.TryGetSubroutine(callerPos, out TranslatedSub callerSub))
                {
                    callerSub.MarkForReJit();
                }
            }
        }
    }

    private string GetSubroutineName(long position)
    {
        return $"Sub{position:x16}";
    }
}
}

View file

@ -789,7 +789,7 @@ namespace Ryujinx.Graphics.Graphics3d
GalVertexAttribType Type = (GalVertexAttribType)((Packed >> 27) & 0x7); GalVertexAttribType Type = (GalVertexAttribType)((Packed >> 27) & 0x7);
bool IsRgba = ((Packed >> 31) & 1) != 0; bool IsRgba = ((Packed >> 31) & 1) != 0;
// Check vertex array is enabled to avoid out of bounds exception when reading bytes // Check vertex array is enabled to avoid out of bounds exception when reading bytes
bool Enable = (ReadRegister(NvGpuEngine3dReg.VertexArrayNControl + ArrayIndex * 4) & 0x1000) != 0; bool Enable = (ReadRegister(NvGpuEngine3dReg.VertexArrayNControl + ArrayIndex * 4) & 0x1000) != 0;

View file

@ -1,6 +1,7 @@
using ChocolArm64; using ChocolArm64;
using ChocolArm64.Events; using ChocolArm64.Events;
using ChocolArm64.Memory; using ChocolArm64.Memory;
using ChocolArm64.Translation;
using Ryujinx.Common; using Ryujinx.Common;
using Ryujinx.Common.Logging; using Ryujinx.Common.Logging;
using Ryujinx.HLE.Exceptions; using Ryujinx.HLE.Exceptions;
@ -109,7 +110,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
_threads = new LinkedList<KThread>(); _threads = new LinkedList<KThread>();
Translator = new Translator(); Translator = new Translator(CpuMemory);
Translator.CpuTrace += CpuTraceHandler; Translator.CpuTrace += CpuTraceHandler;

View file

@ -1,6 +1,7 @@
using ChocolArm64; using ChocolArm64;
using ChocolArm64.Memory; using ChocolArm64.Memory;
using ChocolArm64.State; using ChocolArm64.State;
using ChocolArm64.Translation;
using NUnit.Framework; using NUnit.Framework;
@ -48,10 +49,12 @@ namespace Ryujinx.Tests.Cpu
_entryPoint = Position; _entryPoint = Position;
Translator translator = new Translator();
_ramPointer = Marshal.AllocHGlobal(new IntPtr(_size)); _ramPointer = Marshal.AllocHGlobal(new IntPtr(_size));
_memory = new MemoryManager(_ramPointer); _memory = new MemoryManager(_ramPointer);
_memory.Map(Position, 0, _size); _memory.Map(Position, 0, _size);
Translator translator = new Translator(_memory);
_thread = new CpuThread(translator, _memory, _entryPoint); _thread = new CpuThread(translator, _memory, _entryPoint);
if (_unicornAvailable) if (_unicornAvailable)