decoding in gem5

2022.07.14

Overview

src/cpu/o3/fetch.cc:

void Fetch::fetch(bool &status_change) {
  ...
  while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
    && !predictedBranch && !quiesce) {
    ...
    dec_ptr->needMoreBytes()
    ...
    staticInst = dec_ptr->decode(this_pc);
    ...
  }
  ...
}

src/arch/x86/decoder.hh:
```
void moreBytes(const PCStateBase &pc, Addr fetchPC) override
{
  ...
  process();
}
```
- src/arch/x86/decoder.cc:
```
void Decoder::process() {...}
```
  process() actually uses a state machine to decode. The decoded info is stored in class Decoder's member variable ExtMachInst emi. process() only decodes the instruction format; opcode decoding is done later in Decoder::decode(...).

src/arch/x86/decoder.cc:

StaticInstPtr Decoder::decode(PCStateBase &next_pc)
{
  ...
  si = decode(emi, origPC);
  return si;
}

src/arch/x86/decoder.cc:

StaticInstPtr
Decoder::decode(ExtMachInst mach_inst, Addr addr)
{
  ...
  si = decodeInst(mach_inst);
  ...
  return si;
}

build/X86/arch/x86/generated/decode-method.cc.inc:

StaticInstPtr
X86ISA::Decoder::decodeInst(X86ISA::ExtMachInst) {
  using namespace X86ISAInst;
  switch (LEGACY_LOCK) {
    case 0x0:
    switch (OPCODE_TYPE) {
      case X86ISA::OneByteOpcode:
      switch (OPCODE_OP_TOP5) {
        ...
        case 0x17:
        return new x86_macroop::MOV_R_I(machInst,
          EmulEnv(
            (OPCODE_OP_BOTTOM3 | (REX_B << 3)),
            0, OPSIZE, ADDRSIZE, STACKSIZE
        ));
        break;
        ...
}...}...}...}

build/X86/arch/x86/generated/decoder-ns.cc.inc:

x86_macroop::MOV_R_I::MOV_R_I(ExtMachInst machInst, EmulEnv _env)
  : Macroop("mov", machInst, 1, _env)
{
  ...
  microops[0] = (StaticInstPtr)(new Xamovi(machInst,
    macrocodeBlock, (1ULL << StaticInst::IsMicroop) |
      (1ULL << StaticInst::IsFirstMicroop) |
      (1ULL << StaticInst::IsLastMicroop),
    X86ISA::GpRegIndex(env.reg), adjustedImm, env.dataSize));
}

2022.07.03

fold

Take Inst::MOV(['Eb', 'Gb'],{}) as an example.

build/X86/arch/x86/generated/decode-method.cc.inc

return new x86_macroop::MOV_R_R(machInst, EmulEnv((MODRM_RM | (REX_B << 3)),
  (MODRM_REG | (REX_R << 3)),
  1,
  ADDRSIZE,
  STACKSIZE));

src/arch/x86/emulenv.hh:

EmulEnv(RegIndex _reg, RegIndex _regm,
    int _dataSize, int _addressSize, int _stackSize) :
  reg(_reg), regm(_regm), seg(SEGMENT_REG_DS),
  scale(0), index(INTREG_T0),
  base(INTREG_T0),
  dataSize(_dataSize), addressSize(_addressSize),
  stackSize(_stackSize)
{;}

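Here EmulEnv's reg and regm members (initialized from the constructor parameters _reg and _regm) are assigned the raw 4-bit register indices, i.e. the 3-bit ModRM field extended by the corresponding REX bit, taken directly from the x86 instruction encoding without any modification.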

build/X86/arch/x86/generated/decoder-ns.cc.inc:

x86_macroop::MOV_R_R::MOV_R_R(...):...
{
microops[0] = new Mov(machInst, macrocodeBlock,
            (1ULL << StaticInst::IsMicroop) | (1ULL << StaticInst::IsFirstMicroop) | (1ULL << StaticInst::IsLastMicroop), env.dataSize, 0, X86ISA::GpRegIndex(env.reg), X86ISA::GpRegIndex(env.reg), X86ISA::GpRegIndex(env.regm));
}

Here Mov(...) receives the 4-bit register indices as arguments without any modification.

build/X86/arch/x86/generated/decoder-ns.hh.inc:

template <typename ...Args>
Mov(ExtMachInst mach_inst, const char *inst_mnem,
        uint64_t set_flags, uint8_t data_size, uint16_t _ext,
        Args... args) :
    X86ISA::RegOpT<X86ISA::FoldedDestOp, X86ISA::FoldedSrc1Op, X86ISA::FoldedSrc2Op>(mach_inst, "mov", inst_mnem, set_flags,
            IntAluOp, { args... }, data_size, _ext)

src/arch/x86/insts/microregop.hh:
```
using RegOpT = InstOperands<RegOpBase, Operands...>;
```
Therefore, RegOpT is just an alias, and constructing a RegOpT actually calls the constructor of InstOperands:

src/arch/x86/insts/microop_args.hh:

template <typename Base, typename ...Operands>
class InstOperands : public Base, public Operands...
{
  ...
  template <std::size_t ...I, typename ...CTorArgs>
  InstOperands(std::index_sequence<I...>, ExtMachInst mach_inst,
          const char *mnem, const char *inst_mnem, uint64_t set_flags,
          OpClass op_class, [[maybe_unused]] ArgTuple args,
          CTorArgs... ctor_args) :
      Base(mach_inst, mnem, inst_mnem, set_flags, op_class, ctor_args...),
      Operands(this, std::get<I>(args))...
  {}
  ...
};

Here, Base is RegOpBase, and Operands are FoldedDestOp, FoldedSrc1Op and FoldedSrc2Op. All of their constructors are called.

src/arch/x86/insts/microregop.hh:

class RegOpBase : public X86MicroopBase
{
  ...
   RegOpBase(ExtMachInst mach_inst, const char *mnem, const char *inst_mnem,
           uint64_t set_flags, OpClass op_class, uint8_t data_size,
           uint16_t _ext) :
       X86MicroopBase(mach_inst, mnem, inst_mnem, set_flags, op_class),
       ext(_ext), dataSize(data_size),
       foldOBit((data_size == 1 && !mach_inst.rex.present) ? 1 << 6 : 0)
   {}
   ...
};

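foldOBit is set to 1 << 6 whenever the data size is 1 byte and no REX prefix is present, i.e. for every instruction whose register operands could potentially be ah/bh/ch/dh; otherwise it is 0. TODO: why not determine whether an operand actually is ah/bh/ch/dh here?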

src/arch/x86/insts/microop_args.hh:

using FoldedDestOp = FoldedOp<DestOp>;

FoldedDestOp is an alias for FoldedOp<DestOp>, and FoldedOp<Base> in turn derives from its Base:

template <class Base>
struct FoldedOp : public Base
{
    ...
    template <class InstType>
    FoldedOp(InstType *inst, ArgType idx) :
        Base(INTREG_FOLDED(idx.index, inst->foldOBit), inst->dataSize)
    {}
    ...
};

Base is DestOp. The reg index passed to DestOp is modified by INTREG_FOLDED.

src/arch/x86/regs/int.hh:
```
inline static IntRegIndex
INTREG_FOLDED(int index, int foldBit)
{
    if ((index & 0x1C) == 4 && foldBit)
        index = (index - 4) | foldBit;
    return (IntRegIndex)index;
}
```
According to 2020.amd64.pdf, Table 1-10, the high-byte registers ah/bh/ch/dh are encoded as 0b1xx (i.e. indices 4-7) when no REX prefix is present.

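Therefore, with foldBit = 1 << 6, INTREG_FOLDED transforms an ah/bh/ch/dh register index 0b1xx into 0b10000xx: the check (index & 0x1C) == 4 matches exactly the indices 4-7, and subtracting 4 leaves 0bxx, which is the same encoding as the corresponding full register (rax/rcx/rdx/rbx), with the fold bit marking it as the high-byte variant.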

xieby1's notes

decoding in gem5

Overview

fold