/ research / codegen / x86_inlineasm.nim
x86_inlineasm.nim
  1  # Constantine
  2  # Copyright (c) 2018-2019    Status Research & Development GmbH
  3  # Copyright (c) 2020-Present Mamy André-Ratsimbazafy
  4  # Licensed and distributed under either of
  5  #   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
  6  #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
  7  # at your option. This file may not be copied, modified, or distributed except according to those terms.
  8  
  9  import
 10    std/[macros, strutils],
 11    ./llvm
 12  
 13  # ############################################################
 14  #
 15  #                   x86 Inline ASM
 16  #
 17  # ############################################################
 18  
 19  macro genInstr(body: untyped): untyped =
 20    result = newStmtList()
 21  
 22    body.expectKind(nnkStmtList)
 23    for op in body:
 24      op.expectKind(nnkCommand)
 25      doAssert op[0].eqIdent"op"
 26  
 27      let instrName = op[1]
 28      # For each op, generate a builder proc
 29      op[2][0].expectKind(nnkTupleConstr)
 30      op[2][0][0].expectKind(nnkStrLit)
 31      op[2][0][1].expectKind(nnkStrLit)
 32      op[2][0][2].expectKind(nnkStrLit)
 33      op[2][0][3].expectKind(nnkBracket)
 34  
 35      let instrBody = newStmtList()
 36  
 37      # 1. Detect the size of registers
 38      let numBits = ident"numBits"
 39      let regTy = ident"regTy"
 40      let fnTy = ident"fnTy"
 41      let ctx = ident"ctx"
 42      let lhs = op[2][0][3][0]
 43  
 44      instrBody.add quote do:
 45        let `ctx` = builder.getContext()
 46        # lhs: ValueRef or uint32 or uint64
 47        let `numBits` = when `lhs` is ValueRef|ConstValueRef: `lhs`.getTypeOf().getIntTypeWidth()
 48                        else: 8*sizeof(`lhs`)
 49        let `regTy` = when `lhs` is ValueRef|ConstValueRef: `lhs`.getTypeOf()
 50                      elif `lhs` is uint32: `ctx`.int32_t()
 51                      elif `lhs` is uint64: `ctx`.int64_t()
 52                      else: {.error "Unsupported input type " & $typeof(`lhs`).}
 53  
 54      # 2. Create the LLVM asm signature
 55      let operands = op[2][0][3]
 56      let arity = operands.len
 57  
 58      let constraintString = op[2][0][2]
 59      let constraints = ident"constraints"
 60  
 61      let instr = op[2][0][0]
 62  
 63      if arity == 2:
 64        if constraintString.strVal.startsWith('='):
 65          if constraintString.strVal.endsWith('r'):
 66            instrBody.add quote do:
 67              let `fnTy` = function_t(`regTy`, [`regTy`, `regTy`])
 68          else:
 69            instrBody.add quote do:
 70              let `fnTy` = function_t(`regTy`, [`regTy`, pointer_t(`regTy`)])
 71        else:
 72          # We only support out of place "=" function.
 73          # In-place with "+" requires alloca + load/stores in codegen
 74          # in-place functions can be rewritten to be out-place with "matching constraints"
 75          error "Unsupported constraint: " & constraintString.strVal
 76      else:
 77        error "Unsupported arity: " & $arity
 78  
 79      # 3. Nothing, we can use the constraint string as is on x86
 80  
 81      # 4. Register the inline ASM with LLVM
 82      let inlineASM = ident"inlineASM"
 83      let instrParam = op[2][0][1]
 84      let asmString = ident"asmString"
 85  
 86  
 87      instrBody.add quote do:
 88        let `asmString` = if numBits == 64: static(`instr` & "q") & static(" " & `instrParam`)
 89                          else: static(`instr` & "l") & static(" " & `instrParam`)
 90  
 91      instrBody.add quote do:
 92        let `inlineASM` = getInlineAsm(
 93          ty = `fnTy`,
 94          asmString = `asmString`,
 95          constraints = `constraintString`,
 96          # All carry/overflow instructions have sideffect on carry flag and can't be reordered
 97          # However, function calls can't be reordered.
 98          # Relevant operations that affects flags are:
 99          # - MUL, if the compiler decides not to use MULX
100          # - XOR, for zeroing a register
101          hasSideEffects = LlvmBool(0),
102          isAlignStack = LlvmBool(0),
103          dialect = InlineAsmDialectATT,
104          canThrow = LlvmBool(0))
105  
106      # 5. Call it
107      let opArray = nnkBracket.newTree()
108      for op in operands:
109        # when op is ValueRef: op
110        # else: constInt(uint64(op))
111        opArray.add newCall(
112          bindSym"ValueRef",
113          nnkWhenStmt.newTree(
114            nnkElifBranch.newTree(nnkInfix.newTree(ident"is", op, bindSym"AnyValueRef"), op),
115            nnkElse.newTree(newCall(ident"constInt", regTy, newCall(ident"uint64", op)))
116          )
117        )
118      # builder.call2(ty, inlineASM, [lhs, rhs], name)
119      instrBody.add newCall(
120        ident"call2", ident"builder", fnTy,
121        inlineASM, opArray, ident"name")
122  
123      # 6. Create the function signature
124      var opDefs: seq[NimNode]
125      opDefs.add ident"ValueRef" # Return type
126      opDefs.add newIdentDefs(ident"builder", bindSym"BuilderRef")
127      block:
128        var i = 0
129        for constraint in constraintString.strVal.split(','):
130          if constraint.startsWith('=') or constraint.startsWith("~{memory}"):
131            # Don't increment i
132            continue
133          elif constraint == "m":
134            opDefs.add newIdentDefs(operands[i], ident"ValueRef")
135          elif constraint.endsWith('r') or constraint.endsWith('0'):
136            opDefs.add newIdentDefs(
137              operands[i],
138              nnkInfix.newTree(ident"or",
139                nnkInfix.newTree(ident"or", ident"AnyValueRef", ident"uint32"),
140                ident"uint64")
141            )
142          else:
143            error "Unsupported constraint: " & constraint
144          i += 1
145      opDefs.add newIdentDefs(ident"name", bindSym"cstring", newLit"")
146  
147      result.add newProc(
148        name = nnkPostfix.newTree(ident"*", instrName),
149        params = opDefs,
150        procType = nnkProcDef,
151        body = instrBody)
152  
153  # Inline x86 assembly
154  # ------------------------------------------------------------
155  #
156  # We can generate add with carry via
157  #   call { i8, i64 } @llvm.x86.addcarry.64(i8 %carryIn, i64 %a, i64 %b)
158  #
159  # We can generate multi-precision mul and mulx via
160  #
161  #    define {i64, i64} @mul(i64 %x, i64 %y) #0 {
162  #
163  #      %1 = zext i64 %x to i128
164  #      %2 = zext i64 %y to i128
165  #      %r = mul i128 %1, %2
166  #      %3 = zext i32 64 to i128
167  #      %4 = lshr i128 %r, %3
168  #      %hi = trunc i128 %4 to i64
169  #      %lo = trunc i128 %r to i64
170  #
171  #      %res_tmp = insertvalue {i64, i64} undef, i64 %hi, 0
172  #      %res = insertvalue {i64, i64} %res_tmp, i64 %lo, 1
173  #
174  #      ret {i64, i64} %res
175  #    }
176  #
177  #    attributes #0 = {"target-features"="+bmi2"}
178  #
179  #    mul:
180  #            mov     rax, rdi
181  #            mul     rsi
182  #            mov     rcx, rax
183  #            mov     rax, rdx
184  #            mov     rdx, rcx
185  #            ret
186  #
187  #    mul_bmi2:
188  #        mov     rdx, rdi
189  #        mulx    rax, rdx, rsi
190  #        ret
191  #
192  # Note that mul(hi: var rdx, lo: var rax, a: reg/mem64, b: rax)
193  #   - clobbers carry (and many other) flags
194  #   - has fixed output to rdx:rax registers
195  # while mulx(hi: var reg64, lo: var reg64, a: reg/mem64, b: rdx)
196  #   - does not clobber flags
197  #   - has flexible register outputs
198  
199  
genInstr():
  # Each entry below has the form
  #   op procName: (mnemonic, operandTemplate, constraintString, [operandNames])
  # and makes `genInstr` emit an exported builder proc named `procName`.
  # The macro appends "q" to the mnemonic when the first operand is 64 bits
  # wide and "l" otherwise, then appends the operand template.
  #
  # Only plain `#` comments may be used inside this block: every statement
  # here is expected to be an `op` command by `genInstr`.

  # We are only concerned about the ADCX/ADOX instructions
  # which do not have intrinsics or cannot be generated through instruction combining
  # unlike llvm.x86.addcarry.u64 that can generate adc

  # (cf/of, r) <- a+b+(cf/of)
  # `_rr` variants: `rhs` uses the "r" constraint, so the generated parameter
  #                 accepts an LLVM value or a uint32/uint64.
  # `_rm` variants: `rhs` uses the "m" constraint, so the generated parameter
  #                 is a ValueRef and the asm signature takes a pointer.
  op adcx_rr: ("adcx", "%2, %0;", "=r,%0,r", [lhs, rhs])
  op adcx_rm: ("adcx", "%2, %0;", "=r,0,m", [lhs, rhs])
  op adox_rr: ("adox", "%2, %0;", "=r,%0,r", [lhs, rhs])
  op adox_rm: ("adox", "%2, %0;", "=r,0,m", [lhs, rhs])