x86_inlineasm.nim
# Constantine
# Copyright (c) 2018-2019    Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import
  std/[macros, strutils],
  ./llvm

# ############################################################
#
#                     x86 Inline ASM
#
# ############################################################

macro genInstr(body: untyped): untyped =
  ## Generates one exported builder proc per `op` entry of `body`.
  ##
  ## Each entry has the shape
  ##   `op procName: (mnemonic, asmParams, constraints, [operands])`
  ## where the first three tuple fields are string literals and the
  ## fourth is a bracket of operand identifiers.
  ##
  ## The generated proc has the signature
  ##   `procName*(builder: BuilderRef, <operands>, name: cstring = ""): ValueRef`
  ## and, when called:
  ##   1. derives the register width and type from the first operand,
  ##   2. builds the function type of the inline assembly,
  ##   3. suffixes the mnemonic with "q" when the width is 64 bits
  ##      and with "l" otherwise,
  ##   4. registers the inline assembly through `getInlineAsm`,
  ##   5. emits the call through `call2`.
  ##
  ## Only entries with exactly 2 operands and a constraint string
  ## starting with '=' are supported; anything else is a compile-time error.
  result = newStmtList()

  body.expectKind(nnkStmtList)
  for op in body:
    op.expectKind(nnkCommand)
    doAssert op[0].eqIdent"op"

    let instrName = op[1]

    # For each op, generate a builder proc.
    # Validate the shape of the description tuple before indexing into it.
    let spec = op[2][0]
    spec.expectKind(nnkTupleConstr)
    spec.expectMinLen(4)
    spec[0].expectKind(nnkStrLit)
    spec[1].expectKind(nnkStrLit)
    spec[2].expectKind(nnkStrLit)
    spec[3].expectKind(nnkBracket)

    let instr = spec[0]
    let instrParam = spec[1]
    let constraintString = spec[2]
    let operands = spec[3]
    let arity = operands.len

    # Reject unsupported descriptions up front, before touching operands[0].
    if arity != 2:
      error("Unsupported arity: " & $arity, operands)
    if not constraintString.strVal.startsWith('='):
      # We only support out of place "=" function.
      # In-place with "+" requires alloca + load/stores in codegen
      # in-place functions can be rewritten to be out-place with "matching constraints"
      error("Unsupported constraint: " & constraintString.strVal, constraintString)

    let instrBody = newStmtList()

    # Identifiers injected as-is into the generated proc body
    let numBits = ident"numBits"
    let regTy = ident"regTy"
    let fnTy = ident"fnTy"
    let ctx = ident"ctx"
    let inlineASM = ident"inlineASM"
    let asmString = ident"asmString"

    # 1. Detect the size of registers
    let lhs = operands[0]

    instrBody.add quote do:
      let `ctx` = builder.getContext()
      # lhs: ValueRef or uint32 or uint64
      let `numBits` = when `lhs` is ValueRef|ConstValueRef: `lhs`.getTypeOf().getIntTypeWidth()
                      else: 8*sizeof(`lhs`)
      let `regTy` = when `lhs` is ValueRef|ConstValueRef: `lhs`.getTypeOf()
                    elif `lhs` is uint32: `ctx`.int32_t()
                    elif `lhs` is uint64: `ctx`.int64_t()
                    else: {.error: "Unsupported input type " & $typeof(`lhs`).}

    # 2. Create the LLVM asm signature
    if constraintString.strVal.endsWith('r'):
      # Last operand is passed in a register
      instrBody.add quote do:
        let `fnTy` = function_t(`regTy`, [`regTy`, `regTy`])
    else:
      # Otherwise the last operand is passed as a pointer
      instrBody.add quote do:
        let `fnTy` = function_t(`regTy`, [`regTy`, pointer_t(`regTy`)])

    # 3. Nothing, we can use the constraint string as is on x86

    # 4. Register the inline ASM with LLVM
    instrBody.add quote do:
      let `asmString` = if `numBits` == 64: static(`instr` & "q") & static(" " & `instrParam`)
                        else: static(`instr` & "l") & static(" " & `instrParam`)

    instrBody.add quote do:
      let `inlineASM` = getInlineAsm(
        ty = `fnTy`,
        asmString = `asmString`,
        constraints = `constraintString`,
        # All carry/overflow instructions have side effects on the carry flag
        # and can't be reordered.
        # However, function calls can't be reordered.
        # Relevant operations that affect flags are:
        # - MUL, if the compiler decides not to use MULX
        # - XOR, for zeroing a register
        hasSideEffects = LlvmBool(0),
        isAlignStack = LlvmBool(0),
        dialect = InlineAsmDialectATT,
        canThrow = LlvmBool(0))

    # 5. Call it
    let opArray = nnkBracket.newTree()
    for operand in operands:
      # ValueRef(
      #   when operand is AnyValueRef: operand
      #   else: constInt(regTy, uint64(operand)))
      opArray.add newCall(
        bindSym"ValueRef",
        nnkWhenStmt.newTree(
          nnkElifBranch.newTree(
            nnkInfix.newTree(ident"is", operand, bindSym"AnyValueRef"),
            operand),
          nnkElse.newTree(
            newCall(ident"constInt", regTy, newCall(ident"uint64", operand)))
        )
      )
    # builder.call2(ty, inlineASM, [lhs, rhs], name)
    instrBody.add newCall(
      ident"call2", ident"builder", fnTy,
      inlineASM, opArray, ident"name")

    # 6. Create the function signature
    var opDefs: seq[NimNode]
    opDefs.add ident"ValueRef" # Return type
    opDefs.add newIdentDefs(ident"builder", bindSym"BuilderRef")
    block:
      # `i` indexes the input operands; output and clobber constraints
      # do not consume an operand.
      var i = 0
      for constraint in constraintString.strVal.split(','):
        if constraint.startsWith('=') or constraint.startsWith("~{memory}"):
          # Don't increment i
          continue
        elif constraint == "m":
          opDefs.add newIdentDefs(operands[i], ident"ValueRef")
        elif constraint.endsWith('r') or constraint.endsWith('0'):
          # AnyValueRef or uint32 or uint64
          opDefs.add newIdentDefs(
            operands[i],
            nnkInfix.newTree(ident"or",
              nnkInfix.newTree(ident"or", ident"AnyValueRef", ident"uint32"),
              ident"uint64")
          )
        else:
          error "Unsupported constraint: " & constraint
        i += 1
    opDefs.add newIdentDefs(ident"name", bindSym"cstring", newLit"")

    result.add newProc(
      name = nnkPostfix.newTree(ident"*", instrName),
      params = opDefs,
      procType = nnkProcDef,
      body = instrBody)

# Inline x86 assembly
# ------------------------------------------------------------
#
# We can generate add with carry via
#   call { i8, i64 } @llvm.x86.addcarry.64(i8 %carryIn, i64 %a, i64 %b)
#
# We can generate multi-precision mul and mulx via
#
#   define {i64, i64} @mul(i64 %x, i64 %y) #0 {
#
#     %1 = zext i64 %x to i128
#     %2 = zext i64 %y to i128
#     %r = mul i128 %1, %2
#     %3 = zext i32 64 to i128
#     %4 = lshr i128 %r, %3
#     %hi = trunc i128 %4 to i64
#     %lo = trunc i128 %r to i64
#
#     %res_tmp = insertvalue {i64, i64} undef, i64 %hi, 0
#     %res = insertvalue {i64, i64} %res_tmp, i64 %lo, 1
#
#     ret {i64, i64} %res
#   }
#
#   attributes #0 = {"target-features"="+bmi2"}
#
#   mul:
#     mov     rax, rdi
#     mul     rsi
#     mov     rcx, rax
#     mov     rax, rdx
#     mov     rdx, rcx
#     ret
#
#   mul_bmi2:
#     mov     rdx, rdi
#     mulx    rax, rdx, rsi
#     ret
#
# Note that mul(hi: var rdx, lo: var rax, a: reg/mem64, b: rax)
# - clobbers carry (and many other) flags
# - has fixed output to rdx:rax registers
# while mulx(hi: var reg64, lo: var reg64, a: reg/mem64, b: rdx)
# - does not clobber flags
# - has flexible register outputs


genInstr():
  # We are only concerned about the ADCX/ADOX instructions
  # which do not have intrinsics or cannot be generated through instruction combining
  # unlike llvm.x86.addcarry.u64 that can generate adc

  # (cf/of, r) <- a+b+(cf/of)
  op adcx_rr: ("adcx", "%2, %0;", "=r,%0,r", [lhs, rhs])
  op adcx_rm: ("adcx", "%2, %0;", "=r,0,m", [lhs, rhs])
  op adox_rr: ("adox", "%2, %0;", "=r,%0,r", [lhs, rhs])
  op adox_rm: ("adox", "%2, %0;", "=r,0,m", [lhs, rhs])