JIT.cpp
1 #include "JIT.h" 2 3 #include <cstddef> 4 #include <mutex> 5 6 #include "preprocessor/llvm_includes_start.h" 7 #include <llvm/IR/Module.h> 8 #include <llvm/ADT/StringSwitch.h> 9 #include <llvm/ADT/Triple.h> 10 #include <llvm/ExecutionEngine/MCJIT.h> 11 #include <llvm/ExecutionEngine/SectionMemoryManager.h> 12 #include <llvm/Support/TargetSelect.h> 13 #include <llvm/Support/raw_os_ostream.h> 14 #include <evmc/evmc.h> 15 #include "preprocessor/llvm_includes_end.h" 16 17 #include "Ext.h" 18 #include "Compiler.h" 19 #include "Optimizer.h" 20 #include "Cache.h" 21 #include "ExecStats.h" 22 #include "Utils.h" 23 #include "BuildInfo.gen.h" 24 25 26 // FIXME: Move these checks to evmc tests. 27 static_assert(sizeof(evmc_uint256be) == 32, "evmc_uint256be is too big"); 28 static_assert(sizeof(evmc_address) == 20, "evmc_address is too big"); 29 static_assert(sizeof(evmc_result) == 64, "evmc_result does not fit cache line"); 30 static_assert(sizeof(evmc_message) <= 18*8, "evmc_message not optimally packed"); 31 static_assert(offsetof(evmc_message, code_hash) % 8 == 0, "evmc_message.code_hash not aligned"); 32 33 // Check enums match int size. 34 // On GCC/clang the underlying type should be unsigned int, on MSVC int 35 static_assert(sizeof(evmc_call_kind) == sizeof(int), "Enum `evmc_call_kind` is not the size of int"); 36 static_assert(sizeof(evmc_revision) == sizeof(int), "Enum `evmc_revision` is not the size of int"); 37 38 constexpr size_t optionalDataSize = sizeof(evmc_result) - offsetof(evmc_result, create_address); 39 static_assert(optionalDataSize == sizeof(evmc_result_optional_data), ""); 40 41 42 namespace dev 43 { 44 namespace evmjit 45 { 46 using namespace eth::jit; 47 48 namespace 49 { 50 using ExecFunc = ReturnCode(*)(ExecutionContext*); 51 52 struct CodeMapEntry 53 { 54 ExecFunc func = nullptr; 55 size_t hits = 0; 56 57 CodeMapEntry() = default; 58 explicit CodeMapEntry(ExecFunc func) : func(func) {} 59 }; 60 61 char toChar(evmc_revision rev) 62 { 63 switch (rev) 64 { 65 case EVMC_FRONTIER: return 'F'; 66 case EVMC_HOMESTEAD: return 'H'; 67 case EVMC_TANGERINE_WHISTLE: return 'T'; 68 case EVMC_SPURIOUS_DRAGON: return 'S'; 69 case EVMC_BYZANTIUM: return 'B'; 70 case EVMC_CONSTANTINOPLE: return 'C'; 71 } 72 LLVM_BUILTIN_UNREACHABLE; 73 } 74 75 /// Combine code hash and EVM revision into a printable code identifier. 76 std::string makeCodeId(evmc_uint256be codeHash, evmc_revision rev, uint32_t flags) 77 { 78 static const auto hexChars = "0123456789abcdef"; 79 std::string str; 80 str.reserve(sizeof(codeHash) * 2 + 1); 81 for (auto b: codeHash.bytes) 82 { 83 str.push_back(hexChars[b >> 4]); 84 str.push_back(hexChars[b & 0xf]); 85 } 86 str.push_back(toChar(rev)); 87 if (flags & EVMC_STATIC) 88 str.push_back('S'); 89 return str; 90 } 91 92 void printVersion() 93 { 94 std::cout << "Ethereum EVM JIT Compiler (http://github.com/ethereum/evmjit):\n" 95 << " EVMJIT version " << EVMJIT_VERSION << "\n" 96 #ifdef NDEBUG 97 << " Optimized build, " 98 #else 99 << " DEBUG build, " 100 #endif 101 << __DATE__ << " (" << __TIME__ << ")\n" 102 << std::endl; 103 } 104 105 namespace cl = llvm::cl; 106 cl::opt<bool> g_optimize{"O", cl::desc{"Optimize"}}; 107 cl::opt<CacheMode> g_cache{"cache", cl::desc{"Cache compiled EVM code on disk"}, 108 cl::values( 109 clEnumValN(CacheMode::off, "0", "Disabled"), 110 clEnumValN(CacheMode::on, "1", "Enabled"), 111 clEnumValN(CacheMode::read, "r", "Read only. No new objects are added to cache."), 112 clEnumValN(CacheMode::write, "w", "Write only. No objects are loaded from cache."), 113 clEnumValN(CacheMode::clear, "c", "Clear the cache storage. Cache is disabled."), 114 clEnumValN(CacheMode::preload, "p", "Preload all cached objects."))}; 115 cl::opt<bool> g_stats{"st", cl::desc{"Statistics"}}; 116 cl::opt<bool> g_dump{"dump", cl::desc{"Dump LLVM IR module"}}; 117 118 void parseOptions() 119 { 120 static llvm::llvm_shutdown_obj shutdownObj{}; 121 cl::AddExtraVersionPrinter(printVersion); 122 cl::ParseEnvironmentOptions("evmjit", "EVMJIT", "Ethereum EVM JIT Compiler"); 123 } 124 125 class SymbolResolver; 126 127 class JITImpl: public evmc_instance 128 { 129 std::unique_ptr<llvm::ExecutionEngine> m_engine; 130 SymbolResolver const* m_memoryMgr = nullptr; 131 mutable std::mutex x_codeMap; 132 std::unordered_map<std::string, CodeMapEntry> m_codeMap; 133 134 static llvm::LLVMContext& getLLVMContext() 135 { 136 // TODO: This probably should be thread_local, but for now that causes 137 // a crash when MCJIT is destroyed. 138 static llvm::LLVMContext llvmContext; 139 return llvmContext; 140 } 141 142 void createEngine(); 143 144 public: 145 static JITImpl& instance() 146 { 147 // We need to keep this a singleton. 148 // so we only call changeVersion on it. 149 static JITImpl s_instance; 150 return s_instance; 151 } 152 153 JITImpl(); 154 155 void checkMemorySize(); 156 157 llvm::ExecutionEngine& engine() { return *m_engine; } 158 159 CodeMapEntry getExecFunc(std::string const& _codeIdentifier); 160 void mapExecFunc(std::string const& _codeIdentifier, ExecFunc _funcAddr); 161 162 ExecFunc compile(evmc_revision _rev, bool _staticCall, byte const* _code, uint64_t _codeSize, std::string const& _codeIdentifier); 163 164 evmc_context_fn_table const* host = nullptr; 165 166 evmc_message const* currentMsg = nullptr; 167 std::vector<uint8_t> returnBuffer; 168 169 std::vector<uint8_t> codeBuffer; 170 171 size_t hitThreshold = 0; 172 }; 173 174 int64_t call(evmc_context* _ctx, int _kind, int64_t _gas, evmc_address const* _address, 175 evmc_uint256be const* _value, uint8_t const* _inputData, size_t _inputSize, uint8_t* _outputData, 176 size_t _outputSize, uint8_t const** o_bufData, size_t* o_bufSize) noexcept 177 { 178 // FIXME: Handle unexpected exceptions. 179 auto& jit = JITImpl::instance(); 180 181 evmc_message msg; 182 msg.destination = *_address; 183 msg.sender = _kind != EVMC_DELEGATECALL ? jit.currentMsg->destination : jit.currentMsg->sender; 184 msg.value = _kind != EVMC_DELEGATECALL ? *_value : jit.currentMsg->value; 185 msg.input_data = _inputData; 186 msg.input_size = _inputSize; 187 msg.gas = _gas; 188 msg.depth = jit.currentMsg->depth + 1; 189 msg.flags = jit.currentMsg->flags; 190 if (_kind == EVM_STATICCALL) 191 { 192 msg.kind = EVMC_CALL; 193 msg.flags |= EVMC_STATIC; 194 } 195 else 196 msg.kind = static_cast<evmc_call_kind>(_kind); 197 198 // FIXME: Handle code hash. 199 evmc_result result; 200 jit.host->call(&result, _ctx, &msg); 201 // FIXME: Clarify when gas_left is valid. 202 int64_t r = result.gas_left; 203 204 // Handle output. It can contain data from RETURN or REVERT opcodes. 205 auto size = std::min(_outputSize, result.output_size); 206 std::copy_n(result.output_data, size, _outputData); 207 208 // Update RETURNDATA buffer. 209 // The buffer is already cleared. 210 jit.returnBuffer = {result.output_data, result.output_data + result.output_size}; 211 *o_bufData = jit.returnBuffer.data(); 212 *o_bufSize = jit.returnBuffer.size(); 213 214 if (_kind == EVMC_CREATE && result.status_code == EVMC_SUCCESS) 215 std::copy_n(result.create_address.bytes, sizeof(result.create_address), _outputData); 216 217 if (result.status_code != EVMC_SUCCESS) 218 r |= EVM_CALL_FAILURE; 219 220 if (result.release) 221 result.release(&result); 222 return r; 223 } 224 225 226 /// A wrapper for new EVM-C copycode callback function. 227 size_t getCode(uint8_t** o_pCode, evmc_context* _ctx, evmc_address const* _address) noexcept 228 { 229 auto& jit = JITImpl::instance(); 230 size_t codeSize = jit.host->get_code_size(_ctx, _address); 231 jit.codeBuffer.resize(codeSize); // Allocate needed memory to store the full code. 232 233 // Copy the code to JIT's buffer and send the buffer reference back to LLVM. 234 size_t size = 235 jit.host->copy_code(_ctx, _address, 0, jit.codeBuffer.data(), jit.codeBuffer.size()); 236 *o_pCode = jit.codeBuffer.data(); 237 return size; 238 } 239 240 class SymbolResolver : public llvm::SectionMemoryManager 241 { 242 llvm::JITSymbol findSymbol(std::string const& _name) override 243 { 244 auto& jit = JITImpl::instance(); 245 246 // Handle symbols' global prefix. 247 // If in current DataLayout global symbols are prefixed, drop the 248 // prefix from the name for local search. 249 char prefix = jit.engine().getDataLayout().getGlobalPrefix(); 250 llvm::StringRef unprefixedName = (prefix != '\0' && _name[0] == prefix) 251 ? llvm::StringRef{_name}.drop_front() : llvm::StringRef{_name}; 252 253 auto addr = 254 llvm::StringSwitch<uint64_t>(unprefixedName) 255 .Case("env_sha3", reinterpret_cast<uint64_t>(&keccak)) 256 .Case("evm.exists", reinterpret_cast<uint64_t>(jit.host->account_exists)) 257 .Case("evm.sload", reinterpret_cast<uint64_t>(jit.host->get_storage)) 258 .Case("evm.sstore", reinterpret_cast<uint64_t>(jit.host->set_storage)) 259 .Case("evm.balance", reinterpret_cast<uint64_t>(jit.host->get_balance)) 260 .Case("evm.codesize", reinterpret_cast<uint64_t>(jit.host->get_code_size)) 261 .Case("evm.code", reinterpret_cast<uint64_t>(getCode)) 262 .Case("evm.selfdestruct", reinterpret_cast<uint64_t>(jit.host->selfdestruct)) 263 .Case("evm.call", reinterpret_cast<uint64_t>(call)) 264 .Case("evm.get_tx_context", reinterpret_cast<uint64_t>(jit.host->get_tx_context)) 265 .Case("evm.blockhash", reinterpret_cast<uint64_t>(jit.host->get_block_hash)) 266 .Case("evm.log", reinterpret_cast<uint64_t>(jit.host->emit_log)) 267 .Default(0); 268 if (addr) 269 return {addr, llvm::JITSymbolFlags::Exported}; 270 271 // Fallback to default implementation that would search for the symbol 272 // in the current process. Use the original prefixed symbol name. 273 // TODO: In the future we should control the whole set of requested 274 // symbols (like memcpy, memset, etc) to improve performance. 275 return llvm::SectionMemoryManager::findSymbol(_name); 276 } 277 278 void reportMemorySize(size_t _addedSize) 279 { 280 m_totalMemorySize += _addedSize; 281 282 if (!g_stats) 283 return; 284 285 if (m_totalMemorySize >= m_printMemoryLimit) 286 { 287 constexpr size_t printMemoryStep = 10 * 1024 * 1024; 288 auto value = double(m_totalMemorySize) / printMemoryStep; 289 std::cerr << "EVMJIT total memory size: " << (10 * value) << " MB\n"; 290 m_printMemoryLimit += printMemoryStep; 291 } 292 } 293 294 uint8_t* allocateCodeSection(uintptr_t _size, unsigned _a, unsigned _id, 295 llvm::StringRef _name) override 296 { 297 reportMemorySize(_size); 298 return llvm::SectionMemoryManager::allocateCodeSection(_size, _a, _id, _name); 299 } 300 301 uint8_t* allocateDataSection(uintptr_t _size, unsigned _a, unsigned _id, 302 llvm::StringRef _name, bool _ro) override 303 { 304 reportMemorySize(_size); 305 return llvm::SectionMemoryManager::allocateDataSection(_size, _a, _id, _name, _ro); 306 } 307 308 size_t m_totalMemorySize = 0; 309 size_t m_printMemoryLimit = 1024 * 1024; 310 311 public: 312 size_t totalMemorySize() const { return m_totalMemorySize; } 313 }; 314 315 316 CodeMapEntry JITImpl::getExecFunc(std::string const& _codeIdentifier) 317 { 318 std::lock_guard<std::mutex> lock{x_codeMap}; 319 auto& entry = m_codeMap[_codeIdentifier]; 320 ++entry.hits; 321 return entry; 322 } 323 324 void JITImpl::mapExecFunc(std::string const& _codeIdentifier, ExecFunc _funcAddr) 325 { 326 std::lock_guard<std::mutex> lock{x_codeMap}; 327 m_codeMap[_codeIdentifier].func = _funcAddr; 328 } 329 330 ExecFunc JITImpl::compile(evmc_revision _rev, bool _staticCall, byte const* _code, uint64_t _codeSize, 331 std::string const& _codeIdentifier) 332 { 333 auto module = Cache::getObject(_codeIdentifier, getLLVMContext()); 334 if (!module) 335 { 336 // TODO: Listener support must be redesigned. These should be a feature of JITImpl 337 //listener->stateChanged(ExecState::Compilation); 338 assert(_code || !_codeSize); 339 //TODO: Can the Compiler be stateless? 340 module = Compiler({}, _rev, _staticCall, getLLVMContext()).compile(_code, _code + _codeSize, _codeIdentifier); 341 342 if (g_optimize) 343 { 344 //listener->stateChanged(ExecState::Optimization); 345 optimize(*module); 346 } 347 348 prepare(*module); 349 } 350 351 if (g_dump) 352 { 353 llvm::raw_os_ostream cerr{std::cerr}; 354 module->print(cerr, nullptr); 355 } 356 357 358 m_engine->addModule(std::move(module)); 359 //listener->stateChanged(ExecState::CodeGen); 360 return (ExecFunc)m_engine->getFunctionAddress(_codeIdentifier); 361 } 362 363 } // anonymous namespace 364 365 366 ExecutionContext::~ExecutionContext() noexcept 367 { 368 if (m_memData) 369 std::free(m_memData); 370 } 371 372 bytes_ref ExecutionContext::getReturnData() const 373 { 374 auto data = m_data->callData; 375 auto size = static_cast<size_t>(m_data->callDataSize); 376 377 if (data < m_memData || data >= m_memData + m_memSize || size == 0) 378 { 379 assert(size == 0); // data can be an invalid pointer only if size is 0 380 m_data->callData = nullptr; 381 return {}; 382 } 383 384 return bytes_ref{data, size}; 385 } 386 387 extern "C" 388 { 389 390 EXPORT evmc_instance* evmjit_create() 391 { 392 // Let's always return the same instance. It's a bit of faking, but actually 393 // this might be a compliant implementation. 394 return &JITImpl::instance(); 395 } 396 397 static void destroy(evmc_instance* instance) 398 { 399 (void)instance; 400 assert(instance == static_cast<void*>(&JITImpl::instance())); 401 } 402 403 static evmc_result execute(evmc_instance* instance, evmc_context* context, evmc_revision rev, 404 evmc_message const* msg, uint8_t const* code, size_t code_size) 405 { 406 auto& jit = *reinterpret_cast<JITImpl*>(instance); 407 408 if (msg->depth == 0) 409 jit.checkMemorySize(); 410 411 if (!jit.host) 412 jit.host = context->fn_table; 413 assert(jit.host == context->fn_table); // Require the fn_table not to change. 414 415 // TODO: Temporary keep track of the current message. 416 evmc_message const* prevMsg = jit.currentMsg; 417 jit.currentMsg = msg; 418 419 RuntimeData rt; 420 rt.code = code; 421 rt.codeSize = code_size; 422 rt.gas = msg->gas; 423 rt.callData = msg->input_data; 424 rt.callDataSize = msg->input_size; 425 std::memcpy(&rt.apparentValue, &msg->value, sizeof(msg->value)); 426 std::memset(&rt.address, 0, 12); 427 std::memcpy(&rt.address[12], &msg->destination, sizeof(msg->destination)); 428 std::memset(&rt.caller, 0, 12); 429 std::memcpy(&rt.caller[12], &msg->sender, sizeof(msg->sender)); 430 rt.depth = msg->depth; 431 432 ExecutionContext ctx{rt, context}; 433 434 evmc_result result; 435 result.status_code = EVMC_SUCCESS; 436 result.gas_left = 0; 437 result.output_data = nullptr; 438 result.output_size = 0; 439 result.release = nullptr; 440 441 auto codeIdentifier = makeCodeId(msg->code_hash, rev, msg->flags); 442 auto codeEntry = jit.getExecFunc(codeIdentifier); 443 auto func = codeEntry.func; 444 if (!func) 445 { 446 //FIXME: We have a race condition here! 447 448 if (codeEntry.hits <= jit.hitThreshold) 449 { 450 result.status_code = EVMC_REJECTED; 451 return result; 452 } 453 454 if (g_stats) 455 std::cerr << "EVMJIT Compile " << codeIdentifier << " (" << codeEntry.hits << ")\n"; 456 457 const bool staticCall = (msg->flags & EVMC_STATIC) != 0; 458 func = jit.compile(rev, staticCall, ctx.code(), ctx.codeSize(), codeIdentifier); 459 if (!func) 460 { 461 result.status_code = EVMC_INTERNAL_ERROR; 462 return result; 463 } 464 jit.mapExecFunc(codeIdentifier, func); 465 } 466 467 auto returnCode = func(&ctx); 468 469 if (returnCode == ReturnCode::Revert) 470 { 471 result.status_code = EVMC_REVERT; 472 result.gas_left = rt.gas; 473 } 474 else if (returnCode == ReturnCode::OutOfGas) 475 { 476 // EVMJIT does not provide information what exactly type of failure 477 // it was, so use generic EVM_FAILURE. 478 result.status_code = EVMC_FAILURE; 479 } 480 else 481 { 482 // In case of success return the amount of gas left. 483 result.gas_left = rt.gas; 484 } 485 486 if (returnCode == ReturnCode::Return || returnCode == ReturnCode::Revert) 487 { 488 auto out = ctx.getReturnData(); 489 result.output_data = std::get<0>(out); 490 result.output_size = std::get<1>(out); 491 } 492 493 // Take care of the internal memory. 494 if (ctx.m_memData) 495 { 496 // Use result's reserved data to store the memory pointer. 497 498 evmc_get_optional_data(&result)->pointer = ctx.m_memData; 499 500 // Set pointer to the destructor that will release the memory. 501 result.release = [](evmc_result const* r) 502 { 503 std::free(evmc_get_const_optional_data(r)->pointer); 504 }; 505 ctx.m_memData = nullptr; 506 } 507 508 jit.currentMsg = prevMsg; 509 return result; 510 } 511 512 static int setOption(evmc_instance* instance, const char* name, const char* value) noexcept 513 { 514 try 515 { 516 if (name == std::string{"hits-threshold"}) 517 { 518 auto& jit = static_cast<JITImpl&>(*instance); 519 jit.hitThreshold = std::stoul(value); 520 return 1; 521 } 522 return 0; 523 } 524 catch (...) 525 { 526 return 0; 527 } 528 } 529 530 } // extern "C" 531 532 void JITImpl::createEngine() 533 { 534 auto module = llvm::make_unique<llvm::Module>("", getLLVMContext()); 535 536 // FIXME: LLVM 3.7: test on Windows 537 auto triple = llvm::Triple(llvm::sys::getProcessTriple()); 538 if (triple.getOS() == llvm::Triple::OSType::Win32) 539 triple.setObjectFormat(llvm::Triple::ObjectFormatType::ELF); // MCJIT does not support COFF format 540 module->setTargetTriple(triple.str()); 541 542 llvm::EngineBuilder builder(std::move(module)); 543 builder.setEngineKind(llvm::EngineKind::JIT); 544 auto memoryMgr = llvm::make_unique<SymbolResolver>(); 545 m_memoryMgr = memoryMgr.get(); 546 builder.setMCJITMemoryManager(std::move(memoryMgr)); 547 builder.setOptLevel(g_optimize ? llvm::CodeGenOpt::Default : llvm::CodeGenOpt::None); 548 #ifndef NDEBUG 549 builder.setVerifyModules(true); 550 #endif 551 552 m_engine.reset(builder.create()); 553 554 // TODO: Update cache listener 555 m_engine->setObjectCache(Cache::init(g_cache, nullptr)); 556 557 // FIXME: Disabled during API changes 558 //if (preloadCache) 559 // Cache::preload(*m_engine, funcCache); 560 } 561 562 JITImpl::JITImpl() 563 : evmc_instance({ 564 EVMC_ABI_VERSION, 565 "evmjit", 566 EVMJIT_VERSION, 567 evmjit::destroy, 568 evmjit::execute, 569 evmjit::setOption, 570 }) 571 { 572 parseOptions(); 573 574 bool preloadCache = g_cache == CacheMode::preload; 575 if (preloadCache) 576 g_cache = CacheMode::on; 577 578 llvm::InitializeNativeTarget(); 579 llvm::InitializeNativeTargetAsmPrinter(); 580 581 createEngine(); 582 } 583 584 void JITImpl::checkMemorySize() 585 { 586 constexpr size_t memoryLimit = 1000 * 1024 * 1024; 587 588 if (m_memoryMgr->totalMemorySize() > memoryLimit) 589 { 590 if (g_stats) 591 std::cerr << "EVMJIT reset!\n"; 592 593 std::lock_guard<std::mutex> lock{x_codeMap}; 594 m_codeMap.clear(); 595 m_engine.reset(); 596 createEngine(); 597 } 598 } 599 600 } 601 }