/ libevmjit / JIT.cpp
JIT.cpp
  1  #include "JIT.h"
  2  
  3  #include <cstddef>
  4  #include <mutex>
  5  
  6  #include "preprocessor/llvm_includes_start.h"
  7  #include <llvm/IR/Module.h>
  8  #include <llvm/ADT/StringSwitch.h>
  9  #include <llvm/ADT/Triple.h>
 10  #include <llvm/ExecutionEngine/MCJIT.h>
 11  #include <llvm/ExecutionEngine/SectionMemoryManager.h>
 12  #include <llvm/Support/TargetSelect.h>
 13  #include <llvm/Support/raw_os_ostream.h>
 14  #include <evmc/evmc.h>
 15  #include "preprocessor/llvm_includes_end.h"
 16  
 17  #include "Ext.h"
 18  #include "Compiler.h"
 19  #include "Optimizer.h"
 20  #include "Cache.h"
 21  #include "ExecStats.h"
 22  #include "Utils.h"
 23  #include "BuildInfo.gen.h"
 24  
 25  
// Compile-time sanity checks of the EVMC ABI structures this file relies on.
// Note that the two size checks below test for exact equality, so they also
// fire when a type is smaller than expected, despite the "too big" wording.
// FIXME: Move these checks to evmc tests.
static_assert(sizeof(evmc_uint256be) == 32, "evmc_uint256be is too big");
static_assert(sizeof(evmc_address) == 20, "evmc_address is too big");
static_assert(sizeof(evmc_result) == 64, "evmc_result does not fit cache line");
static_assert(sizeof(evmc_message) <= 18*8, "evmc_message not optimally packed");
static_assert(offsetof(evmc_message, code_hash) % 8 == 0, "evmc_message.code_hash not aligned");

// Check enums match int size.
// On GCC/clang the underlying type should be unsigned int, on MSVC int
static_assert(sizeof(evmc_call_kind)  == sizeof(int), "Enum `evmc_call_kind` is not the size of int");
static_assert(sizeof(evmc_revision)       == sizeof(int), "Enum `evmc_revision` is not the size of int");

// Number of bytes in evmc_result from `create_address` up to the end of the
// struct. It must match evmc_result_optional_data exactly, because execute()
// stores its memory pointer there through evmc_get_optional_data().
constexpr size_t optionalDataSize = sizeof(evmc_result) - offsetof(evmc_result, create_address);
static_assert(optionalDataSize == sizeof(evmc_result_optional_data), "");
 40  
 41  
 42  namespace dev
 43  {
 44  namespace evmjit
 45  {
 46  using namespace eth::jit;
 47  
 48  namespace
 49  {
 50  using ExecFunc = ReturnCode(*)(ExecutionContext*);
 51  
 52  struct CodeMapEntry
 53  {
 54      ExecFunc func = nullptr;
 55      size_t hits = 0;
 56  
 57      CodeMapEntry() = default;
 58      explicit CodeMapEntry(ExecFunc func) : func(func) {}
 59  };
 60  
 61  char toChar(evmc_revision rev)
 62  {
 63  	switch (rev)
 64  	{
 65  	case EVMC_FRONTIER: return 'F';
 66  	case EVMC_HOMESTEAD: return 'H';
 67  	case EVMC_TANGERINE_WHISTLE: return 'T';
 68  	case EVMC_SPURIOUS_DRAGON: return 'S';
 69  	case EVMC_BYZANTIUM: return 'B';
 70  	case EVMC_CONSTANTINOPLE: return 'C';
 71  	}
 72  	LLVM_BUILTIN_UNREACHABLE;
 73  }
 74  
 75  /// Combine code hash and EVM revision into a printable code identifier.
 76  std::string makeCodeId(evmc_uint256be codeHash, evmc_revision rev, uint32_t flags)
 77  {
 78  	static const auto hexChars = "0123456789abcdef";
 79  	std::string str;
 80  	str.reserve(sizeof(codeHash) * 2 + 1);
 81  	for (auto b: codeHash.bytes)
 82  	{
 83  		str.push_back(hexChars[b >> 4]);
 84  		str.push_back(hexChars[b & 0xf]);
 85  	}
 86  	str.push_back(toChar(rev));
 87  	if (flags & EVMC_STATIC)
 88  		str.push_back('S');
 89  	return str;
 90  }
 91  
 92  void printVersion()
 93  {
 94  	std::cout << "Ethereum EVM JIT Compiler (http://github.com/ethereum/evmjit):\n"
 95  			  << "  EVMJIT version " << EVMJIT_VERSION << "\n"
 96  #ifdef NDEBUG
 97  			  << "  Optimized build, "
 98  #else
 99  			  << "  DEBUG build, "
100  #endif
101  			  << __DATE__ << " (" << __TIME__ << ")\n"
102  			  << std::endl;
103  }
104  
// Options of the JIT, registered with LLVM's command-line library and parsed
// in parseOptions().
namespace cl = llvm::cl;
// "-O": when set, compile() runs optimize() on freshly compiled modules and
// createEngine() selects the default code generation optimization level.
cl::opt<bool> g_optimize{"O", cl::desc{"Optimize"}};
// "-cache": cache mode handed to Cache::init() in createEngine().
cl::opt<CacheMode> g_cache{"cache", cl::desc{"Cache compiled EVM code on disk"},
	cl::values(
		clEnumValN(CacheMode::off,   "0", "Disabled"),
		clEnumValN(CacheMode::on,    "1", "Enabled"),
		clEnumValN(CacheMode::read,  "r", "Read only. No new objects are added to cache."),
		clEnumValN(CacheMode::write, "w", "Write only. No objects are loaded from cache."),
		clEnumValN(CacheMode::clear, "c", "Clear the cache storage. Cache is disabled."),
		clEnumValN(CacheMode::preload, "p", "Preload all cached objects."))};
// "-st": enables the diagnostic messages this file writes to std::cerr
// (compilation notices, memory usage, engine resets).
cl::opt<bool> g_stats{"st", cl::desc{"Statistics"}};
// "-dump": makes compile() print every module's LLVM IR to std::cerr.
cl::opt<bool> g_dump{"dump", cl::desc{"Dump LLVM IR module"}};
117  
118  void parseOptions()
119  {
120  	static llvm::llvm_shutdown_obj shutdownObj{};
121  	cl::AddExtraVersionPrinter(printVersion);
122  	cl::ParseEnvironmentOptions("evmjit", "EVMJIT", "Ethereum EVM JIT Compiler");
123  }
124  
125  class SymbolResolver;
126  
/// The JIT virtual machine instance exposed through the EVMC interface.
///
/// Derives from evmc_instance so that a pointer to the singleton can be
/// handed out by evmjit_create() and cast back in the EVMC callbacks.
/// It owns the LLVM execution engine and the map of already compiled code.
class JITImpl: public evmc_instance
{
	// The LLVM execution engine; (re)created by createEngine().
	std::unique_ptr<llvm::ExecutionEngine> m_engine;
	// Non-owning pointer to the memory manager installed in the current
	// engine (set in createEngine(), ownership is passed to the engine builder).
	SymbolResolver const* m_memoryMgr = nullptr;
	// Guards m_codeMap.
	mutable std::mutex x_codeMap;
	// Code identifier -> compiled function and hit counter.
	std::unordered_map<std::string, CodeMapEntry> m_codeMap;

	/// The single LLVM context shared by all modules created in this file.
	static llvm::LLVMContext& getLLVMContext()
	{
		// TODO: This probably should be thread_local, but for now that causes
		// a crash when MCJIT is destroyed.
		static llvm::LLVMContext llvmContext;
		return llvmContext;
	}

	/// Create a fresh execution engine and install it in m_engine.
	void createEngine();

public:
	/// Access the process-wide JIT instance (created on first use).
	static JITImpl& instance()
	{
		// We need to keep this a singleton.
		// so we only call changeVersion on it.
		static JITImpl s_instance;
		return s_instance;
	}

	JITImpl();

	/// Drop all compiled code and recreate the engine once the memory
	/// reported by the memory manager exceeds the built-in limit.
	void checkMemorySize();

	llvm::ExecutionEngine& engine() { return *m_engine; }

	/// Look up the entry for a code identifier, counting the lookup as a hit.
	CodeMapEntry getExecFunc(std::string const& _codeIdentifier);
	/// Record the compiled function for a code identifier.
	void mapExecFunc(std::string const& _codeIdentifier, ExecFunc _funcAddr);

	/// Obtain a module for the code (from the cache or by compiling it), add it
	/// to the engine and return the address of the generated function.
	ExecFunc compile(evmc_revision _rev, bool _staticCall, byte const* _code, uint64_t _codeSize, std::string const& _codeIdentifier);

	// Host callback table; captured from the context on the first execute().
	evmc_context_fn_table const* host = nullptr;

	// Message currently being executed; set by execute() and read by call().
	evmc_message const* currentMsg = nullptr;
	// Copy of the output of the most recent call made through call().
	std::vector<uint8_t> returnBuffer;

    // Storage for the code copied by getCode().
    std::vector<uint8_t> codeBuffer;

    // Code is rejected by execute() until its hit count exceeds this value;
    // configurable through the "hits-threshold" option.
    size_t hitThreshold = 0;
};
173  
174  int64_t call(evmc_context* _ctx, int _kind, int64_t _gas, evmc_address const* _address,
175      evmc_uint256be const* _value, uint8_t const* _inputData, size_t _inputSize, uint8_t* _outputData,
176      size_t _outputSize, uint8_t const** o_bufData, size_t* o_bufSize) noexcept
177  {
178  	// FIXME: Handle unexpected exceptions.
179  	auto& jit = JITImpl::instance();
180  
181  	evmc_message msg;
182  	msg.destination = *_address;
183  	msg.sender = _kind != EVMC_DELEGATECALL ? jit.currentMsg->destination : jit.currentMsg->sender;
184  	msg.value = _kind != EVMC_DELEGATECALL ? *_value : jit.currentMsg->value;
185  	msg.input_data = _inputData;
186  	msg.input_size = _inputSize;
187  	msg.gas = _gas;
188  	msg.depth = jit.currentMsg->depth + 1;
189  	msg.flags = jit.currentMsg->flags;
190  	if (_kind == EVM_STATICCALL)
191  	{
192  		msg.kind = EVMC_CALL;
193  		msg.flags |= EVMC_STATIC;
194  	}
195  	else
196  		msg.kind = static_cast<evmc_call_kind>(_kind);
197  
198  	// FIXME: Handle code hash.
199  	evmc_result result;
200  	jit.host->call(&result, _ctx, &msg);
201  	// FIXME: Clarify when gas_left is valid.
202  	int64_t r = result.gas_left;
203  
204  	// Handle output. It can contain data from RETURN or REVERT opcodes.
205  	auto size = std::min(_outputSize, result.output_size);
206  	std::copy_n(result.output_data, size, _outputData);
207  
208  	// Update RETURNDATA buffer.
209  	// The buffer is already cleared.
210  	jit.returnBuffer = {result.output_data, result.output_data + result.output_size};
211  	*o_bufData = jit.returnBuffer.data();
212  	*o_bufSize = jit.returnBuffer.size();
213  
214  	if (_kind == EVMC_CREATE && result.status_code == EVMC_SUCCESS)
215  		std::copy_n(result.create_address.bytes, sizeof(result.create_address), _outputData);
216  
217  	if (result.status_code != EVMC_SUCCESS)
218  		r |= EVM_CALL_FAILURE;
219  
220  	if (result.release)
221  		result.release(&result);
222  	return r;
223  }
224  
225  
226  /// A wrapper for new EVM-C copycode callback function.
227  size_t getCode(uint8_t** o_pCode, evmc_context* _ctx, evmc_address const* _address) noexcept
228  {
229      auto& jit = JITImpl::instance();
230      size_t codeSize = jit.host->get_code_size(_ctx, _address);
231      jit.codeBuffer.resize(codeSize);  // Allocate needed memory to store the full code.
232  
233      // Copy the code to JIT's buffer and send the buffer reference back to LLVM.
234      size_t size =
235          jit.host->copy_code(_ctx, _address, 0, jit.codeBuffer.data(), jit.codeBuffer.size());
236      *o_pCode = jit.codeBuffer.data();
237      return size;
238  }
239  
/// Memory manager installed in the execution engine (see createEngine()).
///
/// On top of llvm::SectionMemoryManager it does two things:
///  - resolves the "env_sha3" and "evm.*" symbol names to local helpers and
///    host callbacks;
///  - keeps a running total of the section sizes requested from it, which
///    JITImpl::checkMemorySize() uses to decide when to reset the engine.
class SymbolResolver : public llvm::SectionMemoryManager
{
	/// Resolve a symbol name to an address; names not listed below are
	/// forwarded to the base class.
	llvm::JITSymbol findSymbol(std::string const& _name) override
	{
		auto& jit = JITImpl::instance();

		// Handle symbols' global prefix.
		// If in current DataLayout global symbols are prefixed, drop the
		// prefix from the name for local search.
		char prefix = jit.engine().getDataLayout().getGlobalPrefix();
		llvm::StringRef unprefixedName = (prefix != '\0' && _name[0] == prefix)
			? llvm::StringRef{_name}.drop_front() : llvm::StringRef{_name};

        // Note: all candidate addresses are evaluated before the lookup, so
        // jit.host is dereferenced on every call of this function.
        auto addr =
            llvm::StringSwitch<uint64_t>(unprefixedName)
                .Case("env_sha3", reinterpret_cast<uint64_t>(&keccak))
                .Case("evm.exists", reinterpret_cast<uint64_t>(jit.host->account_exists))
                .Case("evm.sload", reinterpret_cast<uint64_t>(jit.host->get_storage))
                .Case("evm.sstore", reinterpret_cast<uint64_t>(jit.host->set_storage))
                .Case("evm.balance", reinterpret_cast<uint64_t>(jit.host->get_balance))
                .Case("evm.codesize", reinterpret_cast<uint64_t>(jit.host->get_code_size))
                .Case("evm.code", reinterpret_cast<uint64_t>(getCode))
                .Case("evm.selfdestruct", reinterpret_cast<uint64_t>(jit.host->selfdestruct))
                .Case("evm.call", reinterpret_cast<uint64_t>(call))
                .Case("evm.get_tx_context", reinterpret_cast<uint64_t>(jit.host->get_tx_context))
                .Case("evm.blockhash", reinterpret_cast<uint64_t>(jit.host->get_block_hash))
                .Case("evm.log", reinterpret_cast<uint64_t>(jit.host->emit_log))
                .Default(0);
        if (addr)
            return {addr, llvm::JITSymbolFlags::Exported};

		// Fallback to default implementation that would search for the symbol
		// in the current process. Use the original prefixed symbol name.
		// TODO: In the future we should control the whole set of requested
		//       symbols (like memcpy, memset, etc) to improve performance.
		return llvm::SectionMemoryManager::findSymbol(_name);
	}

	/// Add @p _addedSize to the running total and, when statistics are
	/// enabled, report the total each time it passes the next print limit.
	void reportMemorySize(size_t _addedSize)
	{
		m_totalMemorySize += _addedSize;

		if (!g_stats)
			return;

		if (m_totalMemorySize >= m_printMemoryLimit)
		{
			constexpr size_t printMemoryStep = 10 * 1024 * 1024;
			// `value` is the total in units of 10 MiB; multiplying by 10
			// below converts it to the MB figure that is printed.
			auto value = double(m_totalMemorySize) / printMemoryStep;
			std::cerr << "EVMJIT total memory size: " << (10 * value) << " MB\n";
			m_printMemoryLimit += printMemoryStep;
		}
	}

	/// Account for the requested size, then allocate through the base class.
	uint8_t* allocateCodeSection(uintptr_t _size, unsigned _a, unsigned _id,
	                             llvm::StringRef _name) override
	{
		reportMemorySize(_size);
		return llvm::SectionMemoryManager::allocateCodeSection(_size, _a, _id, _name);
	}

	/// Account for the requested size, then allocate through the base class.
	uint8_t* allocateDataSection(uintptr_t _size, unsigned _a, unsigned _id,
	                             llvm::StringRef _name, bool _ro) override
	{
		reportMemorySize(_size);
		return llvm::SectionMemoryManager::allocateDataSection(_size, _a, _id, _name, _ro);
	}

	// Sum of all section sizes requested so far.
	size_t m_totalMemorySize = 0;
	// Next total at which reportMemorySize() prints; starts at 1 MiB and
	// grows by 10 MiB after every report.
	size_t m_printMemoryLimit = 1024 * 1024;

public:
	/// Sum of all code and data section sizes requested from this manager.
	size_t totalMemorySize() const { return m_totalMemorySize; }
};
314  
315  
316  CodeMapEntry JITImpl::getExecFunc(std::string const& _codeIdentifier)
317  {
318      std::lock_guard<std::mutex> lock{x_codeMap};
319      auto& entry = m_codeMap[_codeIdentifier];
320      ++entry.hits;
321      return entry;
322  }
323  
324  void JITImpl::mapExecFunc(std::string const& _codeIdentifier, ExecFunc _funcAddr)
325  {
326      std::lock_guard<std::mutex> lock{x_codeMap};
327      m_codeMap[_codeIdentifier].func = _funcAddr;
328  }
329  
330  ExecFunc JITImpl::compile(evmc_revision _rev, bool _staticCall, byte const* _code, uint64_t _codeSize,
331  	std::string const& _codeIdentifier)
332  {
333  	auto module = Cache::getObject(_codeIdentifier, getLLVMContext());
334  	if (!module)
335  	{
336  		// TODO: Listener support must be redesigned. These should be a feature of JITImpl
337  		//listener->stateChanged(ExecState::Compilation);
338  		assert(_code || !_codeSize);
339  		//TODO: Can the Compiler be stateless?
340  		module = Compiler({}, _rev, _staticCall, getLLVMContext()).compile(_code, _code + _codeSize, _codeIdentifier);
341  
342  		if (g_optimize)
343  		{
344  			//listener->stateChanged(ExecState::Optimization);
345  			optimize(*module);
346  		}
347  
348  		prepare(*module);
349  	}
350  
351  	if (g_dump)
352  	{
353  		llvm::raw_os_ostream cerr{std::cerr};
354  		module->print(cerr, nullptr);
355  	}
356  
357  
358  	m_engine->addModule(std::move(module));
359  	//listener->stateChanged(ExecState::CodeGen);
360  	return (ExecFunc)m_engine->getFunctionAddress(_codeIdentifier);
361  }
362  
363  } // anonymous namespace
364  
365  
366  ExecutionContext::~ExecutionContext() noexcept
367  {
368  	if (m_memData)
369  		std::free(m_memData);
370  }
371  
372  bytes_ref ExecutionContext::getReturnData() const
373  {
374  	auto data = m_data->callData;
375  	auto size = static_cast<size_t>(m_data->callDataSize);
376  
377  	if (data < m_memData || data >= m_memData + m_memSize || size == 0)
378  	{
379  		assert(size == 0); // data can be an invalid pointer only if size is 0
380  		m_data->callData = nullptr;
381  		return {};
382  	}
383  
384  	return bytes_ref{data, size};
385  }
386  
387  extern "C"
388  {
389  
390  EXPORT evmc_instance* evmjit_create()
391  {
392  	// Let's always return the same instance. It's a bit of faking, but actually
393  	// this might be a compliant implementation.
394  	return &JITImpl::instance();
395  }
396  
397  static void destroy(evmc_instance* instance)
398  {
399  	(void)instance;
400  	assert(instance == static_cast<void*>(&JITImpl::instance()));
401  }
402  
403  static evmc_result execute(evmc_instance* instance, evmc_context* context, evmc_revision rev,
404  	evmc_message const* msg, uint8_t const* code, size_t code_size)
405  {
406  	auto& jit = *reinterpret_cast<JITImpl*>(instance);
407  
408  	if (msg->depth == 0)
409  		jit.checkMemorySize();
410  
411  	if (!jit.host)
412  		jit.host = context->fn_table;
413  	assert(jit.host == context->fn_table);  // Require the fn_table not to change.
414  
415  	// TODO: Temporary keep track of the current message.
416  	evmc_message const* prevMsg = jit.currentMsg;
417  	jit.currentMsg = msg;
418  
419  	RuntimeData rt;
420  	rt.code = code;
421  	rt.codeSize = code_size;
422  	rt.gas = msg->gas;
423  	rt.callData = msg->input_data;
424  	rt.callDataSize = msg->input_size;
425  	std::memcpy(&rt.apparentValue, &msg->value, sizeof(msg->value));
426  	std::memset(&rt.address, 0, 12);
427  	std::memcpy(&rt.address[12], &msg->destination, sizeof(msg->destination));
428  	std::memset(&rt.caller, 0, 12);
429  	std::memcpy(&rt.caller[12], &msg->sender, sizeof(msg->sender));
430  	rt.depth = msg->depth;
431  
432  	ExecutionContext ctx{rt, context};
433  
434  	evmc_result result;
435  	result.status_code = EVMC_SUCCESS;
436  	result.gas_left = 0;
437  	result.output_data = nullptr;
438  	result.output_size = 0;
439  	result.release = nullptr;
440  
441      auto codeIdentifier = makeCodeId(msg->code_hash, rev, msg->flags);
442      auto codeEntry = jit.getExecFunc(codeIdentifier);
443      auto func = codeEntry.func;
444      if (!func)
445      {
446          //FIXME: We have a race condition here!
447  
448          if (codeEntry.hits <= jit.hitThreshold)
449          {
450              result.status_code = EVMC_REJECTED;
451              return result;
452          }
453  
454          if (g_stats)
455              std::cerr << "EVMJIT Compile " << codeIdentifier << " (" << codeEntry.hits << ")\n";
456  
457          const bool staticCall = (msg->flags & EVMC_STATIC) != 0;
458          func = jit.compile(rev, staticCall, ctx.code(), ctx.codeSize(), codeIdentifier);
459          if (!func)
460          {
461              result.status_code = EVMC_INTERNAL_ERROR;
462              return result;
463          }
464          jit.mapExecFunc(codeIdentifier, func);
465      }
466  
467      auto returnCode = func(&ctx);
468  
469  	if (returnCode == ReturnCode::Revert)
470  	{
471  		result.status_code = EVMC_REVERT;
472  		result.gas_left = rt.gas;
473  	}
474  	else if (returnCode == ReturnCode::OutOfGas)
475  	{
476  		// EVMJIT does not provide information what exactly type of failure
477  		// it was, so use generic EVM_FAILURE.
478  		result.status_code = EVMC_FAILURE;
479  	}
480  	else
481  	{
482  		// In case of success return the amount of gas left.
483  		result.gas_left = rt.gas;
484  	}
485  
486  	if (returnCode == ReturnCode::Return || returnCode == ReturnCode::Revert)
487  	{
488  		auto out = ctx.getReturnData();
489  		result.output_data = std::get<0>(out);
490  		result.output_size = std::get<1>(out);
491  	}
492  
493  	// Take care of the internal memory.
494  	if (ctx.m_memData)
495  	{
496  		// Use result's reserved data to store the memory pointer.
497  
498  		evmc_get_optional_data(&result)->pointer = ctx.m_memData;
499  
500  		// Set pointer to the destructor that will release the memory.
501  		result.release = [](evmc_result const* r)
502  		{
503  			std::free(evmc_get_const_optional_data(r)->pointer);
504  		};
505  		ctx.m_memData = nullptr;
506  	}
507  
508  	jit.currentMsg = prevMsg;
509  	return result;
510  }
511  
512  static int setOption(evmc_instance* instance, const char* name, const char* value) noexcept
513  {
514      try
515      {
516          if (name == std::string{"hits-threshold"})
517          {
518              auto& jit = static_cast<JITImpl&>(*instance);
519              jit.hitThreshold = std::stoul(value);
520              return 1;
521          }
522          return 0;
523      }
524      catch (...)
525      {
526          return 0;
527      }
528  }
529  
530  }  // extern "C"
531  
532  void JITImpl::createEngine()
533  {
534  	auto module = llvm::make_unique<llvm::Module>("", getLLVMContext());
535  
536  	// FIXME: LLVM 3.7: test on Windows
537  	auto triple = llvm::Triple(llvm::sys::getProcessTriple());
538  	if (triple.getOS() == llvm::Triple::OSType::Win32)
539  		triple.setObjectFormat(llvm::Triple::ObjectFormatType::ELF);  // MCJIT does not support COFF format
540  	module->setTargetTriple(triple.str());
541  
542  	llvm::EngineBuilder builder(std::move(module));
543  	builder.setEngineKind(llvm::EngineKind::JIT);
544  	auto memoryMgr = llvm::make_unique<SymbolResolver>();
545  	m_memoryMgr = memoryMgr.get();
546  	builder.setMCJITMemoryManager(std::move(memoryMgr));
547  	builder.setOptLevel(g_optimize ? llvm::CodeGenOpt::Default : llvm::CodeGenOpt::None);
548  #ifndef NDEBUG
549  	builder.setVerifyModules(true);
550  #endif
551  
552  	m_engine.reset(builder.create());
553  
554  	// TODO: Update cache listener
555  	m_engine->setObjectCache(Cache::init(g_cache, nullptr));
556  
557  	// FIXME: Disabled during API changes
558  	//if (preloadCache)
559  	//	Cache::preload(*m_engine, funcCache);
560  }
561  
562  JITImpl::JITImpl()
563    : evmc_instance({
564          EVMC_ABI_VERSION,
565          "evmjit",
566          EVMJIT_VERSION,
567          evmjit::destroy,
568          evmjit::execute,
569          evmjit::setOption,
570      })
571  {
572  	parseOptions();
573  
574  	bool preloadCache = g_cache == CacheMode::preload;
575  	if (preloadCache)
576  		g_cache = CacheMode::on;
577  
578  	llvm::InitializeNativeTarget();
579  	llvm::InitializeNativeTargetAsmPrinter();
580  
581  	createEngine();
582  }
583  
584  void JITImpl::checkMemorySize()
585  {
586  	constexpr size_t memoryLimit = 1000 * 1024 * 1024;
587  
588  	if (m_memoryMgr->totalMemorySize() > memoryLimit)
589  	{
590  		if (g_stats)
591  			std::cerr << "EVMJIT reset!\n";
592  
593  		std::lock_guard<std::mutex> lock{x_codeMap};
594  		m_codeMap.clear();
595  		m_engine.reset();
596  		createEngine();
597  	}
598  }
599  
600  }
601  }