parser.py
  1  """NginxParser is a member object of the NginxConfigurator class."""
  2  import glob
  3  import logging
  4  import os
  5  import pyparsing
  6  import re
  7  
  8  from letsencrypt import errors
  9  
 10  from letsencrypt_nginx import obj
 11  from letsencrypt_nginx import nginxparser
 12  
 13  
 14  logger = logging.getLogger(__name__)
 15  
 16  
 17  class NginxParser(object):
 18      """Class handles the fine details of parsing the Nginx Configuration.
 19  
 20      :ivar str root: Normalized abosulte path to the server root
 21          directory. Without trailing slash.
 22      :ivar dict parsed: Mapping of file paths to parsed trees
 23  
 24      """
 25  
 26      def __init__(self, root, ssl_options):
 27          self.parsed = {}
 28          self.root = os.path.abspath(root)
 29          self.loc = self._set_locations(ssl_options)
 30  
 31          # Parse nginx.conf and included files.
 32          # TODO: Check sites-available/ as well. For now, the configurator does
 33          # not enable sites from there.
 34          self.load()
 35  
 36      def load(self):
 37          """Loads Nginx files into a parsed tree.
 38  
 39          """
 40          self.parsed = {}
 41          self._parse_recursively(self.loc["root"])
 42  
 43      def _parse_recursively(self, filepath):
 44          """Parses nginx config files recursively by looking at 'include'
 45          directives inside 'http' and 'server' blocks. Note that this only
 46          reads Nginx files that potentially declare a virtual host.
 47  
 48          :param str filepath: The path to the files to parse, as a glob
 49  
 50          """
 51          filepath = self.abs_path(filepath)
 52          trees = self._parse_files(filepath)
 53          for tree in trees:
 54              for entry in tree:
 55                  if _is_include_directive(entry):
 56                      # Parse the top-level included file
 57                      self._parse_recursively(entry[1])
 58                  elif entry[0] == ['http'] or entry[0] == ['server']:
 59                      # Look for includes in the top-level 'http'/'server' context
 60                      for subentry in entry[1]:
 61                          if _is_include_directive(subentry):
 62                              self._parse_recursively(subentry[1])
 63                          elif entry[0] == ['http'] and subentry[0] == ['server']:
 64                              # Look for includes in a 'server' context within
 65                              # an 'http' context
 66                              for server_entry in subentry[1]:
 67                                  if _is_include_directive(server_entry):
 68                                      self._parse_recursively(server_entry[1])
 69  
 70      def abs_path(self, path):
 71          """Converts a relative path to an absolute path relative to the root.
 72          Does nothing for paths that are already absolute.
 73  
 74          :param str path: The path
 75          :returns: The absolute path
 76          :rtype: str
 77  
 78          """
 79          if not os.path.isabs(path):
 80              return os.path.join(self.root, path)
 81          else:
 82              return path
 83  
 84      def get_vhosts(self):
 85          # pylint: disable=cell-var-from-loop
 86          """Gets list of all 'virtual hosts' found in Nginx configuration.
 87          Technically this is a misnomer because Nginx does not have virtual
 88          hosts, it has 'server blocks'.
 89  
 90          :returns: List of :class:`~letsencrypt_nginx.obj.VirtualHost`
 91              objects found in configuration
 92          :rtype: list
 93  
 94          """
 95          enabled = True  # We only look at enabled vhosts for now
 96          vhosts = []
 97          servers = {}
 98  
 99          for filename in self.parsed:
100              tree = self.parsed[filename]
101              servers[filename] = []
102              srv = servers[filename]  # workaround undefined loop var in lambdas
103  
104              # Find all the server blocks
105              _do_for_subarray(tree, lambda x: x[0] == ['server'],
106                               lambda x: srv.append(x[1]))
107  
108              # Find 'include' statements in server blocks and append their trees
109              for i, server in enumerate(servers[filename]):
110                  new_server = self._get_included_directives(server)
111                  servers[filename][i] = new_server
112  
113          for filename in servers:
114              for server in servers[filename]:
115                  # Parse the server block into a VirtualHost object
116                  parsed_server = _parse_server(server)
117                  vhost = obj.VirtualHost(filename,
118                                          parsed_server['addrs'],
119                                          parsed_server['ssl'],
120                                          enabled,
121                                          parsed_server['names'],
122                                          server)
123                  vhosts.append(vhost)
124  
125          return vhosts
126  
127      def _get_included_directives(self, block):
128          """Returns array with the "include" directives expanded out by
129          concatenating the contents of the included file to the block.
130  
131          :param list block:
132          :rtype: list
133  
134          """
135          result = list(block)  # Copy the list to keep self.parsed idempotent
136          for directive in block:
137              if _is_include_directive(directive):
138                  included_files = glob.glob(
139                      self.abs_path(directive[1]))
140                  for incl in included_files:
141                      try:
142                          result.extend(self.parsed[incl])
143                      except KeyError:
144                          pass
145          return result
146  
147      def _parse_files(self, filepath, override=False):
148          """Parse files from a glob
149  
150          :param str filepath: Nginx config file path
151          :param bool override: Whether to parse a file that has been parsed
152          :returns: list of parsed tree structures
153          :rtype: list
154  
155          """
156          files = glob.glob(filepath)
157          trees = []
158          for item in files:
159              if item in self.parsed and not override:
160                  continue
161              try:
162                  with open(item) as _file:
163                      parsed = nginxparser.load(_file)
164                      self.parsed[item] = parsed
165                      trees.append(parsed)
166              except IOError:
167                  logger.warn("Could not open file: %s", item)
168              except pyparsing.ParseException:
169                  logger.debug("Could not parse file: %s", item)
170          return trees
171  
172      def _set_locations(self, ssl_options):
173          """Set default location for directives.
174  
175          Locations are given as file_paths
176          .. todo:: Make sure that files are included
177  
178          """
179          root = self._find_config_root()
180          default = root
181  
182          nginx_temp = os.path.join(self.root, "nginx_ports.conf")
183          if os.path.isfile(nginx_temp):
184              listen = nginx_temp
185              name = nginx_temp
186          else:
187              listen = default
188              name = default
189  
190          return {"root": root, "default": default, "listen": listen,
191                  "name": name, "ssl_options": ssl_options}
192  
193      def _find_config_root(self):
194          """Find the Nginx Configuration Root file."""
195          location = ['nginx.conf']
196  
197          for name in location:
198              if os.path.isfile(os.path.join(self.root, name)):
199                  return os.path.join(self.root, name)
200  
201          raise errors.NoInstallationError(
202              "Could not find configuration root")
203  
204      def filedump(self, ext='tmp'):
205          """Dumps parsed configurations into files.
206  
207          :param str ext: The file extension to use for the dumped files. If
208              empty, this overrides the existing conf files.
209  
210          """
211          for filename in self.parsed:
212              tree = self.parsed[filename]
213              if ext:
214                  filename = filename + os.path.extsep + ext
215              try:
216                  with open(filename, 'w') as _file:
217                      nginxparser.dump(tree, _file)
218              except IOError:
219                  logger.error("Could not open file for writing: %s", filename)
220  
221      def _has_server_names(self, entry, names):
222          """Checks if a server block has the given set of server_names. This
223          is the primary way of identifying server blocks in the configurator.
224          Returns false if 'entry' doesn't look like a server block at all.
225  
226          ..todo :: Doesn't match server blocks whose server_name directives are
227          split across multiple conf files.
228  
229          :param list entry: The block to search
230          :param set names: The names to match
231          :rtype: bool
232  
233          """
234          if len(names) == 0:
235              # Nothing to identify blocks with
236              return False
237  
238          if not isinstance(entry, list):
239              # Can't be a server block
240              return False
241  
242          new_entry = self._get_included_directives(entry)
243          server_names = set()
244          for item in new_entry:
245              if not isinstance(item, list):
246                  # Can't be a server block
247                  return False
248  
249              if item[0] == 'server_name':
250                  server_names.update(_get_servernames(item[1]))
251  
252          return server_names == names
253  
254      def add_server_directives(self, filename, names, directives,
255                                replace=False):
256          """Add or replace directives in the first server block with names.
257  
258          ..note :: If replace is True, this raises a misconfiguration error
259          if the directive does not already exist.
260  
261          ..todo :: Doesn't match server blocks whose server_name directives are
262              split across multiple conf files.
263  
264          :param str filename: The absolute filename of the config file
265          :param set names: The server_name to match
266          :param list directives: The directives to add
267          :param bool replace: Whether to only replace existing directives
268  
269          """
270          _do_for_subarray(self.parsed[filename],
271                           lambda x: self._has_server_names(x, names),
272                           lambda x: _add_directives(x, directives, replace))
273  
274      def add_http_directives(self, filename, directives):
275          """Adds directives to the first encountered HTTP block in filename.
276  
277          :param str filename: The absolute filename of the config file
278          :param list directives: The directives to add
279  
280          """
281          _do_for_subarray(self.parsed[filename],
282                           lambda x: x[0] == ['http'],
283                           lambda x: _add_directives(x[1], [directives], False))
284  
285      def get_all_certs_keys(self):
286          """Gets all certs and keys in the nginx config.
287  
288          :returns: list of tuples with form [(cert, key, path)]
289              cert - str path to certificate file
290              key - str path to associated key file
291              path - File path to configuration file.
292          :rtype: set
293  
294          """
295          c_k = set()
296          vhosts = self.get_vhosts()
297          for vhost in vhosts:
298              tup = [None, None, vhost.filep]
299              if vhost.ssl:
300                  for directive in vhost.raw:
301                      if directive[0] == 'ssl_certificate':
302                          tup[0] = directive[1]
303                      elif directive[0] == 'ssl_certificate_key':
304                          tup[1] = directive[1]
305              if tup[0] is not None and tup[1] is not None:
306                  c_k.add(tuple(tup))
307          return c_k
308  
309  
310  def _do_for_subarray(entry, condition, func):
311      """Executes a function for a subarray of a nested array if it matches
312      the given condition.
313  
314      :param list entry: The list to iterate over
315      :param function condition: Returns true iff func should be executed on item
316      :param function func: The function to call for each matching item
317  
318      """
319      if isinstance(entry, list):
320          if condition(entry):
321              func(entry)
322          else:
323              for item in entry:
324                  _do_for_subarray(item, condition, func)
325  
326  
def get_best_match(target_name, names):
    """Picks the candidate that best matches target_name, following the
    Nginx precedence order: exact name, then the longest wildcard starting
    with ``*``, then the longest wildcard ending with ``*``, then a regex.

    :param str target_name: The name to match
    :param set names: The candidate server names
    :returns: Tuple of (type of match, the name that matched), or
        ``(None, None)`` when no candidate matches
    :rtype: tuple

    """
    matches = {
        'exact': [],
        'wildcard_start': [],
        'wildcard_end': [],
        'regex': [],
    }

    # Each candidate lands in the first category it qualifies for
    for candidate in names:
        if _exact_match(target_name, candidate):
            kind = 'exact'
        elif _wildcard_match(target_name, candidate, True):
            kind = 'wildcard_start'
        elif _wildcard_match(target_name, candidate, False):
            kind = 'wildcard_end'
        elif _regex_match(target_name, candidate):
            kind = 'regex'
        else:
            continue
        matches[kind].append(candidate)

    if matches['exact']:
        # There can be more than one exact match; e.g. eff.org, .eff.org
        return ('exact', min(matches['exact'], key=len))
    if matches['wildcard_start']:
        # The longest wildcard wins
        return ('wildcard_start', max(matches['wildcard_start'], key=len))
    if matches['wildcard_end']:
        # The longest wildcard wins
        return ('wildcard_end', max(matches['wildcard_end'], key=len))
    if matches['regex']:
        # Just return the first one for now
        return ('regex', matches['regex'][0])

    return (None, None)
371  
372  
373  def _exact_match(target_name, name):
374      return target_name == name or '.' + target_name == name
375  
376  
377  def _wildcard_match(target_name, name, start):
378      # Degenerate case
379      if name == '*':
380          return True
381  
382      parts = target_name.split('.')
383      match_parts = name.split('.')
384  
385      # If the domain ends in a wildcard, do the match procedure in reverse
386      if not start:
387          parts.reverse()
388          match_parts.reverse()
389  
390      # The first part must be a wildcard or blank, e.g. '.eff.org'
391      first = match_parts.pop(0)
392      if first != '*' and first != '':
393          return False
394  
395      target_name = '.'.join(parts)
396      name = '.'.join(match_parts)
397  
398      # Ex: www.eff.org matches *.eff.org, eff.org does not match *.eff.org
399      return target_name.endswith('.' + name)
400  
401  
402  def _regex_match(target_name, name):
403      # Must start with a tilde
404      if len(name) < 2 or name[0] != '~':
405          return False
406  
407      # After tilde is a perl-compatible regex
408      try:
409          regex = re.compile(name[1:])
410          if re.match(regex, target_name):
411              return True
412          else:
413              return False
414      except re.error:
415          # perl-compatible regexes are sometimes not recognized by python
416          return False
417  
418  
419  def _is_include_directive(entry):
420      """Checks if an nginx parsed entry is an 'include' directive.
421  
422      :param list entry: the parsed entry
423      :returns: Whether it's an 'include' directive
424      :rtype: bool
425  
426      """
427      return (isinstance(entry, list) and
428              entry[0] == 'include' and len(entry) == 2 and
429              isinstance(entry[1], str))
430  
431  
432  def _get_servernames(names):
433      """Turns a server_name string into a list of server names
434  
435      :param str names: server names
436      :rtype: list
437  
438      """
439      whitespace_re = re.compile(r'\s+')
440      names = re.sub(whitespace_re, ' ', names)
441      return names.split(' ')
442  
443  
444  def _parse_server(server):
445      """Parses a list of server directives.
446  
447      :param list server: list of directives in a server block
448      :rtype: dict
449  
450      """
451      parsed_server = {}
452      parsed_server['addrs'] = set()
453      parsed_server['ssl'] = False
454      parsed_server['names'] = set()
455  
456      for directive in server:
457          if directive[0] == 'listen':
458              addr = obj.Addr.fromstring(directive[1])
459              parsed_server['addrs'].add(addr)
460              if not parsed_server['ssl'] and addr.ssl:
461                  parsed_server['ssl'] = True
462          elif directive[0] == 'server_name':
463              parsed_server['names'].update(
464                  _get_servernames(directive[1]))
465  
466      return parsed_server
467  
468  
469  def _add_directives(block, directives, replace=False):
470      """Adds or replaces directives in a block. If the directive doesn't exist in
471      the entry already, raises a misconfiguration error.
472  
473      ..todo :: Find directives that are in included files.
474  
475      :param list block: The block to replace in
476      :param list directives: The new directives.
477  
478      """
479      for directive in directives:
480          if not replace:
481              # We insert new directives at the top of the block, mostly
482              # to work around https://trac.nginx.org/nginx/ticket/810
483              block.insert(0, directive)
484          else:
485              changed = False
486              if len(directive) == 0:
487                  continue
488              for index, line in enumerate(block):
489                  if len(line) > 0 and line[0] == directive[0]:
490                      block[index] = directive
491                      changed = True
492              if not changed:
493                  raise errors.MisconfigurationError(
494                      'Let\'s Encrypt expected directive for %s in the Nginx '
495                      'config but did not find it.' % directive[0])