parser.py
  1  """ApacheParser is a member object of the ApacheConfigurator class."""
  2  import fnmatch
  3  import itertools
  4  import logging
  5  import os
  6  import re
  7  import subprocess
  8  
  9  from letsencrypt import errors
 10  
 11  
 12  logger = logging.getLogger(__name__)
 13  
 14  
class ApacheParser(object):
    """Class handles the fine details of parsing the Apache Configuration.

    .. todo:: Make parsing general... remove sites-available etc...

    :ivar aug: Augeas object used to query and modify the configuration
        tree (its match/get/set/insert/load methods are called).
    :ivar str root: Normalized absolute path to the server root
        directory. Without trailing slash.
    :ivar set modules: All module names that are currently enabled.
    :ivar dict variables: Runtime variables parsed from the ``Define:``
        lines of the ``DUMP_RUN_CFG`` output, mapping name to value.
    :ivar dict loc: Location to place directives, root - configuration origin,
        default - user config file, name - NameVirtualHost; listen and
        name point at ports.conf when it exists, otherwise at default.

    """
    # Matches variable references of the form ${NAME} inside an argument.
    arg_var_interpreter = re.compile(r"\$\{[^ \}]*}")
    # A path component containing any of these characters is treated as an
    # fnmatch pattern when an Include argument is converted.
    fnmatch_chars = set(["*", "?", "\\", "[", "]"])
 30  
    def __init__(self, aug, root, ctl):
        """Load the Apache configuration rooted at ``root`` into Augeas.

        :param aug: Augeas object used for all configuration tree access
        :param str root: Path to the server root; stored as an absolute path
        :param str ctl: Executable that is run with ``-D DUMP_RUN_CFG`` to
            obtain the runtime variables

        """
        # Note: Order is important here.

        # This uses the binary, so it can be done first.
        # https://httpd.apache.org/docs/2.4/mod/core.html#define
        # https://httpd.apache.org/docs/2.4/mod/core.html#ifdefine
        # This only handles invocation parameters and Define directives!
        self.variables = {}
        self.update_runtime_variables(ctl)

        self.aug = aug
        # Find configuration root and make sure augeas can parse it.
        self.root = os.path.abspath(root)
        self.loc = {"root": self._find_config_root()}
        self._parse_file(self.loc["root"])

        # This problem has been fixed in Augeas 1.0
        self.standardize_excl()

        # Temporarily set modules to be empty, so that find_dirs can work
        # https://httpd.apache.org/docs/2.4/mod/core.html#ifmodule
        # This needs to come before locations are set.
        self.modules = set()
        self.init_modules()

        # Set up rest of locations ("default", "listen" and "name" keys)
        self.loc.update(self._set_locations())

        # Must also attempt to parse sites-available or equivalent
        # Sites-available is not included naturally in configuration
        self._parse_file(os.path.join(self.root, "sites-available") + "/*")
 62  
 63      def init_modules(self):
 64          """Iterates on the configuration until no new modules are loaded.
 65  
 66          ..todo:: This should be attempted to be done with a binary to avoid
 67              the iteration issue.  Else... parse and enable mods at same time.
 68  
 69          """
 70          # Since modules are being initiated... clear existing set.
 71          self.modules = set()
 72          matches = self.find_dir("LoadModule")
 73  
 74          iterator = iter(matches)
 75          # Make sure prev_size != cur_size for do: while: iteration
 76          prev_size = -1
 77  
 78          while len(self.modules) != prev_size:
 79              prev_size = len(self.modules)
 80  
 81              for match_name, match_filename in itertools.izip(
 82                      iterator, iterator):
 83                  self.modules.add(self.get_arg(match_name))
 84                  self.modules.add(
 85                      os.path.basename(self.get_arg(match_filename))[:-2] + "c")
 86  
 87      def update_runtime_variables(self, ctl):
 88          """"
 89  
 90          .. note:: Compile time variables (apache2ctl -V) are not used within the
 91              dynamic configuration files.  These should not be parsed or
 92              interpreted.
 93  
 94          .. todo:: Create separate compile time variables... simply for arg_get()
 95  
 96          """
 97          stdout = self._get_runtime_cfg(ctl)
 98  
 99          variables = dict()
100          matches = re.compile(r"Define: ([^ \n]*)").findall(stdout)
101          try:
102              matches.remove("DUMP_RUN_CFG")
103          except ValueError:
104              raise errors.PluginError("Unable to parse runtime variables")
105  
106          for match in matches:
107              if match.count("=") > 1:
108                  logger.error("Unexpected number of equal signs in "
109                               "apache2ctl -D DUMP_RUN_CFG")
110                  raise errors.PluginError(
111                      "Error parsing Apache runtime variables")
112              parts = match.partition("=")
113              variables[parts[0]] = parts[2]
114  
115          self.variables = variables
116  
117      def _get_runtime_cfg(self, ctl):  # pylint: disable=no-self-use
118          """Get runtime configuration info.
119  
120          :returns: stdout from DUMP_RUN_CFG
121  
122          """
123          try:
124              proc = subprocess.Popen(
125                  [ctl, "-D", "DUMP_RUN_CFG"],
126                  stdout=subprocess.PIPE,
127                  stderr=subprocess.PIPE)
128              stdout, stderr = proc.communicate()
129  
130          except (OSError, ValueError):
131              logger.error(
132                  "Error accessing %s for runtime parameters!%s", ctl, os.linesep)
133              raise errors.MisconfigurationError(
134                  "Error accessing loaded Apache parameters: %s", ctl)
135          # Small errors that do not impede
136          if proc.returncode != 0:
137              logger.warn("Error in checking parameter list: %s", stderr)
138              raise errors.MisconfigurationError(
139                  "Apache is unable to check whether or not the module is "
140                  "loaded because Apache is misconfigured.")
141  
142          return stdout
143  
144      def filter_args_num(self, matches, args):  # pylint: disable=no-self-use
145          """Filter out directives with specific number of arguments.
146  
147          This function makes the assumption that all related arguments are given
148          in order.  Thus /files/apache/directive[5]/arg[2] must come immediately
149          after /files/apache/directive[5]/arg[1]. Runs in 1 linear pass.
150  
151          :param string matches: Matches of all directives with arg nodes
152          :param int args: Number of args you would like to filter
153  
154          :returns: List of directives that contain # of arguments.
155              (arg is stripped off)
156  
157          """
158          filtered = []
159          if args == 1:
160              for i in range(len(matches)):
161                  if matches[i].endswith("/arg"):
162                      filtered.append(matches[i][:-4])
163          else:
164              for i in range(len(matches)):
165                  if matches[i].endswith("/arg[%d]" % args):
166                      # Make sure we don't cause an IndexError (end of list)
167                      # Check to make sure arg + 1 doesn't exist
168                      if (i == (len(matches) - 1) or
169                              not matches[i + 1].endswith("/arg[%d]" % (args + 1))):
170                          filtered.append(matches[i][:-len("/arg[%d]" % args)])
171  
172          return filtered
173  
174      def add_dir_to_ifmodssl(self, aug_conf_path, directive, args):
175          """Adds directive and value to IfMod ssl block.
176  
177          Adds given directive and value along configuration path within
178          an IfMod mod_ssl.c block.  If the IfMod block does not exist in
179          the file, it is created.
180  
181          :param str aug_conf_path: Desired Augeas config path to add directive
182          :param str directive: Directive you would like to add, e.g. Listen
183          :param args: Values of the directive; str "443" or list of str
184          :type args: list
185  
186          """
187          # TODO: Add error checking code... does the path given even exist?
188          #       Does it throw exceptions?
189          if_mod_path = self._get_ifmod(aug_conf_path, "mod_ssl.c")
190          # IfModule can have only one valid argument, so append after
191          self.aug.insert(if_mod_path + "arg", "directive", False)
192          nvh_path = if_mod_path + "directive[1]"
193          self.aug.set(nvh_path, directive)
194          if len(args) == 1:
195              self.aug.set(nvh_path + "/arg", args[0])
196          else:
197              for i, arg in enumerate(args):
198                  self.aug.set("%s/arg[%d]" % (nvh_path, i + 1), arg)
199  
200      def _get_ifmod(self, aug_conf_path, mod):
201          """Returns the path to <IfMod mod> and creates one if it doesn't exist.
202  
203          :param str aug_conf_path: Augeas configuration path
204          :param str mod: module ie. mod_ssl.c
205  
206          """
207          if_mods = self.aug.match(("%s/IfModule/*[self::arg='%s']" %
208                                    (aug_conf_path, mod)))
209          if len(if_mods) == 0:
210              self.aug.set("%s/IfModule[last() + 1]" % aug_conf_path, "")
211              self.aug.set("%s/IfModule[last()]/arg" % aug_conf_path, mod)
212              if_mods = self.aug.match(("%s/IfModule/*[self::arg='%s']" %
213                                        (aug_conf_path, mod)))
214          # Strip off "arg" at end of first ifmod path
215          return if_mods[0][:len(if_mods[0]) - 3]
216  
217      def add_dir(self, aug_conf_path, directive, args):
218          """Appends directive to the end fo the file given by aug_conf_path.
219  
220          .. note:: Not added to AugeasConfigurator because it may depend
221              on the lens
222  
223          :param str aug_conf_path: Augeas configuration path to add directive
224          :param str directive: Directive to add
225          :param args: Value of the directive. ie. Listen 443, 443 is arg
226          :type args: list or str
227  
228          """
229          self.aug.set(aug_conf_path + "/directive[last() + 1]", directive)
230          if isinstance(args, list):
231              for i, value in enumerate(args, 1):
232                  self.aug.set(
233                      "%s/directive[last()]/arg[%d]" % (aug_conf_path, i), value)
234          else:
235              self.aug.set(aug_conf_path + "/directive[last()]/arg", args)
236  
237      def find_dir(self, directive, arg=None, start=None, exclude=True):
238          """Finds directive in the configuration.
239  
240          Recursively searches through config files to find directives
241          Directives should be in the form of a case insensitive regex currently
242  
243          .. todo:: arg should probably be a list
244          .. todo:: arg search currently only supports direct matching. It does
245              not handle the case of variables or quoted arguments. This should
246              be adapted to use a generic search for the directive and then do a
247              case-insensitive self.get_arg filter
248  
249          Note: Augeas is inherently case sensitive while Apache is case
250          insensitive.  Augeas 1.0 allows case insensitive regexes like
251          regexp(/Listen/, "i"), however the version currently supported
252          by Ubuntu 0.10 does not.  Thus I have included my own case insensitive
253          transformation by calling case_i() on everything to maintain
254          compatibility.
255  
256          :param str directive: Directive to look for
257          :param arg: Specific value directive must have, None if all should
258                      be considered
259          :type arg: str or None
260  
261          :param str start: Beginning Augeas path to begin looking
262          :param bool exclude: Whether or not to exclude directives based on
263              variables and enabled modules
264  
265          """
266          # Cannot place member variable in the definition of the function so...
267          if not start:
268              start = get_aug_path(self.loc["root"])
269  
270          # No regexp code
271          # if arg is None:
272          #     matches = self.aug.match(start +
273          # "//*[self::directive='" + directive + "']/arg")
274          # else:
275          #     matches = self.aug.match(start +
276          # "//*[self::directive='" + directive +
277          #   "']/* [self::arg='" + arg + "']")
278  
279          # includes = self.aug.match(start +
280          # "//* [self::directive='Include']/* [label()='arg']")
281  
282          regex = "(%s)|(%s)|(%s)" % (case_i(directive),
283                                      case_i("Include"),
284                                      case_i("IncludeOptional"))
285          matches = self.aug.match(
286              "%s//*[self::directive=~regexp('%s')]" % (start, regex))
287  
288          if exclude:
289              matches = self._exclude_dirs(matches)
290  
291          if arg is None:
292              arg_suffix = "/arg"
293          else:
294              arg_suffix = "/*[self::arg=~regexp('%s')]" % case_i(arg)
295  
296          ordered_matches = []
297  
298          # TODO: Wildcards should be included in alphabetical order
299          # https://httpd.apache.org/docs/2.4/mod/core.html#include
300          for match in matches:
301              dir_ = self.aug.get(match).lower()
302              if dir_ == "include" or dir_ == "includeoptional":
303                  # start[6:] to strip off /files
304                  #print self._get_include_path(self.get_arg(match +"/arg")), directive, arg
305                  ordered_matches.extend(self.find_dir(
306                      directive, arg,
307                      self._get_include_path(self.get_arg(match + "/arg")),
308                      exclude))
309              # This additionally allows Include
310              if dir_ == directive.lower():
311                  ordered_matches.extend(self.aug.match(match + arg_suffix))
312  
313          return ordered_matches
314  
315      def get_arg(self, match):
316          """Uses augeas.get to get argument value and interprets result.
317  
318          This also converts all variables and parameters appropriately.
319  
320          """
321          value = self.aug.get(match)
322  
323          # No need to strip quotes for variables, as apache2ctl already does this
324          # but we do need to strip quotes for all normal arguments.
325  
326          # Note: normal argument may be a quoted variable
327          # e.g. strip now, not later
328          value = value.strip("'\"")
329  
330          variables = ApacheParser.arg_var_interpreter.findall(value)
331  
332          for var in variables:
333              # Strip off ${ and }
334              try:
335                  value = value.replace(var, self.variables[var[2:-1]])
336              except KeyError:
337                  raise errors.PluginError("Error Parsing variable: %s" % var)
338  
339          return value
340  
341      def _exclude_dirs(self, matches):
342          """Exclude directives that are not loaded into the configuration."""
343          filters = [("ifmodule", self.modules), ("ifdefine", self.variables)]
344  
345          valid_matches = []
346  
347          for match in matches:
348              for filter_ in filters:
349                  if not self._pass_filter(match, filter_):
350                      break
351              else:
352                  valid_matches.append(match)
353          return valid_matches
354  
355      def _pass_filter(self, match, filter_):
356          """Determine if directive passes a filter.
357  
358          :param str match: Augeas path
359          :param list filter: list of tuples of form
360              [("lowercase if directive", set of relevant parameters)]
361  
362          """
363          match_l = match.lower()
364          last_match_idx = match_l.find(filter_[0])
365  
366          while last_match_idx != -1:
367              # Check args
368              end_of_if = match_l.find("/", last_match_idx)
369              # This should be aug.get (vars are not used e.g. parser.aug_get)
370              expression = self.aug.get(match[:end_of_if] + "/arg")
371  
372              if expression.startswith("!"):
373                  # Strip off "!"
374                  if expression[1:] in filter_[1]:
375                      return False
376              else:
377                  if expression not in filter_[1]:
378                      return False
379  
380              last_match_idx = match_l.find(filter_[0], end_of_if)
381  
382          return True
383  
384      def _get_include_path(self, arg):
385          """Converts an Apache Include directive into Augeas path.
386  
387          Converts an Apache Include directive argument into an Augeas
388          searchable path
389  
390          .. todo:: convert to use os.path.join()
391  
392          :param str arg: Argument of Include directive
393  
394          :returns: Augeas path string
395          :rtype: str
396  
397          """
398          # Check to make sure only expected characters are used <- maybe remove
399          # validChars = re.compile("[a-zA-Z0-9.*?_-/]*")
400          # matchObj = validChars.match(arg)
401          # if matchObj.group() != arg:
402          #     logger.error("Error: Invalid regexp characters in %s", arg)
403          #     return []
404  
405          # Remove beginning and ending quotes
406          arg = arg.strip("'\"")
407  
408          # Standardize the include argument based on server root
409          if not arg.startswith("/"):
410              # Normpath will condense ../
411              arg = os.path.normpath(os.path.join(self.root, arg))
412          else:
413              arg = os.path.normpath(arg)
414  
415          # Attempts to add a transform to the file if one does not already exist
416          if os.path.isdir(arg):
417              self._parse_file(os.path.join(arg, "*"))
418          else:
419              self._parse_file(arg)
420  
421          # Argument represents an fnmatch regular expression, convert it
422          # Split up the path and convert each into an Augeas accepted regex
423          # then reassemble
424          split_arg = arg.split("/")
425          for idx, split in enumerate(split_arg):
426              if any(char in ApacheParser.fnmatch_chars for char in split):
427                  # Turn it into a augeas regex
428                  # TODO: Can this instead be an augeas glob instead of regex
429                  split_arg[idx] = ("* [label()=~regexp('%s')]" %
430                                    self.fnmatch_to_re(split))
431          # Reassemble the argument
432          # Note: This also normalizes the argument /serverroot/ -> /serverroot
433          arg = "/".join(split_arg)
434  
435          return get_aug_path(arg)
436  
437      def fnmatch_to_re(self, clean_fn_match):  # pylint: disable=no-self-use
438          """Method converts Apache's basic fnmatch to regular expression.
439  
440          Assumption - Configs are assumed to be well-formed and only writable by
441          privileged users.
442  
443          https://apr.apache.org/docs/apr/2.0/apr__fnmatch_8h_source.html
444          http://apache2.sourcearchive.com/documentation/2.2.16-6/apr__fnmatch_8h_source.html
445  
446          :param str clean_fn_match: Apache style filename match, similar to globs
447  
448          :returns: regex suitable for augeas
449          :rtype: str
450  
451          """
452          # This strips off final /Z(?ms)
453          return fnmatch.translate(clean_fn_match)[:-7]
454  
455      def _parse_file(self, filepath):
456          """Parse file with Augeas
457  
458          Checks to see if file_path is parsed by Augeas
459          If filepath isn't parsed, the file is added and Augeas is reloaded
460  
461          :param str filepath: Apache config file path
462  
463          """
464          # Test if augeas included file for Httpd.lens
465          # Note: This works for augeas globs, ie. *.conf
466          inc_test = self.aug.match(
467              "/augeas/load/Httpd/incl [. ='%s']" % filepath)
468          if not inc_test:
469              # Load up files
470              # This doesn't seem to work on TravisCI
471              # self.aug.add_transform("Httpd.lns", [filepath])
472              self._add_httpd_transform(filepath)
473              self.aug.load()
474  
475      def _add_httpd_transform(self, incl):
476          """Add a transform to Augeas.
477  
478          This function will correctly add a transform to augeas
479          The existing augeas.add_transform in python doesn't seem to work for
480          Travis CI as it loads in libaugeas.so.0.10.0
481  
482          :param str incl: filepath to include for transform
483  
484          """
485          last_include = self.aug.match("/augeas/load/Httpd/incl [last()]")
486          if last_include:
487              # Insert a new node immediately after the last incl
488              self.aug.insert(last_include[0], "incl", False)
489              self.aug.set("/augeas/load/Httpd/incl[last()]", incl)
490          # On first use... must load lens and add file to incl
491          else:
492              # Augeas uses base 1 indexing... insert at beginning...
493              self.aug.set("/augeas/load/Httpd/lens", "Httpd.lns")
494              self.aug.set("/augeas/load/Httpd/incl", incl)
495  
496      def standardize_excl(self):
497          """Standardize the excl arguments for the Httpd lens in Augeas.
498  
499          Note: Hack!
500          Standardize the excl arguments for the Httpd lens in Augeas
501          Servers sometimes give incorrect defaults
502          Note: This problem should be fixed in Augeas 1.0.  Unfortunately,
503          Augeas 0.10 appears to be the most popular version currently.
504  
505          """
506          # attempt to protect against augeas error in 0.10.0 - ubuntu
507          # *.augsave -> /*.augsave upon augeas.load()
508          # Try to avoid bad httpd files
509          # There has to be a better way... but after a day and a half of testing
510          # I had no luck
511          # This is a hack... work around... submit to augeas if still not fixed
512  
513          excl = ["*.augnew", "*.augsave", "*.dpkg-dist", "*.dpkg-bak",
514                  "*.dpkg-new", "*.dpkg-old", "*.rpmsave", "*.rpmnew",
515                  "*~",
516                  self.root + "/*.augsave",
517                  self.root + "/*~",
518                  self.root + "/*/*augsave",
519                  self.root + "/*/*~",
520                  self.root + "/*/*/*.augsave",
521                  self.root + "/*/*/*~"]
522  
523          for i, excluded in enumerate(excl, 1):
524              self.aug.set("/augeas/load/Httpd/excl[%d]" % i, excluded)
525  
526          self.aug.load()
527  
528      def _set_locations(self):
529          """Set default location for directives.
530  
531          Locations are given as file_paths
532          .. todo:: Make sure that files are included
533  
534          """
535          default = self._set_user_config_file()
536  
537          temp = os.path.join(self.root, "ports.conf")
538          if os.path.isfile(temp):
539              listen = temp
540              name = temp
541          else:
542              listen = default
543              name = default
544  
545          return {"default": default, "listen": listen, "name": name}
546  
547      def _find_config_root(self):
548          """Find the Apache Configuration Root file."""
549          location = ["apache2.conf", "httpd.conf"]
550  
551          for name in location:
552              if os.path.isfile(os.path.join(self.root, name)):
553                  return os.path.join(self.root, name)
554  
555          raise errors.NoInstallationError("Could not find configuration root")
556  
557      def _set_user_config_file(self):
558          """Set the appropriate user configuration file
559  
560          .. todo:: This will have to be updated for other distros versions
561  
562          :param str root: pathname which contains the user config
563  
564          """
565          # Basic check to see if httpd.conf exists and
566          # in hierarchy via direct include
567          # httpd.conf was very common as a user file in Apache 2.2
568          if (os.path.isfile(os.path.join(self.root, "httpd.conf")) and
569                  self.find_dir("Include", "httpd.conf", self.loc["root"])):
570              return os.path.join(self.root, "httpd.conf")
571          else:
572              return os.path.join(self.root, "apache2.conf")
573  
574  
def case_i(string):
    """Return a case insensitive regex for ``string``.

    The string is escaped first, then every alphabetic character is
    replaced by a two character class holding its upper and lower case
    form.  Sloppy, but necessary; may be replaced by a more proper /i once
    augeas 1.0 is widely supported.

    :param str string: string to make case i regex

    :returns: Escaped, case insensitive regex
    :rtype: str

    """
    pieces = []
    for char in re.escape(string):
        if char.isalpha():
            pieces.append("[%s%s]" % (char.upper(), char.lower()))
        else:
            pieces.append(char)
    return "".join(pieces)
589  
590  
def get_aug_path(file_path):
    """Return the Augeas path that corresponds to a full file path.

    :param str file_path: Full filepath

    :returns: ``file_path`` prefixed with ``/files``
    :rtype: str

    """
    template = "/files%s"
    return template % file_path
597      return "/files%s" % file_path