# parser.py
"""ApacheParser is a member object of the ApacheConfigurator class."""
import fnmatch
import itertools
import logging
import os
import re
import subprocess

from letsencrypt import errors


logger = logging.getLogger(__name__)


class ApacheParser(object):
    """Class handles the fine details of parsing the Apache Configuration.

    .. todo:: Make parsing general... remove sites-available etc...

    :ivar str root: Normalized absolute path to the server root
        directory. Without trailing slash.
    :ivar set modules: All module names that are currently enabled.
    :ivar dict variables: Runtime variables reported by
        ``<ctl> -D DUMP_RUN_CFG`` (name -> value, value may be empty).
    :ivar dict loc: Location to place directives, root - configuration origin,
        default - user config file, listen - Listen directives,
        name - NameVirtualHost

    """
    # Matches ${VARIABLE} references inside directive arguments.
    arg_var_interpreter = re.compile(r"\$\{[^ \}]*}")
    # Characters whose presence marks a path component as an fnmatch pattern.
    fnmatch_chars = {"*", "?", "\\", "[", "]"}

    def __init__(self, aug, root, ctl):
        """Parse the Apache configuration found under ``root``.

        :param aug: Augeas object used to read and modify the configuration
        :param str root: Path to the Apache server root directory
        :param str ctl: Apache control binary, run with ``-D DUMP_RUN_CFG``

        :raises errors.NoInstallationError: If no configuration root file
            is found under ``root``
        :raises errors.PluginError: If the runtime variables can't be parsed
        :raises errors.MisconfigurationError: If ``ctl`` can't be run or
            exits with a non-zero status

        """
        # Note: Order is important here.

        # This uses the binary, so it can be done first.
        # https://httpd.apache.org/docs/2.4/mod/core.html#define
        # https://httpd.apache.org/docs/2.4/mod/core.html#ifdefine
        # This only handles invocation parameters and Define directives!
        self.variables = {}
        self.update_runtime_variables(ctl)

        self.aug = aug
        # Find configuration root and make sure augeas can parse it.
        self.root = os.path.abspath(root)
        self.loc = {"root": self._find_config_root()}
        self._parse_file(self.loc["root"])

        # This problem has been fixed in Augeas 1.0
        self.standardize_excl()

        # Temporarily set modules to be empty, so that find_dirs can work
        # https://httpd.apache.org/docs/2.4/mod/core.html#ifmodule
        # This needs to come before locations are set.
        self.modules = set()
        self.init_modules()

        # Set up rest of locations
        self.loc.update(self._set_locations())

        # Must also attempt to parse sites-available or equivalent
        # Sites-available is not included naturally in configuration
        self._parse_file(os.path.join(self.root, "sites-available") + "/*")

    def init_modules(self):
        """Iterates on the configuration until no new modules are loaded.

        Each ``LoadModule`` directive contributes two names to
        ``self.modules``: the module identifier (first argument) and the
        source-style name derived from the shared object's file name
        (e.g. ``mod_ssl.so`` -> ``mod_ssl.c``).

        .. todo:: This should be attempted to be done with a binary to avoid
            the iteration issue. Else... parse and enable mods at same time.

        """
        # Since modules are being initiated... clear existing set.
        self.modules = set()
        # Make sure prev_size != cur_size for do: while: iteration
        prev_size = -1

        while len(self.modules) != prev_size:
            prev_size = len(self.modules)

            # The search must be repeated on every pass: find_dir() drops
            # directives nested in <IfModule> blocks based on the current
            # contents of self.modules, so modules found in the previous
            # pass may expose further LoadModule directives.
            iterator = iter(self.find_dir("LoadModule"))

            # find_dir returns the arg paths in order, i.e. alternating
            # (module identifier, module file); zipping the iterator with
            # itself consumes them pairwise.
            for match_name, match_filename in zip(iterator, iterator):
                self.modules.add(self.get_arg(match_name))
                # Swap the trailing "so" of the file name for "c".
                self.modules.add(
                    os.path.basename(self.get_arg(match_filename))[:-2] + "c")

    def update_runtime_variables(self, ctl):
        """Refresh ``self.variables`` from the Apache runtime configuration.

        .. note:: Compile time variables (apache2ctl -V) are not used within
            the dynamic configuration files.  These should not be parsed or
            interpreted.

        .. todo:: Create separate compile time variables...
            simply for arg_get()

        :param str ctl: Apache control binary to query

        :raises errors.PluginError: If the output does not contain the
            ``DUMP_RUN_CFG`` define or a define has more than one ``=``

        """
        stdout = self._get_runtime_cfg(ctl)

        matches = re.compile(r"Define: ([^ \n]*)").findall(stdout)
        try:
            # DUMP_RUN_CFG is the define we passed ourselves; its absence
            # means the output was not what we expected.
            matches.remove("DUMP_RUN_CFG")
        except ValueError:
            raise errors.PluginError("Unable to parse runtime variables")

        variables = dict()
        for match in matches:
            if match.count("=") > 1:
                logger.error("Unexpected number of equal signs in "
                             "apache2ctl -D DUMP_RUN_CFG")
                raise errors.PluginError(
                    "Error parsing Apache runtime variables")
            # "NAME=value" -> value, bare "NAME" -> ""
            name, _, value = match.partition("=")
            variables[name] = value

        self.variables = variables

    def _get_runtime_cfg(self, ctl):  # pylint: disable=no-self-use
        """Get runtime configuration info.

        :param str ctl: Apache control binary to run

        :returns: stdout from DUMP_RUN_CFG
        :rtype: str

        :raises errors.MisconfigurationError: If ``ctl`` can't be executed
            or exits with a non-zero return code

        """
        try:
            proc = subprocess.Popen(
                [ctl, "-D", "DUMP_RUN_CFG"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                # Text mode: the caller applies a str regex to the output.
                universal_newlines=True)
            stdout, stderr = proc.communicate()

        except (OSError, ValueError):
            logger.error(
                "Error accessing %s for runtime parameters!%s", ctl, os.linesep)
            raise errors.MisconfigurationError(
                "Error accessing loaded Apache parameters: %s" % ctl)
        # Small errors that do not impede
        if proc.returncode != 0:
            logger.warning("Error in checking parameter list: %s", stderr)
            raise errors.MisconfigurationError(
                "Apache is unable to check whether or not the module is "
                "loaded because Apache is misconfigured.")

        return stdout

    def filter_args_num(self, matches, args):  # pylint: disable=no-self-use
        """Filter out directives with specific number of arguments.

        This function makes the assumption that all related arguments are given
        in order.  Thus /files/apache/directive[5]/arg[2] must come immediately
        after /files/apache/directive[5]/arg[1]. Runs in 1 linear pass.

        :param list matches: Matches of all directives with arg nodes
        :param int args: Number of args you would like to filter

        :returns: List of directives that contain # of arguments.
            (arg is stripped off)
        :rtype: list

        """
        if args == 1:
            # A lone argument is reported without an index: ".../arg"
            return [match[:-len("/arg")] for match in matches
                    if match.endswith("/arg")]

        suffix = "/arg[%d]" % args
        next_suffix = "/arg[%d]" % (args + 1)
        last_idx = len(matches) - 1

        filtered = []
        for i, match in enumerate(matches):
            if not match.endswith(suffix):
                continue
            # Exactly `args` arguments: either this is the final match or
            # the following one is not arg[args + 1].
            if i == last_idx or not matches[i + 1].endswith(next_suffix):
                filtered.append(match[:-len(suffix)])

        return filtered

    def add_dir_to_ifmodssl(self, aug_conf_path, directive, args):
        """Adds directive and value to IfMod ssl block.

        Adds given directive and value along configuration path within
        an IfMod mod_ssl.c block.  If the IfMod block does not exist in
        the file, it is created.

        :param str aug_conf_path: Desired Augeas config path to add directive
        :param str directive: Directive you would like to add, e.g. Listen
        :param args: Values of the directive; str "443" or list of str
        :type args: list or str

        """
        # TODO: Add error checking code... does the path given even exist?
        #       Does it throw exceptions?
        if_mod_path = self._get_ifmod(aug_conf_path, "mod_ssl.c")
        # IfModule can have only one valid argument, so append after
        self.aug.insert(if_mod_path + "arg", "directive", False)
        nvh_path = if_mod_path + "directive[1]"
        self.aug.set(nvh_path, directive)

        # A bare string is a single value, not a sequence of characters.
        if not isinstance(args, (list, tuple)):
            args = [args]

        if len(args) == 1:
            self.aug.set(nvh_path + "/arg", args[0])
        else:
            for i, arg in enumerate(args, 1):
                self.aug.set("%s/arg[%d]" % (nvh_path, i), arg)

    def _get_ifmod(self, aug_conf_path, mod):
        """Returns the path to <IfMod mod> and creates one if it doesn't exist.

        :param str aug_conf_path: Augeas configuration path
        :param str mod: module ie. mod_ssl.c

        :returns: Augeas path of the IfModule node, with a trailing slash
        :rtype: str

        """
        query = "%s/IfModule/*[self::arg='%s']" % (aug_conf_path, mod)
        if_mods = self.aug.match(query)
        if not if_mods:
            self.aug.set("%s/IfModule[last() + 1]" % aug_conf_path, "")
            self.aug.set("%s/IfModule[last()]/arg" % aug_conf_path, mod)
            if_mods = self.aug.match(query)
        # Strip off "arg" at end of first ifmod path
        return if_mods[0][:-len("arg")]

    def add_dir(self, aug_conf_path, directive, args):
        """Appends directive to the end of the file given by aug_conf_path.

        .. note:: Not added to AugeasConfigurator because it may depend
            on the lens

        :param str aug_conf_path: Augeas configuration path to add directive
        :param str directive: Directive to add
        :param args: Value of the directive. ie. Listen 443, 443 is arg
        :type args: list or str

        """
        self.aug.set(aug_conf_path + "/directive[last() + 1]", directive)
        if isinstance(args, (list, tuple)):
            for i, value in enumerate(args, 1):
                self.aug.set(
                    "%s/directive[last()]/arg[%d]" % (aug_conf_path, i), value)
        else:
            self.aug.set(aug_conf_path + "/directive[last()]/arg", args)

    def find_dir(self, directive, arg=None, start=None, exclude=True):
        """Finds directive in the configuration.

        Recursively searches through config files to find directives
        Directives should be in the form of a case insensitive regex currently

        .. todo:: arg should probably be a list
        .. todo:: arg search currently only supports direct matching. It does
            not handle the case of variables or quoted arguments. This should
            be adapted to use a generic search for the directive and then do a
            case-insensitive self.get_arg filter

        Note: Augeas is inherently case sensitive while Apache is case
        insensitive.  Augeas 1.0 allows case insensitive regexes like
        regexp(/Listen/, "i"), however the version currently supported
        by Ubuntu 0.10 does not.  Thus I have included my own case insensitive
        transformation by calling case_i() on everything to maintain
        compatibility.

        :param str directive: Directive to look for
        :param arg: Specific value directive must have, None if all should
                    be considered
        :type arg: str or None

        :param str start: Beginning Augeas path to begin looking
        :param bool exclude: Whether or not to exclude directives based on
            variables and enabled modules

        :returns: Augeas paths of the matching directives' arg nodes, in the
            order they are encountered (included files are expanded in place)
        :rtype: list

        """
        # Cannot place member variable in the definition of the function so...
        if not start:
            start = get_aug_path(self.loc["root"])

        # Include directives are matched as well so that included files can
        # be searched at the position where they are pulled in.
        regex = "(%s)|(%s)|(%s)" % (case_i(directive),
                                    case_i("Include"),
                                    case_i("IncludeOptional"))
        matches = self.aug.match(
            "%s//*[self::directive=~regexp('%s')]" % (start, regex))

        if exclude:
            matches = self._exclude_dirs(matches)

        if arg is None:
            arg_suffix = "/arg"
        else:
            arg_suffix = "/*[self::arg=~regexp('%s')]" % case_i(arg)

        wanted = directive.lower()
        ordered_matches = []

        # TODO: Wildcards should be included in alphabetical order
        # https://httpd.apache.org/docs/2.4/mod/core.html#include
        for match in matches:
            dir_ = self.aug.get(match).lower()
            if dir_ in ("include", "includeoptional"):
                ordered_matches.extend(self.find_dir(
                    directive, arg,
                    self._get_include_path(self.get_arg(match + "/arg")),
                    exclude))
            # Not an elif: this additionally allows searching for Include
            if dir_ == wanted:
                ordered_matches.extend(self.aug.match(match + arg_suffix))

        return ordered_matches

    def get_arg(self, match):
        """Uses augeas.get to get argument value and interprets result.

        This also converts all variables and parameters appropriately.

        :param str match: Augeas path of the argument node

        :returns: Argument with surrounding quotes removed and every
            ``${VAR}`` reference replaced by its runtime value
        :rtype: str

        :raises errors.PluginError: If a referenced variable is not defined

        """
        value = self.aug.get(match)

        # No need to strip quotes for variables, as apache2ctl already does this
        # but we do need to strip quotes for all normal arguments.

        # Note: normal argument may be a quoted variable
        #       e.g. strip now, not later
        value = value.strip("'\"")

        for var in ApacheParser.arg_var_interpreter.findall(value):
            # Strip off ${ and }
            try:
                value = value.replace(var, self.variables[var[2:-1]])
            except KeyError:
                raise errors.PluginError("Error Parsing variable: %s" % var)

        return value

    def _exclude_dirs(self, matches):
        """Exclude directives that are not loaded into the configuration.

        :param list matches: Augeas paths of directives

        :returns: The paths whose enclosing IfModule/IfDefine blocks are all
            satisfied, in their original order
        :rtype: list

        """
        filters = [("ifmodule", self.modules), ("ifdefine", self.variables)]

        return [match for match in matches
                if all(self._pass_filter(match, filter_)
                       for filter_ in filters)]

    def _pass_filter(self, match, filter_):
        """Determine if directive passes a filter.

        :param str match: Augeas path
        :param tuple filter_: Pair of the form
            ("lowercase if directive", collection of relevant parameters)

        :returns: True if every enclosing block named by the filter is
            satisfied (a leading ``!`` on the block argument negates the test)
        :rtype: bool

        """
        keyword, enabled = filter_
        match_l = match.lower()
        last_match_idx = match_l.find(keyword)

        # Walk through every occurrence of the block keyword in the path;
        # nested blocks must all be satisfied.
        while last_match_idx != -1:
            # Check args
            end_of_if = match_l.find("/", last_match_idx)
            # This should be aug.get (vars are not used e.g. parser.aug_get)
            expression = self.aug.get(match[:end_of_if] + "/arg")

            if expression.startswith("!"):
                # Strip off "!"
                if expression[1:] in enabled:
                    return False
            elif expression not in enabled:
                return False

            last_match_idx = match_l.find(keyword, end_of_if)

        return True

    def _get_include_path(self, arg):
        """Converts an Apache Include directive into Augeas path.

        Converts an Apache Include directive argument into an Augeas
        searchable path.  As a side effect the included file(s) are handed to
        Augeas for parsing.

        .. todo:: convert to use os.path.join()

        :param str arg: Argument of Include directive

        :returns: Augeas path string
        :rtype: str

        """
        # Remove beginning and ending quotes
        arg = arg.strip("'\"")

        # Standardize the include argument based on server root
        if not arg.startswith("/"):
            # Normpath will condense ../
            arg = os.path.normpath(os.path.join(self.root, arg))
        else:
            arg = os.path.normpath(arg)

        # Attempts to add a transform to the file if one does not already exist
        if os.path.isdir(arg):
            self._parse_file(os.path.join(arg, "*"))
        else:
            self._parse_file(arg)

        # Argument represents an fnmatch regular expression, convert it
        # Split up the path and convert each into an Augeas accepted regex
        # then reassemble
        split_arg = arg.split("/")
        for idx, split in enumerate(split_arg):
            if any(char in ApacheParser.fnmatch_chars for char in split):
                # Turn it into a augeas regex
                # TODO: Can this instead be an augeas glob instead of regex
                split_arg[idx] = ("* [label()=~regexp('%s')]" %
                                  self.fnmatch_to_re(split))
        # Reassemble the argument
        # Note: This also normalizes the argument /serverroot/ -> /serverroot
        arg = "/".join(split_arg)

        return get_aug_path(arg)

    def fnmatch_to_re(self, clean_fn_match):  # pylint: disable=no-self-use
        """Method converts Apache's basic fnmatch to regular expression.

        Assumption - Configs are assumed to be well-formed and only writable by
        privileged users.

        https://apr.apache.org/docs/apr/2.0/apr__fnmatch_8h_source.html
        http://apache2.sourcearchive.com/documentation/2.2.16-6/apr__fnmatch_8h_source.html

        :param str clean_fn_match: Apache style filename match, similar to globs

        :returns: regex suitable for augeas
        :rtype: str

        """
        regex = fnmatch.translate(clean_fn_match)

        # Older interpreters append the anchor and flags: "<regex>\Z(?ms)"
        old_suffix = "\\Z(?ms)"
        if regex.endswith(old_suffix):
            return regex[:-len(old_suffix)]

        # Newer interpreters wrap the pattern instead: "(?s:<regex>)\Z"
        # (the anchor may be spelled "\z").
        new_prefix = "(?s:"
        if regex.startswith(new_prefix) and regex[-3:] in (")\\Z", ")\\z"):
            return regex[len(new_prefix):-3]

        # Unknown decoration: hand the pattern back untouched rather than
        # chopping characters blindly.
        return regex

    def _parse_file(self, filepath):
        """Parse file with Augeas

        Checks to see if filepath is parsed by Augeas
        If filepath isn't parsed, the file is added and Augeas is reloaded

        :param str filepath: Apache config file path

        """
        # Test if augeas included file for Httpd.lens
        # Note: This works for augeas globs, ie. *.conf
        inc_test = self.aug.match(
            "/augeas/load/Httpd/incl [. ='%s']" % filepath)
        if not inc_test:
            # Load up files
            # self.aug.add_transform("Httpd.lns", [filepath]) doesn't seem
            # to work on TravisCI, hence the hand-rolled helper.
            self._add_httpd_transform(filepath)
            self.aug.load()

    def _add_httpd_transform(self, incl):
        """Add a transform to Augeas.

        This function will correctly add a transform to augeas
        The existing augeas.add_transform in python doesn't seem to work for
        Travis CI as it loads in libaugeas.so.0.10.0

        :param str incl: filepath to include for transform

        """
        last_include = self.aug.match("/augeas/load/Httpd/incl [last()]")
        if last_include:
            # Insert a new node immediately after the last incl
            self.aug.insert(last_include[0], "incl", False)
            self.aug.set("/augeas/load/Httpd/incl[last()]", incl)
        # On first use... must load lens and add file to incl
        else:
            # Augeas uses base 1 indexing... insert at beginning...
            self.aug.set("/augeas/load/Httpd/lens", "Httpd.lns")
            self.aug.set("/augeas/load/Httpd/incl", incl)

    def standardize_excl(self):
        """Standardize the excl arguments for the Httpd lens in Augeas.

        Note: Hack!
        Standardize the excl arguments for the Httpd lens in Augeas
        Servers sometimes give incorrect defaults
        Note: This problem should be fixed in Augeas 1.0.  Unfortunately,
        Augeas 0.10 appears to be the most popular version currently.

        """
        # attempt to protect against augeas error in 0.10.0 - ubuntu
        # *.augsave -> /*.augsave upon augeas.load()
        # Try to avoid bad httpd files
        # There has to be a better way... but after a day and a half of testing
        # I had no luck
        # This is a hack... work around... submit to augeas if still not fixed

        # NOTE(review): "/*/*augsave" has no "." before "augsave", unlike the
        # sibling patterns -- kept as-is; confirm whether that is intentional.
        excl = ["*.augnew", "*.augsave", "*.dpkg-dist", "*.dpkg-bak",
                "*.dpkg-new", "*.dpkg-old", "*.rpmsave", "*.rpmnew",
                "*~",
                self.root + "/*.augsave",
                self.root + "/*~",
                self.root + "/*/*augsave",
                self.root + "/*/*~",
                self.root + "/*/*/*.augsave",
                self.root + "/*/*/*~"]

        for i, excluded in enumerate(excl, 1):
            self.aug.set("/augeas/load/Httpd/excl[%d]" % i, excluded)

        self.aug.load()

    def _set_locations(self):
        """Set default location for directives.

        Locations are given as file_paths
        .. todo:: Make sure that files are included

        :returns: Mapping with the keys ``default``, ``listen`` and ``name``
        :rtype: dict

        """
        default = self._set_user_config_file()

        # Listen/NameVirtualHost go to ports.conf when it exists, otherwise
        # to the user config file.
        ports_conf = os.path.join(self.root, "ports.conf")
        target = ports_conf if os.path.isfile(ports_conf) else default

        return {"default": default, "listen": target, "name": target}

    def _find_config_root(self):
        """Find the Apache Configuration Root file.

        :returns: Path of the first of ``apache2.conf`` / ``httpd.conf``
            that exists in the server root
        :rtype: str

        :raises errors.NoInstallationError: If neither file exists

        """
        for name in ("apache2.conf", "httpd.conf"):
            candidate = os.path.join(self.root, name)
            if os.path.isfile(candidate):
                return candidate

        raise errors.NoInstallationError("Could not find configuration root")

    def _set_user_config_file(self):
        """Set the appropriate user configuration file

        .. todo:: This will have to be updated for other distros versions

        :returns: Path of the file user directives should be written to
        :rtype: str

        """
        # Basic check to see if httpd.conf exists and
        # in hierarchy via direct include
        # httpd.conf was very common as a user file in Apache 2.2
        httpd_conf = os.path.join(self.root, "httpd.conf")
        # find_dir expects an Augeas path as its starting point, not a
        # filesystem path.
        if (os.path.isfile(httpd_conf) and
                self.find_dir("Include", "httpd.conf",
                              get_aug_path(self.loc["root"]))):
            return httpd_conf
        # Otherwise fall back to the configuration root that was actually
        # found (apache2.conf when present, else httpd.conf).
        return self.loc["root"]


def case_i(string):
    """Returns case insensitive regex.

    Returns a sloppy, but necessary version of a case insensitive regex.
    Any string should be able to be submitted and the string is
    escaped and then made case insensitive.
    May be replaced by a more proper /i once augeas 1.0 is widely
    supported.

    :param str string: string to make case i regex

    :returns: Escaped regex in which every letter is replaced by a
        two-character class, e.g. ``a`` -> ``[Aa]``
    :rtype: str

    """
    return "".join(
        "[%s%s]" % (char.upper(), char.lower()) if char.isalpha() else char
        for char in re.escape(string))


def get_aug_path(file_path):
    """Return augeas path for full filepath.

    :param str file_path: Full filepath

    :returns: ``file_path`` prefixed with ``/files``
    :rtype: str

    """
    return "/files%s" % file_path