#!/usr/bin/env python3
# coding=utf-8
#
# Top-level docs builder
#
# This is just a front-end to sphinx-build that can call it multiple times for different language/target combinations
#
# Will build out to _build/LANG/TARGET by default
#
# Specific custom docs functionality should be added in conf_common.py or in a Sphinx extension, not here.
#
# Copyright 2020 Espressif Systems (Shanghai) PTE LTD
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import argparse
import locale
import math
import multiprocessing
import os
import os.path
import re
import subprocess
import sys
from collections import namedtuple

from packaging import version

LANGUAGES = ["en", "zh_CN"]
TARGETS = ["esp32", "esp32s2"]

SPHINX_WARN_LOG = "sphinx-warning-log.txt"
SPHINX_SANITIZED_LOG = "sphinx-warning-log-sanitized.txt"
SPHINX_KNOWN_WARNINGS = os.path.join(os.environ["IDF_PATH"], "docs", "sphinx-known-warnings.txt")

DXG_WARN_LOG = "doxygen-warning-log.txt"
DXG_SANITIZED_LOG = "doxygen-warning-log-sanitized.txt"
DXG_KNOWN_WARNINGS = os.path.join(os.environ["IDF_PATH"], "docs", "doxygen-known-warnings.txt")
DXG_CI_VERSION = version.parse('1.8.11')

# Pairs a raw log line with its path/line-number-stripped form (see sanitize_line())
LogMessage = namedtuple("LogMessage", "original_text sanitized_text")

# Effective language/target selection; narrowed by --language/--target in main()
languages = LANGUAGES
targets = TARGETS


def main():
    """Parse command-line arguments and dispatch to the requested action.

    Actions: 'build' (default when no action given), 'linkcheck', 'gh-linkcheck'.
    Exits the process with the action's return code.
    """
    # check Python dependencies for docs
    try:
        subprocess.check_call([sys.executable,
                               os.path.join(os.environ["IDF_PATH"],
                                            "tools",
                                            "check_python_dependencies.py"),
                               "-r",
                               "{}/docs/requirements.txt".format(os.environ["IDF_PATH"])
                               ])
    except subprocess.CalledProcessError:
        raise SystemExit(2)  # stdout will already have these errors

    # This is not the only way to make sure that all files opened by Python are treated as UTF-8, but the other way is passing encoding='utf-8' to all open()
    # functions and this way makes Python 2 compatibility really tough if there is any code that assumes text files contain strings (kconfiglib assumes this).
    # The reason for that is that you need to import io.open() to support the encoding argument on Python 2, and this function always uses Py2's unicode
    # type not the str type.
    if ('UTF-8' not in locale.getlocale()) and ('utf8' not in locale.getlocale()):
        raise RuntimeError("build_docs.py requires the default locale's encoding to be UTF-8.\n" +
                           " - Linux. Setting environment variable LC_ALL=C.UTF-8 when running build_docs.py may be " +
                           "enough to fix this.\n"
                           " - Windows. Possible solution for the Windows 10 starting version 1803. Go to " +
                           "Control Panel->Clock and Region->Region->Administrative->Change system locale...; " +
                           "Check `Beta: Use Unicode UTF-8 for worldwide language support` and reboot")

    parser = argparse.ArgumentParser(description='build_docs.py: Build IDF docs', prog='build_docs.py')

    parser.add_argument("--language", "-l", choices=LANGUAGES, required=False)
    parser.add_argument("--target", "-t", choices=TARGETS, required=False)
    parser.add_argument("--build-dir", "-b", type=str, default="_build")
    parser.add_argument("--source-dir", "-s", type=str, default="")
    parser.add_argument("--builders", "-bs", nargs='+', type=str, default=["html"],
                        help="List of builders for Sphinx, e.g. html or latex, for latex a PDF is also generated")
    parser.add_argument("--sphinx-parallel-builds", "-p", choices=["auto"] + [str(x) for x in range(8)],
                        help="Parallel Sphinx builds - number of independent Sphinx builds to run", default="auto")
    parser.add_argument("--sphinx-parallel-jobs", "-j", choices=["auto"] + [str(x) for x in range(8)],
                        help="Sphinx parallel jobs argument - number of threads for each Sphinx build to use", default="1")
    parser.add_argument("--input-docs", "-i", nargs='+', default=[""],
                        help="List of documents to build relative to the doc base folder, i.e. the language folder. Defaults to all documents")

    action_parsers = parser.add_subparsers(dest='action')

    build_parser = action_parsers.add_parser('build', help='Build documentation')
    build_parser.add_argument("--check-warnings-only", "-w", action='store_true')

    action_parsers.add_parser('linkcheck', help='Check links (a current IDF revision should be uploaded to GitHub)')

    action_parsers.add_parser('gh-linkcheck', help='Checking for hardcoded GitHub links')

    args = parser.parse_args()

    global languages
    if args.language is None:
        print("Building all languages")
        languages = LANGUAGES
    else:
        languages = [args.language]

    global targets
    if args.target is None:
        print("Building all targets")
        targets = TARGETS
    else:
        targets = [args.target]

    if args.action == "build" or args.action is None:
        if args.action is None:
            # 'build' subparser was not entered, so its option is missing from args
            args.check_warnings_only = False
        sys.exit(action_build(args))

    if args.action == "linkcheck":
        sys.exit(action_linkcheck(args))

    if args.action == "gh-linkcheck":
        sys.exit(action_gh_linkcheck(args))


def parallel_call(args, callback):
    """Run `callback` once per language/target combination in a process pool.

    Returns 0 if every combination succeeded, 1 otherwise (individual error
    codes are printed, not propagated).
    """
    num_sphinx_builds = len(languages) * len(targets)
    num_cpus = multiprocessing.cpu_count()

    if args.sphinx_parallel_builds == "auto":
        # at most one sphinx build per CPU, up to the number of CPUs
        args.sphinx_parallel_builds = min(num_sphinx_builds, num_cpus)
    else:
        args.sphinx_parallel_builds = int(args.sphinx_parallel_builds)

    # Force -j1 because sphinx works incorrectly with parallel jobs
    # (NOTE: this deliberately overrides any user-supplied --sphinx-parallel-jobs,
    # making the branch below a no-op until the Sphinx issue is resolved)
    args.sphinx_parallel_jobs = 1
    if args.sphinx_parallel_jobs == "auto":
        # N CPUs per build job, rounded up - (maybe smarter to round down to avoid contention, idk)
        args.sphinx_parallel_jobs = int(math.ceil(num_cpus / args.sphinx_parallel_builds))
    else:
        args.sphinx_parallel_jobs = int(args.sphinx_parallel_jobs)

    print("Will use %d parallel builds and %d jobs per build" % (args.sphinx_parallel_builds, args.sphinx_parallel_jobs))
    pool = multiprocessing.Pool(args.sphinx_parallel_builds)

    if args.sphinx_parallel_jobs > 1:
        print("WARNING: Sphinx parallel jobs currently produce incorrect docs output with Sphinx 1.8.5")

    # make a list of all combinations of build_docs() args as tuples
    #
    # there's probably a fancy way to do this with itertools but this way is actually readable
    entries = []
    for target in targets:
        for language in languages:
            build_dir = os.path.realpath(os.path.join(args.build_dir, language, target))
            source_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), args.source_dir, language)

            entries.append((language, target, build_dir, source_dir, args.sphinx_parallel_jobs, args.builders, args.input_docs))

    print(entries)
    errcodes = pool.map(callback, entries)
    print(errcodes)

    failures = [(entry, ret) for entry, ret in zip(entries, errcodes) if ret != 0]
    if failures:
        print("\nThe following language/target combinations failed to build:")
        for entry, ret in failures:
            print("language: %s, target: %s, errcode: %d" % (entry[0], entry[1], ret))
        # Don't re-throw real error code from each parallel process
        return 1
    return 0


def sphinx_call(language, target, build_dir, src_dir, sphinx_parallel_jobs, buildername, input_docs):
    """Run one sphinx-build as a subprocess, prefixing its output with language/target.

    Returns the subprocess exit code, or 130 on KeyboardInterrupt.
    """
    # Note: because this runs in a multiprocessing Process, everything which happens here should be isolated to a single process
    # (ie it doesn't matter if Sphinx is using global variables, as they're it's own copy of the global variables)

    # wrap stdout & stderr in a way that lets us see which build_docs instance they come from
    #
    # this doesn't apply to subprocesses, they write to OS stdout & stderr so no prefix appears
    prefix = "%s/%s: " % (language, target)

    print("Building in build_dir: %s" % (build_dir))
    # exist_ok=True only tolerates a pre-existing directory; other OSErrors
    # (e.g. permissions) now propagate instead of being silently swallowed
    os.makedirs(build_dir, exist_ok=True)

    environ = {}
    environ.update(os.environ)
    environ['BUILDDIR'] = build_dir

    args = [sys.executable, "-u", "-m", "sphinx.cmd.build",
            "-j", str(sphinx_parallel_jobs),
            "-b", buildername,
            "-d", os.path.join(build_dir, "doctrees"),
            "-w", SPHINX_WARN_LOG,
            "-t", target,
            "-D", "idf_target={}".format(target),
            "-D", "docs_to_build={}".format(",".join(input_docs)),
            src_dir,
            os.path.join(build_dir, buildername)  # build directory
            ]

    saved_cwd = os.getcwd()
    os.chdir(build_dir)  # also run sphinx in the build directory
    print("Running '%s'" % (" ".join(args)))

    ret = 1
    try:
        # Note: we can't call sphinx.cmd.build.main() here as multiprocessing doesn't nest >1 layer deep
        # and sphinx.cmd.build() also does a lot of work in the calling thread, especially for j==1,
        # so using a Python thread for this part is a poor option (GIL)
        #
        # env=environ so the child actually sees BUILDDIR (previously the dict was built but never passed)
        p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=environ)
        for c in iter(lambda: p.stdout.readline(), b''):
            sys.stdout.write(prefix)
            sys.stdout.write(c.decode('utf-8'))
        ret = p.wait()
        assert (ret is not None)
        sys.stdout.flush()
    except KeyboardInterrupt:  # this seems to be the only way to get Ctrl-C to kill everything?
        p.kill()
        return 130  # FIXME It doesn't return this errorcode, why? Just prints stacktrace
    finally:
        # restore cwd on every exit path (success, Ctrl-C, unexpected exception)
        os.chdir(saved_cwd)
    return ret


def action_build(args):
    """'build' action: run Sphinx for all selected language/target combinations."""
    if not args.check_warnings_only:
        ret = parallel_call(args, call_build_docs)
        if ret != 0:
            return ret
    return 0


def check_doxygen_version():
    """Warn if the local doxygen is newer than the version CI uses.

    Different versions of doxygen may produce different warnings; this could
    cause a build to fail locally but pass CI, and vice versa.
    """
    process = subprocess.run(['doxygen', '--version'], encoding='utf-8', stdout=subprocess.PIPE)
    doxygen_ver = process.stdout.strip()

    if version.parse(doxygen_ver) > DXG_CI_VERSION:
        print('Local doxygen version {} is newer than CI doxygen version {}. Local build may contain '
              'warnings that will not be raised when built by CI.'.format(doxygen_ver, DXG_CI_VERSION))


def call_build_docs(entry):
    """Pool worker: run every requested builder for one (language, target) entry.

    Returns 0 on success, non-zero if any builder or warning check failed.
    """
    (language, target, build_dir, src_dir, sphinx_parallel_jobs, builders, input_docs) = entry
    for buildername in builders:
        ret = sphinx_call(language, target, build_dir, src_dir, sphinx_parallel_jobs, buildername, input_docs)

        # Warnings are checked after each builder as logs are overwritten
        # check Doxygen warnings:
        ret += check_docs(language, target,
                          log_file=os.path.join(build_dir, DXG_WARN_LOG),
                          known_warnings_file=DXG_KNOWN_WARNINGS,
                          out_sanitized_log_file=os.path.join(build_dir, DXG_SANITIZED_LOG))
        # check Sphinx warnings:
        ret += check_docs(language, target,
                          log_file=os.path.join(build_dir, SPHINX_WARN_LOG),
                          known_warnings_file=SPHINX_KNOWN_WARNINGS,
                          out_sanitized_log_file=os.path.join(build_dir, SPHINX_SANITIZED_LOG))

        if ret != 0:
            # a doxygen version mismatch is a common cause of unexpected warnings
            check_doxygen_version()
            return ret

    # Build PDF from tex
    if 'latex' in builders:
        latex_dir = os.path.join(build_dir, "latex")
        ret = build_pdf(language, target, latex_dir)

    return ret


def build_pdf(language, target, latex_dir):
    """Run latexmk in `latex_dir` to produce a PDF from Sphinx's latex output.

    Returns the latexmk exit code, or 130 on KeyboardInterrupt.
    """
    # Note: because this runs in a multiprocessing Process, everything which happens here
    # should be isolated to a single process

    # wrap stdout & stderr in a way that lets us see which build_docs instance they come from
    #
    # this doesn't apply to subprocesses, they write to OS stdout & stderr so no prefix appears
    prefix = "%s/%s: " % (language, target)

    print("Building PDF in latex_dir: %s" % (latex_dir))

    saved_cwd = os.getcwd()
    os.chdir(latex_dir)

    # Based on read the docs PDFBuilder
    rcfile = 'latexmkrc'

    cmd = [
        'latexmk',
        '-r',
        rcfile,
        '-pdf',
        # When ``-f`` is used, latexmk will continue building if it
        # encounters errors. We still receive a failure exit code in this
        # case, but the correct steps should run.
        '-f',
        '-dvi-',  # dont generate dvi
        '-ps-',  # dont generate ps
        '-interaction=nonstopmode',
        '-quiet',
        '-outdir=build',
    ]

    ret = 1
    try:
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        for c in iter(lambda: p.stdout.readline(), b''):
            sys.stdout.write(prefix)
            sys.stdout.write(c.decode('utf-8'))
        ret = p.wait()
        assert (ret is not None)
        sys.stdout.flush()
    except KeyboardInterrupt:  # this seems to be the only way to get Ctrl-C to kill everything?
        p.kill()
        return 130  # FIXME It doesn't return this errorcode, why? Just prints stacktrace
    finally:
        # restore cwd on every exit path (success, Ctrl-C, unexpected exception)
        os.chdir(saved_cwd)

    return ret


SANITIZE_FILENAME_REGEX = re.compile("[^:]*/([^/:]*)(:.*)")
SANITIZE_LINENUM_REGEX = re.compile("([^:]*)(:[0-9]+:)(.*)")


def sanitize_line(line):
    """
    Clear a log message from insignificant parts

    filter:
        - only filename, no path at the beginning
        - no line numbers after the filename
    """
    line = re.sub(SANITIZE_FILENAME_REGEX, r'\1\2', line)
    line = re.sub(SANITIZE_LINENUM_REGEX, r'\1:line:\3', line)
    return line


def check_docs(language, target, log_file, known_warnings_file, out_sanitized_log_file):
    """
    Check for Documentation warnings in `log_file`: should only contain (fuzzy) matches to `known_warnings_file`

    It prints all unknown messages with `target`/`language` prefix
    It leaves `out_sanitized_log_file` file for observe and debug
    """
    # Sanitize all messages
    all_messages = list()
    with open(log_file) as f, open(out_sanitized_log_file, 'w') as o:
        for line in f:
            sanitized_line = sanitize_line(line)
            all_messages.append(LogMessage(line, sanitized_line))
            o.write(sanitized_line)

    known_messages = list()
    with open(known_warnings_file) as k:
        for known_line in k:
            known_messages.append(known_line)

    if "doxygen" in known_warnings_file:
        # Clean a known Doxygen limitation: it's expected to always document anonymous
        # structs/unions but we don't do this in our docs, so filter these all out with a regex
        # (this won't match any named field, only anonymous members -
        # ie the last part of the field is just <something>::@NUM not <something>::name)
        RE_ANONYMOUS_FIELD = re.compile(r".+:line: warning: parameters of member [^:\s]+(::[^:\s]+)*(::@\d+)+ are not \(all\) documented")
        all_messages = [msg for msg in all_messages if not re.match(RE_ANONYMOUS_FIELD, msg.sanitized_text)]

    # Collect all new messages that are not match with the known messages.
    # The order is an important.
    new_messages = list()
    known_idx = 0
    for msg in all_messages:
        try:
            known_idx = known_messages.index(msg.sanitized_text, known_idx)
        except ValueError:
            new_messages.append(msg)

    if new_messages:
        print("\n%s/%s: Build failed due to new/different warnings (%s):\n" % (language, target, log_file))
        for msg in new_messages:
            print("%s/%s: %s" % (language, target, msg.original_text), end='')
        print("\n%s/%s: (Check files %s and %s for full details.)" % (language, target, known_warnings_file, log_file))
        return 1

    return 0


def action_linkcheck(args):
    """'linkcheck' action: run Sphinx's linkcheck builder for all combinations."""
    args.builders = "linkcheck"
    return parallel_call(args, call_linkcheck)


def call_linkcheck(entry):
    """Pool worker: thin wrapper unpacking an entry tuple into sphinx_call()."""
    return sphinx_call(*entry)


# https://github.com/espressif/esp-idf/tree/
# https://github.com/espressif/esp-idf/blob/
# https://github.com/espressif/esp-idf/raw/
GH_LINK_RE = r"https://github.com/espressif/esp-idf/(?:tree|blob|raw)/[^\s]+"

# we allow this one doc, because we always want users to see the latest support policy
GH_LINK_ALLOWED = ["https://github.com/espressif/esp-idf/blob/master/SUPPORT_POLICY.md",
                   "https://github.com/espressif/esp-idf/blob/master/SUPPORT_POLICY_CN.md"]


def action_gh_linkcheck(args):
    """'gh-linkcheck' action: fail if any .rst file hardcodes a GitHub ESP-IDF link.

    Returns 1 if disallowed links were found, 0 otherwise.
    """
    print("Checking for hardcoded GitHub links\n")

    github_links = []

    docs_dir = os.path.relpath(os.path.dirname(__file__))
    for root, _, files in os.walk(docs_dir):
        if "_build" in root:
            # skip build output, only check source .rst files
            continue
        files = [os.path.join(root, f) for f in files if f.endswith(".rst")]
        for path in files:
            with open(path, "r") as f:
                for link in re.findall(GH_LINK_RE, f.read()):
                    if link not in GH_LINK_ALLOWED:
                        github_links.append((path, link))

    if github_links:
        for path, link in github_links:
            print("%s: %s" % (path, link))
        print("WARNING: Some .rst files contain hardcoded Github links.")
        print("Please check above output and replace links with one of the following:")
        print("- :idf:`dir` - points to directory inside ESP-IDF")
        print("- :idf_file:`file` - points to file inside ESP-IDF")
        print("- :idf_raw:`file` - points to raw view of the file inside ESP-IDF")
        print("- :component:`dir` - points to directory inside ESP-IDF components dir")
        print("- :component_file:`file` - points to file inside ESP-IDF components dir")
        print("- :component_raw:`file` - points to raw view of the file inside ESP-IDF components dir")
        print("- :example:`dir` - points to directory inside ESP-IDF examples dir")
        print("- :example_file:`file` - points to file inside ESP-IDF examples dir")
        print("- :example_raw:`file` - points to raw view of the file inside ESP-IDF examples dir")
        print("These link types will point to the correct GitHub version automatically")
        return 1
    else:
        print("No hardcoded links found")
        return 0


if __name__ == "__main__":
    main()