/ docs / build_docs.py
build_docs.py
  1  #!/usr/bin/env python3
  2  # coding=utf-8
  3  #
  4  # Top-level docs builder
  5  #
  6  # This is just a front-end to sphinx-build that can call it multiple times for different language/target combinations
  7  #
  8  # Will build out to _build/LANG/TARGET by default
  9  #
 10  # Specific custom docs functionality should be added in conf_common.py or in a Sphinx extension, not here.
 11  #
 12  # Copyright 2020 Espressif Systems (Shanghai) PTE LTD
 13  #
 14  # Licensed under the Apache License, Version 2.0 (the "License");
 15  # you may not use this file except in compliance with the License.
 16  # You may obtain a copy of the License at
 17  #
 18  #     http://www.apache.org/licenses/LICENSE-2.0
 19  #
 20  # Unless required by applicable law or agreed to in writing, software
 21  # distributed under the License is distributed on an "AS IS" BASIS,
 22  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 23  # See the License for the specific language governing permissions and
 24  # limitations under the License.
 25  #
 26  from __future__ import print_function
 27  import argparse
 28  import locale
 29  import math
 30  import multiprocessing
 31  import os
 32  import os.path
 33  import subprocess
 34  import sys
 35  import re
 36  from packaging import version
 37  from collections import namedtuple
 38  
# Languages and chip targets the docs can be built for. These lists are also
# the allowed values of the --language / --target command line options.
LANGUAGES = ["en", "zh_CN"]
TARGETS = ["esp32", "esp32s2"]

# Sphinx warning log (written through sphinx-build's -w option), the sanitized
# copy produced by check_docs(), and the list of accepted warnings.
# Note: IDF_PATH must be set in the environment, otherwise a KeyError is raised
# as soon as this module is loaded.
SPHINX_WARN_LOG = "sphinx-warning-log.txt"
SPHINX_SANITIZED_LOG = "sphinx-warning-log-sanitized.txt"
SPHINX_KNOWN_WARNINGS = os.path.join(os.environ["IDF_PATH"], "docs", "sphinx-known-warnings.txt")

# Doxygen counterparts of the three files above, plus the Doxygen version that
# check_doxygen_version() compares the local installation against.
DXG_WARN_LOG = "doxygen-warning-log.txt"
DXG_SANITIZED_LOG = "doxygen-warning-log-sanitized.txt"
DXG_KNOWN_WARNINGS = os.path.join(os.environ["IDF_PATH"], "docs", "doxygen-known-warnings.txt")
DXG_CI_VERSION = version.parse('1.8.11')

# One line of a warning log: the text as logged and its sanitized form
# (see sanitize_line()).
LogMessage = namedtuple("LogMessage", "original_text sanitized_text")

# Languages/targets selected for the current run. main() rebinds these from the
# command line arguments and parallel_call() iterates over them.
languages = LANGUAGES
targets = TARGETS
 55  
 56  
 57  def main():
 58      # check Python dependencies for docs
 59      try:
 60          subprocess.check_call([sys.executable,
 61                                 os.path.join(os.environ["IDF_PATH"],
 62                                              "tools",
 63                                              "check_python_dependencies.py"),
 64                                 "-r",
 65                                 "{}/docs/requirements.txt".format(os.environ["IDF_PATH"])
 66                                 ])
 67      except subprocess.CalledProcessError:
 68          raise SystemExit(2)  # stdout will already have these errors
 69  
 70      # This is not the only way to make sure that all files opened by Python are treated as UTF-8, but the other way is passing encoding='utf-8' to all open()
 71      # functions and this way makes Python 2 compatibility really tough if there is any code that assumes text files contain strings (kconfiglib assumes this).
 72      # The reason for that is that you need to import io.open() to support the encoding argument on Python 2, and this function always uses Py2's unicode
 73      # type not the str type.
 74      if ('UTF-8' not in locale.getlocale()) and ('utf8' not in locale.getlocale()):
 75          raise RuntimeError("build_docs.py requires the default locale's encoding to be UTF-8.\n" +
 76                             " - Linux. Setting environment variable LC_ALL=C.UTF-8 when running build_docs.py may be " +
 77                             "enough to fix this.\n"
 78                             " - Windows. Possible solution for the Windows 10 starting version 1803. Go to " +
 79                             "Control Panel->Clock and Region->Region->Administrative->Change system locale...; " +
 80                             "Check `Beta: Use Unicode UTF-8 for worldwide language support` and reboot")
 81  
 82      parser = argparse.ArgumentParser(description='build_docs.py: Build IDF docs', prog='build_docs.py')
 83  
 84      parser.add_argument("--language", "-l", choices=LANGUAGES, required=False)
 85      parser.add_argument("--target", "-t", choices=TARGETS, required=False)
 86      parser.add_argument("--build-dir", "-b", type=str, default="_build")
 87      parser.add_argument("--source-dir", "-s", type=str, default="")
 88      parser.add_argument("--builders", "-bs", nargs='+', type=str, default=["html"],
 89                          help="List of builders for Sphinx, e.g. html or latex, for latex a PDF is also generated")
 90      parser.add_argument("--sphinx-parallel-builds", "-p", choices=["auto"] + [str(x) for x in range(8)],
 91                          help="Parallel Sphinx builds - number of independent Sphinx builds to run", default="auto")
 92      parser.add_argument("--sphinx-parallel-jobs", "-j", choices=["auto"] + [str(x) for x in range(8)],
 93                          help="Sphinx parallel jobs argument - number of threads for each Sphinx build to use", default="1")
 94      parser.add_argument("--input-docs", "-i", nargs='+', default=[""],
 95                          help="List of documents to build relative to the doc base folder, i.e. the language folder. Defaults to all documents")
 96  
 97      action_parsers = parser.add_subparsers(dest='action')
 98  
 99      build_parser = action_parsers.add_parser('build', help='Build documentation')
100      build_parser.add_argument("--check-warnings-only", "-w", action='store_true')
101  
102      action_parsers.add_parser('linkcheck', help='Check links (a current IDF revision should be uploaded to GitHub)')
103  
104      action_parsers.add_parser('gh-linkcheck', help='Checking for hardcoded GitHub links')
105  
106      args = parser.parse_args()
107  
108      global languages
109      if args.language is None:
110          print("Building all languages")
111          languages = LANGUAGES
112      else:
113          languages = [args.language]
114  
115      global targets
116      if args.target is None:
117          print("Building all targets")
118          targets = TARGETS
119      else:
120          targets = [args.target]
121  
122      if args.action == "build" or args.action is None:
123          if args.action is None:
124              args.check_warnings_only = False
125          sys.exit(action_build(args))
126  
127      if args.action == "linkcheck":
128          sys.exit(action_linkcheck(args))
129  
130      if args.action == "gh-linkcheck":
131          sys.exit(action_gh_linkcheck(args))
132  
133  
def parallel_call(args, callback):
    """
    Run `callback` once per selected language/target combination in a process pool.

    args: parsed command line namespace. Reads build_dir, source_dir, builders and
        input_docs; sphinx_parallel_builds and sphinx_parallel_jobs are overwritten
        with the integer values that are actually used.
    callback: function taking one tuple
        (language, target, build_dir, source_dir, sphinx_parallel_jobs, builders, input_docs)
        and returning an integer error code. It is executed in the pool's worker processes.

    Returns 0 if every callback returned 0, otherwise 1 (the individual error
    codes are only printed).
    """
    num_sphinx_builds = len(languages) * len(targets)
    num_cpus = multiprocessing.cpu_count()

    if args.sphinx_parallel_builds == "auto":
        # at most one sphinx build per CPU, up to the number of CPUs
        args.sphinx_parallel_builds = min(num_sphinx_builds, num_cpus)
    else:
        args.sphinx_parallel_builds = int(args.sphinx_parallel_builds)
    # "0" is an accepted command line choice, but a process pool needs at least one worker
    args.sphinx_parallel_builds = max(1, args.sphinx_parallel_builds)

    # Force -j1 because sphinx works incorrectly.
    # This deliberately overrides the command line value, so the "auto" branch and the
    # warning below cannot trigger until this assignment is removed again.
    args.sphinx_parallel_jobs = 1
    if args.sphinx_parallel_jobs == "auto":
        # N CPUs per build job, rounded up - (maybe smarter to round down to avoid contention, idk)
        args.sphinx_parallel_jobs = int(math.ceil(float(num_cpus) / args.sphinx_parallel_builds))
    else:
        args.sphinx_parallel_jobs = int(args.sphinx_parallel_jobs)

    print("Will use %d parallel builds and %d jobs per build" % (args.sphinx_parallel_builds, args.sphinx_parallel_jobs))

    if args.sphinx_parallel_jobs > 1:
        print("WARNING: Sphinx parallel jobs currently produce incorrect docs output with Sphinx 1.8.5")

    # make a list of all combinations of build_docs() args as tuples
    #
    # there's probably a fancy way to do this with itertools but this way is actually readable
    entries = []
    for target in targets:
        for language in languages:
            build_dir = os.path.realpath(os.path.join(args.build_dir, language, target))
            source_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), args.source_dir, language)

            entries.append((language, target, build_dir, source_dir, args.sphinx_parallel_jobs, args.builders, args.input_docs))

    print(entries)

    # The pool owns worker processes and pipes, so always shut it down explicitly:
    # close() after a normal run, terminate() if the run is aborted (e.g. Ctrl-C),
    # so that join() never waits for builds nobody is interested in any more.
    pool = multiprocessing.Pool(args.sphinx_parallel_builds)
    try:
        errcodes = pool.map(callback, entries)
        pool.close()
    except BaseException:
        pool.terminate()
        raise
    finally:
        pool.join()
    print(errcodes)

    failed = [(entry, ret) for ret, entry in zip(errcodes, entries) if ret != 0]
    if not failed:
        return 0

    print("\nThe following language/target combinations failed to build:")
    for entry, ret in failed:
        print("language: %s, target: %s, errcode: %d" % (entry[0], entry[1], ret))
    # Don't re-throw real error code from each parallel process
    return 1
187  
188  
def sphinx_call(language, target, build_dir, src_dir, sphinx_parallel_jobs, buildername, input_docs):
    """
    Run a single Sphinx builder for one language/target combination.

    Sphinx is started as a subprocess with `build_dir` as working directory and its
    combined stdout/stderr is echoed line by line, prefixed with "language/target: ".
    The working directory of the calling process is restored before returning,
    also when an exception is raised.

    language, target: used for the output prefix; target is also passed to Sphinx
        as a tag (-t) and as the idf_target config value.
    build_dir: output directory, created if it does not exist yet.
    src_dir: Sphinx source directory.
    sphinx_parallel_jobs: value for Sphinx's -j option.
    buildername: name of the Sphinx builder (-b), also used as output sub-directory.
    input_docs: list of documents, passed comma-separated as the docs_to_build config value.

    Returns the exit code of the Sphinx process, or 130 after a KeyboardInterrupt.
    Raises OSError if the build directory cannot be created or entered.
    """
    # Note: because this runs in a multiprocessing Process, everything which happens here should be isolated to a single process
    # (ie it doesn't matter if Sphinx is using global variables, as they're its own copy of the global variables)

    # wrap stdout & stderr in a way that lets us see which build_docs instance they come from
    #
    # this doesn't apply to subprocesses, they write to OS stdout & stderr so no prefix appears
    prefix = "%s/%s: " % (language, target)

    print("Building in build_dir: %s" % (build_dir))
    try:
        os.makedirs(build_dir)
    except OSError:
        # An already existing build directory is fine; anything else (permissions,
        # a file in the way, ...) is a real error and must not be hidden.
        if not os.path.isdir(build_dir):
            raise

    # NOTE(review): this environment is prepared but never handed to Popen() below, so
    # BUILDDIR is not visible to Sphinx. Left as is to keep the build behaviour unchanged;
    # confirm whether env=environ was intended.
    environ = {}
    environ.update(os.environ)
    environ['BUILDDIR'] = build_dir

    args = [sys.executable, "-u", "-m", "sphinx.cmd.build",
            "-j", str(sphinx_parallel_jobs),
            "-b", buildername,
            "-d", os.path.join(build_dir, "doctrees"),
            "-w", SPHINX_WARN_LOG,
            "-t", target,
            "-D", "idf_target={}".format(target),
            "-D", "docs_to_build={}".format(",". join(input_docs)),
            src_dir,
            os.path.join(build_dir, buildername)                    # build directory
            ]

    saved_cwd = os.getcwd()
    os.chdir(build_dir)  # also run sphinx in the build directory
    print("Running '%s'" % (" ".join(args)))

    p = None
    try:
        # Note: we can't call sphinx.cmd.build.main() here as multiprocessing doesn't nest >1 layer deep
        # and sphinx.cmd.build() also does a lot of work in the calling thread, especially for j ==1,
        # so using a Python thread for this part is a poor option (GIL)
        p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        for c in iter(p.stdout.readline, b''):
            sys.stdout.write(prefix)
            # never let a stray non-UTF-8 byte in the tool output abort the build
            sys.stdout.write(c.decode('utf-8', 'replace'))
        ret = p.wait()
        sys.stdout.flush()
        return ret
    except KeyboardInterrupt:  # this seems to be the only way to get Ctrl-C to kill everything?
        if p is not None:  # the interrupt may arrive before the process was started
            p.kill()
        return 130  # FIXME It doesn't return this errorcode, why? Just prints stacktrace
    finally:
        # restore the working directory on every path, including unexpected exceptions
        os.chdir(saved_cwd)
242  
243  
def action_build(args):
    """
    Handle the "build" action.

    Unless args.check_warnings_only is set, builds every selected language/target
    combination through parallel_call().

    Returns an integer exit code in every case: the result of parallel_call(),
    or 0 when nothing had to be built.
    """
    if args.check_warnings_only:
        # nothing is built in this mode
        return 0
    return parallel_call(args, call_build_docs)
249  
250  
def check_doxygen_version():
    """
    Print a hint if the local doxygen is newer than the version used by CI.

    Different versions of doxygen may produce different warnings. This could
    cause a build to fail locally, but pass CI and vice versa.

    This is only a diagnostic aid for failed builds, so it never raises because
    of doxygen itself: a missing doxygen binary or an unparsable version string
    just results in a short note.
    """
    try:
        process = subprocess.run(['doxygen', '--version'], encoding='utf-8', stdout=subprocess.PIPE)
    except OSError:
        print('Could not run doxygen to determine its version.')
        return

    # Some doxygen builds append the source revision to the version number
    # (e.g. "1.9.1 (<hash>)"), so only the first field is treated as the version.
    fields = process.stdout.split()
    if not fields:
        print('Could not determine the local doxygen version.')
        return
    doxygen_ver = fields[0]

    try:
        is_newer = version.parse(doxygen_ver) > DXG_CI_VERSION
    except (version.InvalidVersion, TypeError):
        print('Could not interpret the local doxygen version {!r}.'.format(doxygen_ver))
        return

    if is_newer:
        print('Local doxygen version {} is newer than CI doxygen version {}. Local build may contain '
              'warnings that will not be raised when built by CI.'.format(doxygen_ver, DXG_CI_VERSION))
260  
261  
def call_build_docs(entry):
    """
    Build the docs for one language/target entry (worker function for parallel_call()).

    `entry` is the tuple
    (language, target, build_dir, src_dir, sphinx_parallel_jobs, builders, input_docs).
    Each builder is run through sphinx_call(); afterwards the Doxygen and the Sphinx
    warning logs are checked against the known warnings. The first builder with a
    non-zero result stops the run. If the 'latex' builder was requested and everything
    succeeded, the PDF is built as the last step.

    Returns 0 on success, otherwise a non-zero error code.
    """
    language, target, build_dir, src_dir, sphinx_parallel_jobs, builders, input_docs = entry

    # (warning log, known warnings file, sanitized log) - Doxygen first, then Sphinx
    log_checks = (
        (DXG_WARN_LOG, DXG_KNOWN_WARNINGS, DXG_SANITIZED_LOG),
        (SPHINX_WARN_LOG, SPHINX_KNOWN_WARNINGS, SPHINX_SANITIZED_LOG),
    )

    for buildername in builders:
        status = sphinx_call(language, target, build_dir, src_dir, sphinx_parallel_jobs, buildername, input_docs)

        # Warnings are checked after each builder as logs are overwritten
        for warn_log, known_warnings, sanitized_log in log_checks:
            status += check_docs(language, target,
                                 log_file=os.path.join(build_dir, warn_log),
                                 known_warnings_file=known_warnings,
                                 out_sanitized_log_file=os.path.join(build_dir, sanitized_log))

        if status != 0:
            check_doxygen_version()
            return status

    # Build PDF from tex
    if 'latex' in builders:
        status = build_pdf(language, target, os.path.join(build_dir, "latex"))

    return status
289  
290  
def build_pdf(language, target, latex_dir):
    """
    Run latexmk in `latex_dir` to turn the LaTeX sources found there into a PDF.

    The combined stdout/stderr of latexmk is echoed line by line, prefixed with
    "language/target: ". The working directory of the calling process is restored
    before returning, also when an exception is raised.

    Returns the exit code of latexmk, or 130 after a KeyboardInterrupt.
    Raises OSError if `latex_dir` cannot be entered or latexmk cannot be started.
    """
    # Note: because this runs in a multiprocessing Process, everything which happens here should be isolated to a single process

    # wrap stdout & stderr in a way that lets us see which build_docs instance they come from
    #
    # this doesn't apply to subprocesses, they write to OS stdout & stderr so no prefix appears
    prefix = "%s/%s: " % (language, target)

    print("Building PDF in latex_dir: %s" % (latex_dir))

    saved_cwd = os.getcwd()
    os.chdir(latex_dir)

    # Based on read the docs PDFBuilder
    rcfile = 'latexmkrc'

    cmd = [
        'latexmk',
        '-r',
        rcfile,
        '-pdf',
        # When ``-f`` is used, latexmk will continue building if it
        # encounters errors. We still receive a failure exit code in this
        # case, but the correct steps should run.
        '-f',
        '-dvi-',    # dont generate dvi
        '-ps-',     # dont generate ps
        '-interaction=nonstopmode',
        '-quiet',
        '-outdir=build',
    ]

    p = None
    try:
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        for c in iter(p.stdout.readline, b''):
            sys.stdout.write(prefix)
            # never let a stray non-UTF-8 byte in the tool output abort the build
            sys.stdout.write(c.decode('utf-8', 'replace'))
        ret = p.wait()
        sys.stdout.flush()
        return ret
    except KeyboardInterrupt:  # this seems to be the only way to get Ctrl-C to kill everything?
        if p is not None:  # the interrupt may arrive before the process was started
            p.kill()
        return 130  # FIXME It doesn't return this errorcode, why? Just prints stacktrace
    finally:
        # restore the working directory on every path, e.g. also when latexmk is not installed
        os.chdir(saved_cwd)
338  
339  
# Strips the directory part of a path that precedes the first ":" separated field
SANITIZE_FILENAME_REGEX = re.compile(r"[^:]*/([^/:]*)(:.*)")
# Matches the ":<number>:" field that follows the file name
SANITIZE_LINENUM_REGEX = re.compile(r"([^:]*)(:[0-9]+:)(.*)")


def sanitize_line(line):
    """
    Reduce a log message to the parts that matter when comparing warnings.

    Two substitutions are applied, in this order:
        - a leading path is cut down to the bare file name
        - a ":<number>:" field is replaced with the fixed text ":line:"

    Returns the sanitized line; text not matched by either pattern is kept as is.
    """
    without_path = SANITIZE_FILENAME_REGEX.sub(r'\1\2', line)
    return SANITIZE_LINENUM_REGEX.sub(r'\1:line:\3', without_path)
356  
357  
def check_docs(language, target, log_file, known_warnings_file, out_sanitized_log_file):
    """
    Compare the warnings in `log_file` with the accepted ones in `known_warnings_file`.

    Every line of `log_file` is sanitized with sanitize_line() and written to
    `out_sanitized_log_file`, which is left behind for inspection and debugging.
    A sanitized line counts as known if it is found in the known warnings file at or
    after the position of the previously matched line, i.e. the order matters.
    Unknown messages are printed with a `language`/`target` prefix.

    Returns 1 if at least one unknown message was found, otherwise 0.
    """
    # Sanitize all messages, keeping the original text for the report
    logged = []
    with open(log_file) as raw_log, open(out_sanitized_log_file, 'w') as sanitized_log:
        for raw_line in raw_log:
            clean_line = sanitize_line(raw_line)
            logged.append(LogMessage(raw_line, clean_line))
            sanitized_log.write(clean_line)

    with open(known_warnings_file) as known_file:
        known_messages = known_file.readlines()

    if "doxygen" in known_warnings_file:
        # Clean a known Doxygen limitation: it's expected to always document anonymous
        # structs/unions but we don't do this in our docs, so filter these all out with a regex
        # (this won't match any named field, only anonymous members -
        # ie the last part of the field is just <something>::@NUM not <something>::name)
        anonymous_field = re.compile(r".+:line: warning: parameters of member [^:\s]+(::[^:\s]+)*(::@\d+)+ are not \(all\) documented")
        logged = [entry for entry in logged if anonymous_field.match(entry.sanitized_text) is None]

    # Collect every message that has no match among the known messages.
    # The search position only moves forward, so the order of the messages is significant.
    unknown = []
    search_from = 0
    for entry in logged:
        try:
            search_from = known_messages.index(entry.sanitized_text, search_from)
        except ValueError:
            unknown.append(entry)

    if not unknown:
        return 0

    print("\n%s/%s: Build failed due to new/different warnings (%s):\n" % (language, target, log_file))
    for entry in unknown:
        print("%s/%s: %s" % (language, target, entry.original_text), end='')
    print("\n%s/%s: (Check files %s and %s for full details.)" % (language, target, known_warnings_file, log_file))
    return 1
405  
406  
def action_linkcheck(args):
    """
    Handle the "linkcheck" action: run the Sphinx "linkcheck" builder for every
    selected language/target combination.

    Overwrites args.builders and returns the result of parallel_call().
    """
    # A plain string, not a list: call_linkcheck() forwards the entry unchanged, so this
    # value ends up as the `buildername` argument of sphinx_call().
    args.builders = "linkcheck"
    return parallel_call(args, call_linkcheck)
410  
411  
def call_linkcheck(entry):
    """
    Worker function for the "linkcheck" action.

    `entry` is one tuple built by parallel_call(); its items are passed positionally
    to sphinx_call(), whose return value is returned unchanged.
    """
    return sphinx_call(*entry)
414  
415  
# Pattern for hardcoded links into the ESP-IDF GitHub repository, i.e. URLs starting with
# https://github.com/espressif/esp-idf/tree/
# https://github.com/espressif/esp-idf/blob/
# https://github.com/espressif/esp-idf/raw/
# A match extends up to the next whitespace character.
GH_LINK_RE = r"https://github.com/espressif/esp-idf/(?:tree|blob|raw)/[^\s]+"

# we allow the support policy document (and its _CN variant), because we always want users to
# see the latest support policy; a link has to match one of these entries exactly
GH_LINK_ALLOWED = ["https://github.com/espressif/esp-idf/blob/master/SUPPORT_POLICY.md",
                   "https://github.com/espressif/esp-idf/blob/master/SUPPORT_POLICY_CN.md"]
424  
425  
def action_gh_linkcheck(args):
    """
    Handle the "gh-linkcheck" action: search all .rst files below the directory of
    this script for hardcoded links into the ESP-IDF GitHub repository.

    Directories whose path contains "_build" are skipped. Links listed in
    GH_LINK_ALLOWED are accepted. `args` is not used; it is only there so that all
    actions share the same signature.

    Returns 1 (after printing the offending links) if any hardcoded link was found,
    otherwise 0.
    """
    print("Checking for hardcoded GitHub links\n")

    github_links = []

    # Make the script path absolute first: if the script is started as "python build_docs.py",
    # __file__ may have no directory part, and os.path.relpath("") raises ValueError.
    docs_dir = os.path.relpath(os.path.dirname(os.path.abspath(__file__)))
    for root, dirs, files in os.walk(docs_dir):
        # prune build output in place so os.walk() does not descend into it at all
        dirs[:] = [d for d in dirs if "_build" not in d]
        if "_build" in root:
            continue
        for name in files:
            if not name.endswith(".rst"):
                continue
            path = os.path.join(root, name)
            with open(path, "r") as f:
                for link in re.findall(GH_LINK_RE, f.read()):
                    if link not in GH_LINK_ALLOWED:
                        github_links.append((path, link))

    if github_links:
        for path, link in github_links:
            print("%s: %s" % (path, link))
        print("WARNING: Some .rst files contain hardcoded Github links.")
        print("Please check above output and replace links with one of the following:")
        print("- :idf:`dir` - points to directory inside ESP-IDF")
        print("- :idf_file:`file` - points to file inside ESP-IDF")
        print("- :idf_raw:`file` - points to raw view of the file inside ESP-IDF")
        print("- :component:`dir` - points to directory inside ESP-IDF components dir")
        print("- :component_file:`file` - points to file inside ESP-IDF components dir")
        print("- :component_raw:`file` - points to raw view of the file inside ESP-IDF components dir")
        print("- :example:`dir` - points to directory inside ESP-IDF examples dir")
        print("- :example_file:`file` - points to file inside ESP-IDF examples dir")
        print("- :example_raw:`file` - points to raw view of the file inside ESP-IDF examples dir")
        print("These link types will point to the correct GitHub version automatically")
        return 1
    else:
        print("No hardcoded links found")
        return 0
461  
462  
# Run the builder only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()