/ maint / rpc-docs-tool
rpc-docs-tool
  1  #!/usr/bin/env python3
  2  
  3  """
  4  Script to generate a document about Arti RPC objects and methods.
  5  
  6  - Extracts a list of methods and objects by running Arti RPC.
  7  - Extracts markdown documentation by running nightly rustdoc to get JSON output.
  8  
  9  - Outputs markdown or html.
 10  
 11  Requires rustdoc nightly, for json support.
 12  May require specific versions of rustdoc nightly,
 13  since the json format is unstable.
 14  
 15  Known to work with nightly from 2024-10-22.
 16  """
 17  
 18  import io
 19  import json
 20  import os
 21  import re
 22  import subprocess
 23  import sys
 24  
 25  # What version of rustdoc json do we know?
 26  SUPPORTED_FORMAT_VERSION = 35
 27  # What nightly do we know to provide that version?
 28  KNOWN_GOOD_NIGHTLY = "nightly-2024-10-22"
 29  
 30  # Tricky bits, unsolved
 31  #   - Converting links in the md to be correct.
 32  #   - Extracting only relevant portions of the rustdoc.
 33  
 34  # Location of the arti checkout.
 35  ARTI_ROOT = os.path.split(os.path.dirname(__file__))[0]
 36  
 37  # Where will we find our rustdoc files?
 38  #
 39  # (This assumes we're being run from an arti checkout.)
 40  TARGET_DIR = os.path.join(ARTI_ROOT, "target")
 41  
# Base URL of the published rustdoc; our links to objects in rustdoc are generated relative to it.
 43  RUSTDOC_ROOT = "https://tpo.pages.torproject.net/core/doc/rust/private/"
 44  
 45  # A string we'll use to identify RPC-specific sections in a markdown document.
 46  # (Case-insensitive.)
 47  MAGIC_HEADING = "In The Arti RPC System"
 48  
 49  # HTML that we'll use to decorate our markdown output.
 50  HTML_HEADER = """\
 51  <!DOCTYPE html>
 52  <html lang="en">
 53  <head>
 54  <meta charset="UTF-8">
 55  <title>Arti RPC: methods and objects</title>
 56  </head>
 57  """
 58  HTML_FOOTER = """\
 59  </html>
 60  """
 61  
 62  # This isn't installed by default, so try to give a helpful message.
 63  try:
 64      import marko
 65      import marko.md_renderer
 66  except ImportError:
 67      print(
 68          "You need to install marko so that we can parse and manipulate markdown.",
 69          file=sys.stderr,
 70      )
 71      print("Try `pip3 install marko`.", file=sys.stderr)
 72      sys.exit(1)
 73  
 74  # This is part of this crate, so try using the version here if we can't
 75  # find a system copy.
 76  try:
 77      import arti_rpc
 78  except ImportError:
 79      print("arti_rpc not installed. Faking it...", file=sys.stderr)
 80      pypath = os.path.join(ARTI_ROOT, "python", "arti_rpc", "src")
 81      sys.path.append(pypath)
 82      import arti_rpc
 83  
 84  
 85  def load_methods_via_rpc():
 86      """
 87      Connect to Arti via RPC and ask it for a list of RPC methods.
 88  
 89      Return value is as for "x_list_all_rpc_methods".
 90      """
 91  
 92      # TODO RPC: This isn't how connection strings will work in production
 93      socket_path = os.path.expanduser("~/.local/run/arti/SOCKET")
 94      connect_string = f"unix:{socket_path}"
 95      try:
 96          connection = arti_rpc.ArtiRpcConn(connect_string)
 97      except arti_rpc.ArtiRpcError as e:
 98          print(f"Cannot connect to Arti via RPC: {e}")
 99          print(
100              "Is Arti running? Is it built with RPC support? Did you configure it to listen on RPC?"
101          )
102          sys.exit(1)
103  
104      # Get a list of RPC methods.
105      methods = connection.session().invoke("arti:x_list_all_rpc_methods")
106      del connection  # TODO: Implement a connection.close(); Implement "with connection".
107      return methods
108  
109  
def mdheader(s, anchor):
    """
    Wrap the markdown header line `s` (which must already carry its
    ##### prefix) in an HTML <div> whose id is `anchor`.

    The result starts with a newline and ends with a blank line, and the
    header itself is separated from the <div> tags by blank lines.
    """
    pieces = [
        "",
        f'<div id="{anchor}">',
        "",
        f"{s}",
        "",
        "</div>",
        "",
        "",
    ]
    return "\n".join(pieces)
123  
124  
def doc_subject(doc):
    """
    Return the leading paragraph of `doc`, collapsed onto a single line.

    Lines are stripped of surrounding whitespace and joined with single
    spaces; collection stops at the first line that is empty after
    stripping.  If there is no such line, the whole of `doc` is used.

    (By rustdoc convention, this is a summary about the documented item.)
    """
    summary = []
    for raw_line in doc.split("\n"):
        text = raw_line.strip()
        if text == "":
            # First blank line: the summary paragraph is over.
            break
        summary.append(text)
    return " ".join(summary)
137  
138  
139  # An object to parse and render markdown
140  MD = marko.Markdown(renderer=marko.md_renderer.MarkdownRenderer)
141  
142  
def heading_matches(h, s):
    """
    Report whether the markdown Heading object `h` consists of a single
    "RawText" child whose text equals `s`.

    The comparison ignores case and surrounding whitespace on both sides.
    Headings with any other shape (no children, several children, or a
    non-RawText child) never match.
    """
    kids = h.children
    if len(kids) == 1 and kids[0].get_type() == "RawText":
        wanted = s.strip().lower()
        actual = kids[0].children.strip().lower()
        return actual == wanted
    return False
154  
155  
def extract_md_section(document, magic_heading=MAGIC_HEADING):
    """
    Narrow the markdown `document` down to the section introduced by a
    heading whose text is `magic_heading`, if there is one.

    The matching heading itself is dropped.  Everything after it is kept,
    including headings of a deeper level, until a heading of the same or
    a shallower level closes the section.

    The document's `children` are replaced in place, but only when at
    least one element was collected; otherwise the document is untouched.
    """
    kept = []
    inside = False
    opening_level = None

    for node in document.children:
        if node.get_type() != "Heading":
            # Ordinary content: keep it only while we are in the section.
            if inside:
                kept.append(node)
            continue

        if not inside:
            # Looking for the heading that opens the section.
            if heading_matches(node, magic_heading):
                inside = True
                opening_level = node.level
        elif node.level > opening_level:
            # A sub-heading nested within the section we want.
            kept.append(node)
        else:
            # A heading of the same or higher rank ends the section.
            inside = False

    # Only replace the document if the section produced something.
    if kept:
        document.children = kept
193  
194  
def recursively_fix_links(doc, linkfixer):
    """
    Call `linkfixer(element)` on every "Link" element found below `doc`.

    The tree is walked depth-first in document order.  An element is
    descended into only when its `children` attribute is a list; a link
    is passed to `linkfixer` before its own children are visited.
    """
    # Each stack entry is an iterator over one level of children that
    # we have not finished visiting yet.
    pending = [iter(doc.children)]
    while pending:
        try:
            node = next(pending[-1])
        except StopIteration:
            pending.pop()
            continue

        if node.get_type() == "Link":
            linkfixer(node)
        if isinstance(node.children, list):
            pending.append(iter(node.children))
204  
205  
def adjust_doc(doc, link_table, outer_h_level=3):
    """
    Modify a markdown document that we've gotten from rustdoc json:

    - If there is a section named with `MAGIC_HEADING`,
      include only that section.

    - Increment or decrement the heading level so that the entire document
      nests within a heading of level outer_h_level.
      (The shallowest top-level heading ends up at `outer_h_level + 1`;
      resulting levels are kept within markdown's range of 1 through 6.)

    - Adjust every link that we find. (TODO RPC: Implement!)

    `doc` is the markdown text, and `link_table` maps reference-link
    labels to URLs.

    Returns a string rendering of the document.
    """
    parser = marko.parser.Parser()

    # We add a bunch of Rustdoc link definitions to the end of the document,
    # since otherwise marko won't know how to handle them.
    parts = [doc, "\n\n"]
    parts.extend(f"[{k}]: {v}\n" for k, v in link_table.items())
    document = parser.parse("".join(parts))

    ####
    # Pull out the section that's called MAGIC_HEADING, if any.
    extract_md_section(document)

    ####
    # Adjust heading levels so that they nest within outer_h_level.
    #
    # Note that get_type is a method and must be *called*: comparing the
    # method object itself with a string is always false.
    headings = [elt for elt in document.children if elt.get_type() == "Heading"]
    if headings:
        min_hlevel = min(elt.level for elt in headings)
        adjustment = outer_h_level + 1 - min_hlevel
        for elt in headings:
            # Markdown has no heading levels outside 1..6.
            elt.level = max(1, min(6, elt.level + adjustment))

    ####
    # Make rustdoc links into something better.
    #
    # TODO: decide what to actually do here.
    recursively_fix_links(document, lambda lnk: None)

    return MD.render(document)
256  
257  
def un_generic(s):
    """
    Strip the generic parameters from the rust type `s` by cutting it off
    at its first "<".

    A string without any "<" is returned unchanged.
    """
    before_generics, _, _ = s.partition("<")
    return before_generics
267  
268  
class RustIdent:
    """
    A Rust identifier, taken from a Rust type returned by Arti RPC.

    Any generic parameters are removed on construction, so the stored
    `ident` is a plain `::`-separated path.  Instances compare and hash
    by that path, so they can be used in sets and as dict keys.
    """

    def __init__(self, ident):
        """Create an identifier from the Rust type string `ident`."""
        assert ident is not None
        self.ident = un_generic(ident)

    def crate(self):
        """Return the crate in which this type is declared."""
        # Note: We unconditionally replace _ with - in the crate name.
        # This is valid for Arti crates, which are never named with _,
        # but it isn't valid in general.
        return self.ident.split("::")[0].replace("_", "-")

    def __eq__(self, other):
        # Comparing against something that isn't a RustIdent should just be
        # "not equal", not an AttributeError on the missing `ident`.
        if not isinstance(other, RustIdent):
            return NotImplemented
        return self.ident == other.ident

    def __hash__(self):
        return hash(self.ident)

    def __repr__(self):
        return f"RustIdent({self.ident!r})"

    def __str__(self):
        return self.ident
296  
297  
class RpcUniverse:
    """
    A set of RPC methods, objects, and result types.
    """

    # Fields
    #
    # methods: A map from method name to dicts of `applies_to_object_types`,
    #      `method_type`, `output_type`, and an optional `update_type`.
    #      (Types are held as RustIdent; `update_type` is None when the
    #      method has no update type.)
    # delegations: A map from RustIdent for an RPC Object type to a
    #   list of the RustIdents which it can delegate to.
    # rustdoc: A map from crate to Rustdoc object.
    #   (Empty until build_and_load_rustdoc() has been called.)
    def __init__(self, method_list):
        """
        Build the universe from `method_list`, a dict with a "methods"
        entry and a "delegations" entry.

        Delegations are applied to the methods immediately; rustdoc is
        not loaded here.
        """
        self.methods = dict()
        for mname, minfo in method_list["methods"].items():
            # `update_type` is optional: a missing key is treated the
            # same as an explicit null.
            utype = minfo.get("update_type")
            self.methods[mname] = {
                "applies_to_object_types": [
                    RustIdent(t) for t in minfo["applies_to_object_types"]
                ],
                "method_type": RustIdent(minfo["method_type"]),
                "output_type": RustIdent(minfo["output_type"]),
                "update_type": RustIdent(utype) if utype is not None else None,
            }

        self.delegations = {
            RustIdent(t): [RustIdent(t2) for t2 in lst]
            for t, lst in method_list["delegations"].items()
        }

        self.apply_delegations()
        self.rustdoc = dict()

    def method_types(self):
        """Return a set of every known RPC method"""
        return {minfo["method_type"] for minfo in self.methods.values()}

    def object_types(self):
        """Return a set of every known RPC object type"""
        s = set()
        for minfo in self.methods.values():
            s.update(minfo["applies_to_object_types"])
        return s

    def _sorted_object_types(self):
        """
        Return every known RPC object type as a list, ordered by name.

        Sets of identifiers iterate in an order that depends on string
        hashing, which can differ from one run to the next; sorting keeps
        the generated document stable.
        """
        return sorted(self.object_types(), key=str)

    def output_types(self):
        """Return a set of every known type that can be output from an RPC method"""
        return {minfo["output_type"] for minfo in self.methods.values()}

    def update_types(self):
        """Return a set of every known type that can be an update from an RPC method."""
        return {
            minfo["update_type"]
            for minfo in self.methods.values()
            if minfo["update_type"] is not None
        }

    def all_types(self):
        """Return a set of all rust types that are relevant for RPC."""
        s = self.method_types()
        s.update(self.object_types())
        s.update(self.output_types())
        s.update(self.update_types())
        return s

    def relevant_crates(self):
        """Return the set of crates that contain a type relevant to RPC."""
        return {ident.crate() for ident in self.all_types()}

    def apply_delegations(self):
        """
        Adjust the `applies_to_object_types` field for every method,
        to take into account object delegation.

        For each type already in a method's list that has an entry in
        `delegations`, the delegation targets are appended to the list
        (skipping any that are already present).  Only one level of
        delegation is followed.
        """
        for minfo in self.methods.values():
            applies_list = minfo["applies_to_object_types"]
            # Collect first, append afterwards, so that we never grow the
            # list while we are iterating over it.
            to_add = []
            for applies_to in applies_list:
                to_add.extend(self.delegations.get(applies_to, ()))

            for add in to_add:
                if add not in applies_list:
                    applies_list.append(add)

    def build_and_load_rustdoc(self):
        """
        Build and load the rustdoc for every relevant crate.

        The JSON for each crate is read from the "doc" directory under
        TARGET_DIR and stored in `self.rustdoc`.
        """
        # Sorted so that crates are built in the same order on every run.
        for crate in sorted(self.relevant_crates()):
            build_rustdoc_json(crate)
            # The json file is named with "_" where the crate name has "-".
            path = os.path.join(TARGET_DIR, "doc", crate.replace("-", "_") + ".json")
            with open(path, "r", encoding="UTF-8") as f:
                doc = json.load(f)
            self.rustdoc[crate] = Rustdoc(doc)

    def get_methods_for_obj(self, ident):
        """
        Return a generator over every method that can apply to the object `ident`.
        """
        for mname, minfo in self.methods.items():
            if ident in minfo["applies_to_object_types"]:
                yield mname

    def get_doc(self, ident):
        """
        Return the rustdoc for the object `ident`.
        """
        return self.rustdoc[ident.crate()].get_doc(ident)

    def get_link_table(self, ident):
        """
        Return a link table to interpret links in the rustdoc
        for the object `ident`.
        """
        return self.rustdoc[ident.crate()].get_link_table(ident)

    def get_rustdoc_url(self, ident):
        """
        Return a canonical rustdoc URL (within RUSTDOC_ROOT) for `ident`.
        """
        return self.rustdoc[ident.crate()].get_rustdoc_url(ident)

    def emit_method_index(self, f):
        """
        Write to `f` an index of all known RPC methods.

        Each entry links to the method's anchor and shows the summary
        line of its documentation.
        """
        f.write(mdheader("### Method index", "idx:methods"))
        for m_str, obj in self.methods.items():
            m_anchor = "method:" + m_str
            m_summary = doc_subject(self.get_doc(obj["method_type"]))
            f.write(f"- [`{m_str}`](#{m_anchor}) — {m_summary}\n")
        f.write("\n\n")

    def emit_single_method(self, f, m_str, obj):
        """
        Write to `f` the documentation for the single method m_str, whose "minfo"
        object is `obj`.
        """
        m_ident = obj["method_type"]
        m_doc = self.get_doc(m_ident)
        link_table = self.get_link_table(m_ident)
        m_summary = doc_subject(m_doc)
        m_anchor = "method:" + m_str
        header = f"### `{m_str}` — {m_summary}"
        f.write(mdheader(header, m_anchor))
        f.write(adjust_doc(m_doc, link_table))
        f.write("\n\n")

        # A bullet list of facts about the method.
        rustdoc_url = self.get_rustdoc_url(m_ident)
        f.write(f"- [Rustdoc]({rustdoc_url})\n")
        if self.rustdoc[m_ident.crate()].type_has_no_fields(m_ident):
            f.write("- (Takes no parameters)\n")
        else:
            f.write(f"- [Parameters]({rustdoc_url}#fields)\n")
        otype = obj["output_type"]
        otype_url = self.get_rustdoc_url(otype)
        f.write(f"- Returns [`{otype}`]({otype_url})\n")
        utype = obj["update_type"]
        if utype is not None:
            utype_url = self.get_rustdoc_url(utype)
            f.write(f"- Yields incremental updates of [`{utype}`]({utype_url})\n")
        else:
            f.write("- (No incremental updates)\n")

        f.write("- **Implemented by**\n")
        for itype in obj["applies_to_object_types"]:
            itype_url = self.get_rustdoc_url(itype)
            anchor = f"object:{itype}"
            f.write(f"    - [`{itype}`](#{anchor}) ([Rustdoc]({itype_url}))\n")

        f.write("\n\n")

    def emit_methods(self, f):
        """
        Write to `f` all per-method documentation.
        """
        for m, obj in self.methods.items():
            self.emit_single_method(f, m, obj)

    def emit_object_index(self, f):
        """
        Write to `f` an index of all known RPC object types,
        ordered by type name.
        """
        f.write(mdheader("### Object index", "idx:objects"))

        for o_name in self._sorted_object_types():
            o_anchor = f"object:{o_name}"
            o_summary = doc_subject(self.get_doc(o_name))
            f.write(f"- [`{o_name}`](#{o_anchor}) — {o_summary}\n")
        f.write("\n\n")

    def emit_single_object(self, f, o_name):
        """
        Write to `f` the documentation for a single RPC object whose type is o_name.
        """
        o_anchor = f"object:{o_name}"
        o_doc = self.get_doc(o_name)
        link_table = self.get_link_table(o_name)
        o_summary = doc_subject(o_doc)
        header = f"### `{o_name}` — {o_summary}"
        f.write(mdheader(header, o_anchor))
        f.write(adjust_doc(o_doc, link_table))
        f.write("\n\n")
        rustdoc_url = self.get_rustdoc_url(o_name)
        f.write(f"- [Rustdoc]({rustdoc_url})\n")
        f.write("- **Implements methods**\n")

        for m_name in sorted(self.get_methods_for_obj(o_name)):
            m_anchor = "method:" + m_name
            f.write(f"   - [`{m_name}`](#{m_anchor})\n")
        f.write("\n\n")

    def emit_objects(self, f):
        """
        Write all per-object documentation to `f`, ordered by type name.
        """
        for o_name in self._sorted_object_types():
            self.emit_single_object(f, o_name)

    def emit_docs(self, f):
        """
        Write all markdown documentation to `f`:
        first the methods, then a separator, then the objects.
        """
        f.write(mdheader("## Methods", "methods"))
        self.emit_method_index(f)
        self.emit_methods(f)
        f.write("\n----\n")

        f.write(mdheader("## Objects", "objects"))
        self.emit_object_index(f)
        self.emit_objects(f)
534  
535  
536  have_warned_about_json_version = False
537  
538  
def warn_about_json_version(actual_version):
    """
    If we haven't previously done so, warn the user that their nightly
    is generating an unrecognized version of rustdoc json.

    `actual_version` is the format version that was found.  The warning
    is written to stderr, at most once per process.
    """
    global have_warned_about_json_version
    if have_warned_about_json_version:
        return
    have_warned_about_json_version = True

    msg = f"""
          WARNING: Rustdoc json is in format version {actual_version},
          but this tool expects {SUPPORTED_FORMAT_VERSION}.
          If this fails, you might need to fix the tool,
          or switch to {KNOWN_GOOD_NIGHTLY}.
    """
    # Collapse the indentation and line breaks of the literal above
    # into single spaces.
    msg = re.sub(r"\s+", " ", msg).strip()
    # This is a diagnostic, so it goes to stderr like the tool's other
    # progress messages rather than to stdout.
    print(msg, file=sys.stderr)
555  
556  
class Rustdoc:
    """
    The rustdoc for a single crate.
    """

    # Fields
    #
    # doc: The parsed rustdoc json for the crate.
    # idx_by_ident: A map from RustIdent to the key under which that item
    #   appears in the "paths" table of `doc`.

    def __init__(self, json_doc):
        """
        Wrap the parsed rustdoc json `json_doc`, warning if its format
        version is not the one this tool was written for.
        """
        actual_version = json_doc.get("format_version")
        if actual_version != SUPPORTED_FORMAT_VERSION:
            warn_about_json_version(actual_version)

        self.doc = json_doc

        self.idx_by_ident = dict()
        for idx, obj in self.doc["paths"].items():
            path = "::".join(obj["path"])
            self.idx_by_ident[RustIdent(path)] = idx

    def get_doc(self, ident):
        """
        Find the string holding documentation for a single identifier within this crate.

        Returns an empty string if the item has no documentation.
        """
        obj = self.doc["index"][self.idx_by_ident[ident]]
        docs = obj["docs"]
        # The json holds null for an undocumented item; our callers
        # split and concatenate the result, so give them a string.
        return "" if docs is None else docs

    def get_link_table(self, ident):
        """
        Return a dict mapping reference link ID to corresponding URLs for
        the documentation of `ident`.
        """
        obj = self.doc["index"][self.idx_by_ident[ident]]
        return {
            content: self.get_rustdoc_url_by_idx(idx)
            for content, idx in obj["links"].items()
        }

    def get_rustdoc_url(self, ident):
        """
        Return a URL for the documentation for an identifier within this crate,
        relative to a rustdoc installation at RUSTDOC_ROOT.
        """
        idx = self.idx_by_ident[ident]
        return self.get_rustdoc_url_by_idx(idx)

    def get_rustdoc_url_by_idx(self, idx):
        """
        Return a URL according to a rustdoc "index".
        """
        # The "paths" table is keyed by strings, so convert in case we
        # were handed an index of another type.
        obj = self.doc["paths"][str(idx)]
        kind = obj["kind"]
        # Copy the path so that we don't modify the loaded json.
        path = obj["path"][:]
        path[-1] = f"{kind}.{path[-1]}.html"
        return RUSTDOC_ROOT + "/".join(path)

    def type_has_no_fields(self, ident):
        """Return true if `ident` is a type that definitely has no fields."""
        obj = self.doc["index"][self.idx_by_ident[ident]]
        try:
            kind = obj["inner"]["struct"]["kind"]
        except (KeyError, TypeError):
            # Not a struct (or not shaped as we expect): we can't be sure.
            return False

        # NOTE(review): rustdoc json appears to encode a unit struct's kind
        # as the bare string "unit" rather than as a dict; indexing that
        # string with "plain" would raise TypeError. Confirm against the
        # rustdoc json format in use.
        if kind == "unit":
            return True

        try:
            fields = kind["plain"]["fields"]
        except (KeyError, TypeError):
            return False

        return len(fields) == 0
620  
621  
def build_rustdoc_json(crate):
    """
    Run `cargo +nightly rustdoc` to generate the json rustdoc for the
    single crate named `crate`, with all features enabled and private
    items documented.

    Raises subprocess.CalledProcessError if cargo exits with a failure
    status.
    """
    cargo_options = [
        "--quiet",
        "--output-format",
        "json",
        "--all-features",
        "-Zunstable-options",
        "-p",
        crate,
    ]
    # These come after the "--" separator on the command line.
    trailing_options = ["--document-private-items"]

    command = ["cargo", "+nightly", "rustdoc"]
    command += cargo_options
    command.append("--")
    command += trailing_options
    subprocess.run(command, check=True)
641  
642  
def run(output, fmt="md"):
    """Process input from arti and rustdoc, and write a combined document
    to `output`.

    Write markdown if `fmt` is "md", and HTML if `fmt` is "html".
    Raise ValueError for any other `fmt`.

    Progress messages are written to stderr.
    """
    # Check this first: otherwise an unknown format would only show up,
    # as silently missing output, after all of the slow steps below.
    if fmt not in ("md", "html"):
        raise ValueError(f"Unrecognized output format {fmt!r}; expected 'md' or 'html'")

    print(
        "== STEP 1: Asking Arti RPC for a list of types and methods.", file=sys.stderr
    )

    methods = load_methods_via_rpc()
    universe = RpcUniverse(methods)

    n_methods = len(methods["methods"])
    n_types = len(universe.all_types())
    print(
        f"Found {n_methods} methods and {n_types} relevant Rust types.", file=sys.stderr
    )

    print("== STEP 2: Extracting rustdoc as json", file=sys.stderr)

    universe.build_and_load_rustdoc()

    print("== STEP 3: Emitting markdown", file=sys.stderr)
    if fmt == "md":
        # Markdown is the final product: write it straight to the output.
        md_output = output
    else:
        # Markdown is only an intermediate step: collect it in memory.
        md_output = io.StringIO()

    universe.emit_docs(md_output)

    if fmt == "html":
        print("== STEP 4: Converting to HTML", file=sys.stderr)
        md_html = marko.Markdown()
        parsed = md_html.parse(md_output.getvalue())
        output.write(HTML_HEADER)
        output.write(md_html.render(parsed))
        output.write(HTML_FOOTER)
681  
682  
def main(args):
    """Invoke rpc-docs-tool using the command-line arguments in args.

    (Make sure to omit sys.argv[0], or you will overwrite "rpc-docs-tool")

    The output format is taken from `--format` if given; otherwise it is
    guessed from the extension of the output file name, defaulting to
    markdown.
    """
    import argparse

    parser = argparse.ArgumentParser(
        prog="rpc-docs-tool", description="Generate RPC method docs"
    )
    parser.add_argument("output", type=argparse.FileType("w"))
    parser.add_argument("--format", default=None, choices=["md", "html"], dest="fmt")
    args = parser.parse_args(args)

    # Decide what file format to use.
    if args.fmt is None:
        extension = os.path.splitext(args.output.name)[1]
        if extension in [".md", ".html"]:
            fmt = extension[1:]
        else:
            fmt = "md"  # default
    else:
        fmt = args.fmt

    try:
        run(args.output, fmt)
    finally:
        # Close (and so flush) the file that argparse opened for us,
        # but leave stdout alone if that is what we were given.
        if args.output is not sys.stdout:
            args.output.close()
710  
711  
712  if __name__ == "__main__":
713      main(sys.argv[1:])