rpc-docs-tool
#!/usr/bin/env python3

"""
Script to generate a document about Arti RPC objects and methods.

- Extracts a list of methods and objects by running Arti RPC.
- Extracts markdown documentation by running nightly rustdoc to get JSON output.

- Outputs markdown or html.

Requires rustdoc nightly, for json support.
May require specific versions of rustdoc nightly,
since the json format is unstable.

Known to work with nightly from 2024-10-22.
"""

import io
import json
import os
import re
import subprocess
import sys

# What version of rustdoc json do we know?
SUPPORTED_FORMAT_VERSION = 35
# What nightly do we know to provide that version?
KNOWN_GOOD_NIGHTLY = "nightly-2024-10-22"

# Tricky bits, unsolved
# - Converting links in the md to be correct.
# - Extracting only relevant portions of the rustdoc.

# Location of the arti checkout: the parent of the directory holding this
# script.
#
# We make `__file__` absolute first: when it is a bare relative name,
# `os.path.dirname()` returns "" and the computed root would silently become
# "", i.e. "whatever the current directory happens to be".
ARTI_ROOT = os.path.split(os.path.dirname(os.path.abspath(__file__)))[0]

# Where will we find our rustdoc files?
#
# (This assumes we're being run from an arti checkout.)
TARGET_DIR = os.path.join(ARTI_ROOT, "target")

# With respect to where should we generate our links to objects in rustdoc?
RUSTDOC_ROOT = "https://tpo.pages.torproject.net/core/doc/rust/private/"

# A string we'll use to identify RPC-specific sections in a markdown document.
# (Case-insensitive.)
MAGIC_HEADING = "In The Arti RPC System"

# HTML that we'll use to decorate our markdown output.
HTML_HEADER = """\
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Arti RPC: methods and objects</title>
</head>
"""
HTML_FOOTER = """\
</html>
"""

# This isn't installed by default, so try to give a helpful message.
try:
    import marko
    import marko.md_renderer
except ImportError:
    print(
        "You need to install marko so that we can parse and manipulate markdown.",
        file=sys.stderr,
    )
    print("Try `pip3 install marko`.", file=sys.stderr)
    sys.exit(1)

# This is part of this crate, so try using the version here if we can't
# find a system copy.
try:
    import arti_rpc
except ImportError:
    print("arti_rpc not installed. Faking it...", file=sys.stderr)
    pypath = os.path.join(ARTI_ROOT, "python", "arti_rpc", "src")
    sys.path.append(pypath)
    import arti_rpc


def load_methods_via_rpc():
    """
    Connect to Arti via RPC and ask it for a list of RPC methods.

    Return value is as for "x_list_all_rpc_methods".

    If the connection cannot be opened, print a diagnostic to stderr and
    exit the process with status 1.
    """

    # TODO RPC: This isn't how connection strings will work in production
    socket_path = os.path.expanduser("~/.local/run/arti/SOCKET")
    connect_string = f"unix:{socket_path}"
    try:
        connection = arti_rpc.ArtiRpcConn(connect_string)
    except arti_rpc.ArtiRpcError as e:
        # Diagnostics belong on stderr, like every other message this tool
        # prints: stdout can be the generated document (output "-").
        print(f"Cannot connect to Arti via RPC: {e}", file=sys.stderr)
        print(
            "Is Arti running? Is it built with RPC support? Did you configure it to listen on RPC?",
            file=sys.stderr,
        )
        sys.exit(1)

    # Get a list of RPC methods.
    methods = connection.session().invoke("arti:x_list_all_rpc_methods")
    del connection  # TODO: Implement a connection.close(); Implement "with connection".
    return methods


def mdheader(s, anchor):
    """
    Return Text for a markdown header with
    text `s` (including the ##### header prefix), marking it with a given HTML anchor.

    The header is wrapped in a `<div id=...>`; the blank lines around `s`
    are part of the returned text.
    """
    return f"""
<div id="{anchor}">

{s}

</div>

"""


def doc_subject(doc):
    """
    Return everything from `doc` up to the first blank line,
    joined onto a single line.

    (By rustdoc convention, this is a summary about the documented item.)

    A missing or empty `doc` yields the empty string.
    """
    if not doc:
        return ""
    lines = [s.strip() for s in doc.split("\n")]
    try:
        blank = lines.index("")
    except ValueError:
        # No blank line at all: the whole text is the summary.
        return " ".join(lines)
    return " ".join(lines[:blank])


# An object to parse and render markdown
MD = marko.Markdown(renderer=marko.md_renderer.MarkdownRenderer)


def heading_matches(h, s):
    """
    Return true if the markdown Heading object `h` has exactly
    the case-insensitive text `s`.

    Headings whose content is anything other than a single RawText child
    never match.
    """
    if len(h.children) != 1:
        return False
    if h.children[0].get_type() != "RawText":
        return False
    h_text = h.children[0].children
    return h_text.strip().lower() == s.strip().lower()


def extract_md_section(document, magic_heading=MAGIC_HEADING):
    """
    Try to extract the special markdown section marked with `magic_heading`
    from the markdown `document`.

    The section continues until the beginning of a section header
    with an equal or greater priority.

    On success, replace the whole document with the section.
    (The magic heading itself is not kept.)

    If the section is not found (or contains nothing),
    leave the document alone.
    """
    new_children = []
    in_section = False
    section_h_level = None

    for elt in document.children:
        is_heading = elt.get_type() == "Heading"
        if not in_section and is_heading:
            if heading_matches(elt, magic_heading):
                in_section = True
                section_h_level = elt.level

        elif in_section and is_heading:
            if elt.level > section_h_level:
                # new heading inside the desired section
                new_children.append(elt)
            else:
                # Started a new section of the same or higher level.
                in_section = False

        elif in_section:
            new_children.append(elt)

    # If we found the section at all, we replace the document.
    if new_children:
        document.children = new_children


def recursively_fix_links(doc, linkfixer):
    """
    Run the function `linkfixer` on every markdown link inside `doc`.
    """
    for elt in doc.children:
        if elt.get_type() == "Link":
            linkfixer(elt)
        # Only recurse into real child lists: some elements keep a plain
        # string in `children`, and we don't assume that every element
        # type has the attribute at all.
        if isinstance(getattr(elt, "children", None), list):
            recursively_fix_links(elt, linkfixer)


def adjust_doc(doc, link_table, outer_h_level=3):
    """
    Modify a markdown document that we've gotten from rustdoc json:

    - If there is a section named with `MAGIC_HEADING`,
      include only that section.

    - Increment or decrement the heading level so that the entire document
      nests within a heading of level outer_h_level.
      (Levels are kept within markdown's range of 1 through 6.)

    - Adjust every link that we find. (TODO RPC: Implement!)

    `doc` is the markdown text (None is treated as empty);
    `link_table` maps link labels to URLs.

    Returns a string rendering of the document.
    """
    parser = marko.parser.Parser()

    # We add a bunch of Rustdoc link definitions to the end of the document,
    # since otherwise marko won't know how to handle them.
    parts = [doc or "", "\n\n"]
    parts.extend(f"[{k}]: {v}\n" for k, v in link_table.items())

    document = parser.parse("".join(parts))

    ####
    # Pull out the section that's called MAGIC_HEADING, if any.
    extract_md_section(document)

    ####
    # Adjust heading levels so that they nest within outer_h_level.
    headings = [elt for elt in document.children if elt.get_type() == "Heading"]
    if headings:
        # The shallowest heading lands one level below outer_h_level,
        # and every other heading keeps its offset from it.
        adjustment = outer_h_level + 1 - min(h.level for h in headings)
        for h in headings:
            h.level = max(1, min(6, h.level + adjustment))

    ####
    # Make rustdoc links into something better.
    #
    # TODO: decide what to actually do here.
    recursively_fix_links(document, lambda lnk: None)

    return MD.render(document)


def un_generic(s):
    """
    Remove all generics from a rust type `s`:
    return the part of `s` before its first "<".
    """
    return s.split("<", 1)[0]


class RustIdent:
    """
    A Rust identifier, taken from a Rust type returned by Arti RPC.

    Generic parameters are stripped on construction;
    equality and hashing use the stripped identifier.
    """

    def __init__(self, ident):
        assert ident is not None
        self.ident = un_generic(ident)

    def crate(self):
        """Return the crate in which this type is declared."""
        # Note: We unconditionally replace _ with - in the crate name.
        # This is valid for Arti crates, which are never named with _,
        # but it isn't valid in general.
        return self.ident.split("::")[0].replace("_", "-")

    def __eq__(self, other):
        if not isinstance(other, RustIdent):
            # Let Python fall back to its default comparison instead of
            # raising AttributeError on unrelated types.
            return NotImplemented
        return self.ident == other.ident

    def __hash__(self):
        return hash(self.ident)

    def __repr__(self):
        return f"RustIdent({self.ident!r})"

    def __str__(self):
        return self.ident


class RpcUniverse:
    """
    A set of RPC methods, objects, and result types.
    """

    # Fields
    #
    # methods: A map from method name to dicts of `applies_to_object_types`,
    #     `method_type`, `output_type`, and an optional `update_type`.
    # delegations: A map from RustIdent for an RPC Object type to a
    #     list of the RustIdents which it can delegate to.
    # rustdoc: A map from crate to Rustdoc object.

    def __init__(self, method_list):
        """
        Build a universe from `method_list`, a dict with a "methods" map
        and a "delegations" map (as returned by load_methods_via_rpc()).
        """
        self.methods = dict()
        for mname, minfo in method_list["methods"].items():
            # `update_type` is optional: accept it being null or absent.
            utype_name = minfo.get("update_type")
            self.methods[mname] = {
                "applies_to_object_types": [
                    RustIdent(t) for t in minfo["applies_to_object_types"]
                ],
                "method_type": RustIdent(minfo["method_type"]),
                "output_type": RustIdent(minfo["output_type"]),
                "update_type": (
                    RustIdent(utype_name) if utype_name is not None else None
                ),
            }

        self.delegations = dict()
        for t, lst in method_list["delegations"].items():
            self.delegations[RustIdent(t)] = [RustIdent(t2) for t2 in lst]

        self.apply_delegations()
        self.rustdoc = dict()

    def method_types(self):
        """Return a set of every known RPC method"""
        return set(minfo["method_type"] for minfo in self.methods.values())

    def object_types(self):
        """Return a set of every known RPC object type"""
        s = set()
        for minfo in self.methods.values():
            s.update(minfo["applies_to_object_types"])
        return s

    def output_types(self):
        """Return a set of every known type that can be output from an RPC method"""
        return set(minfo["output_type"] for minfo in self.methods.values())

    def update_types(self):
        """Return a set of every known type that can be an update from an RPC method."""
        return set(
            minfo["update_type"]
            for minfo in self.methods.values()
            if minfo["update_type"] is not None
        )

    def all_types(self):
        """Return a set of all rust types that are relevant for RPC."""
        s = self.method_types()
        s.update(self.object_types())
        s.update(self.output_types())
        s.update(self.update_types())
        return s

    def relevant_crates(self):
        """Return a set of the crates that contain a type relevant to RPC."""
        return set(ident.crate() for ident in self.all_types())

    def apply_delegations(self):
        """
        Adjust the `applies_to_object_types` field for every method,
        to take into account object delegation.

        Only one level of delegation is applied: the delegates of the
        types listed before this call are added, without duplicates.
        """
        for minfo in self.methods.values():
            to_add = []
            applies_list = minfo["applies_to_object_types"]
            for applies_to in applies_list:
                if applies_to in self.delegations:
                    to_add.extend(self.delegations[applies_to])

            for add in to_add:
                if add not in applies_list:
                    applies_list.append(add)

    def build_and_load_rustdoc(self):
        """
        Build and load the rustdoc for every relevant crate.
        """
        # Sorted so that crates are built in the same order on every run.
        for crate in sorted(self.relevant_crates()):
            build_rustdoc_json(crate)
            path = os.path.join(TARGET_DIR, "doc", crate.replace("-", "_") + ".json")
            with open(path, "r", encoding="UTF-8") as f:
                doc = json.load(f)
            self.rustdoc[crate] = Rustdoc(doc)

    def get_methods_for_obj(self, ident):
        """
        Return a generator over every method that can apply to the object `ident`.
        """
        for mname, minfo in self.methods.items():
            if ident in minfo["applies_to_object_types"]:
                yield mname

    def get_doc(self, ident):
        """
        Return the rustdoc for the object `ident`.
        """
        return self.rustdoc[ident.crate()].get_doc(ident)

    def get_link_table(self, ident):
        """
        Return a link table to interpret links in the rustdoc
        for the object `ident`.
        """
        return self.rustdoc[ident.crate()].get_link_table(ident)

    def get_rustdoc_url(self, ident):
        """
        Return a canonical rustdoc URL (within RUSTDOC_ROOT) for `ident`.
        """
        return self.rustdoc[ident.crate()].get_rustdoc_url(ident)

    def _sorted_object_types(self):
        """
        Return the known RPC object types as a list sorted by name.

        object_types() is a set of objects hashed by string, so its
        iteration order can differ from run to run; sorting keeps the
        generated document reproducible.
        """
        return sorted(self.object_types(), key=str)

    def emit_method_index(self, f):
        """
        Write to `f` an index of all known RPC methods.
        """
        f.write(mdheader("### Method index", "idx:methods"))
        for m_str, obj in self.methods.items():
            m_anchor = "method:" + m_str
            m_summary = doc_subject(self.get_doc(obj["method_type"]))
            f.write(f"- [`{m_str}`](#{m_anchor}) — {m_summary}\n")
        f.write("\n\n")

    def emit_single_method(self, f, m_str, obj):
        """
        Write to `f` the documentation for the single method m_str, whose "minfo"
        object is `obj`.
        """
        m_ident = obj["method_type"]
        m_doc = self.get_doc(m_ident)
        link_table = self.get_link_table(m_ident)
        m_summary = doc_subject(m_doc)
        m_anchor = "method:" + m_str
        header = f"### `{m_str}` — {m_summary}"
        f.write(mdheader(header, m_anchor))
        f.write(adjust_doc(m_doc, link_table))
        f.write("\n\n")

        rustdoc_url = self.get_rustdoc_url(m_ident)
        f.write(f"- [Rustdoc]({rustdoc_url})\n")
        if self.rustdoc[m_ident.crate()].type_has_no_fields(m_ident):
            f.write("- (Takes no parameters)\n")
        else:
            f.write(f"- [Parameters]({rustdoc_url}#fields)\n")
        otype = obj["output_type"]
        otype_url = self.get_rustdoc_url(otype)
        f.write(f"- Returns [`{otype}`]({otype_url})\n")
        utype = obj["update_type"]
        if utype is not None:
            utype_url = self.get_rustdoc_url(utype)
            f.write(f"- Yields incremental updates of [`{utype}`]({utype_url})\n")
        else:
            f.write("- (No incremental updates)\n")

        f.write("- **Implemented by**\n")
        for itype in obj["applies_to_object_types"]:
            itype_url = self.get_rustdoc_url(itype)
            anchor = f"object:{itype}"
            # Two-space indent, so these nest under the bullet above.
            f.write(f"  - [`{itype}`](#{anchor}) ([Rustdoc]({itype_url}))\n")

        f.write("\n\n")

    def emit_methods(self, f):
        """
        Write to `f` all per-method documentation.
        """
        for m, obj in self.methods.items():
            self.emit_single_method(f, m, obj)

    def emit_object_index(self, f):
        """
        Write to `f` an index of all known RPC object types, sorted by name.
        """
        f.write(mdheader("### Object index", "idx:objects"))

        for o_name in self._sorted_object_types():
            o_anchor = f"object:{o_name}"
            o_summary = doc_subject(self.get_doc(o_name))
            f.write(f"- [`{o_name}`](#{o_anchor}) — {o_summary}\n")
        f.write("\n\n")

    def emit_single_object(self, f, o_name):
        """
        Write to `f` the documentation for a single RPC object whose type is o_name.
        """
        o_anchor = f"object:{o_name}"
        o_doc = self.get_doc(o_name)
        link_table = self.get_link_table(o_name)
        o_summary = doc_subject(o_doc)
        header = f"### `{o_name}` — {o_summary}"
        f.write(mdheader(header, o_anchor))
        f.write(adjust_doc(o_doc, link_table))
        f.write("\n\n")
        rustdoc_url = self.get_rustdoc_url(o_name)
        f.write(f"- [Rustdoc]({rustdoc_url})\n")
        f.write("- **Implements methods**\n")

        for m_name in sorted(self.get_methods_for_obj(o_name)):
            m_anchor = "method:" + m_name
            # Two-space indent, so these nest under the bullet above.
            f.write(f"  - [`{m_name}`](#{m_anchor})\n")
        f.write("\n\n")

    def emit_objects(self, f):
        """
        Write all per-object documentation to `f`, sorted by object name.
        """
        for o_name in self._sorted_object_types():
            self.emit_single_object(f, o_name)

    def emit_docs(self, f):
        """
        Write all markdown documentation to `f`
        """
        f.write(mdheader("## Methods", "methods"))
        self.emit_method_index(f)
        self.emit_methods(f)
        f.write("\n----\n")

        f.write(mdheader("## Objects", "objects"))
        self.emit_object_index(f)
        self.emit_objects(f)


have_warned_about_json_version = False


def warn_about_json_version(actual_version):
    """
    If we haven't previously done so, warn the user (on stderr) that their
    nightly is generating an unrecognized version of rustdoc json.
    """
    global have_warned_about_json_version
    if have_warned_about_json_version:
        return
    have_warned_about_json_version = True
    msg = f"""
        WARNING: Rustdoc json is in format version {actual_version},
        but this tool expects {SUPPORTED_FORMAT_VERSION}.
        If this fails, you might need to fix the tool,
        or switch to {KNOWN_GOOD_NIGHTLY}.
        """
    # Collapse the literal's line breaks and indentation into single spaces.
    msg = re.sub(r"\s+", " ", msg).strip()
    # stderr, not stdout: stdout can be the generated document (output "-").
    print(msg, file=sys.stderr)


class Rustdoc:
    """
    The rustdoc for a single crate.
    """

    # File-name prefixes that rustdoc's HTML output uses, for the item kinds
    # where the prefix differs from the kind name found in rustdoc json.
    # (Modules are handled separately: they get a directory and index.html.)
    _URL_KIND_PREFIX = {
        "function": "fn",
        "type_alias": "type",
        "typedef": "type",
        "proc_attribute": "attr",
        "proc_derive": "derive",
    }

    def __init__(self, json_doc):
        """
        Wrap `json_doc`, the parsed rustdoc json for one crate,
        warning (once per process) if its format version is unexpected.
        """
        actual_version = json_doc.get("format_version")
        if actual_version != SUPPORTED_FORMAT_VERSION:
            warn_about_json_version(actual_version)

        self.doc = json_doc

        # Map from RustIdent (built from the "::"-joined path) to the
        # item's key in the "paths" table.
        self.idx_by_ident = dict()
        for idx, obj in self.doc["paths"].items():
            path = "::".join(obj["path"])
            self.idx_by_ident[RustIdent(path)] = idx

    def get_doc(self, ident):
        """
        Find the string holding documentation for a single identifier within this crate.

        Returns the empty string if the item's documentation is null.
        Raises KeyError if `ident` is not known in this crate.
        """
        obj = self.doc["index"][self.idx_by_ident[ident]]
        return obj["docs"] or ""

    def get_link_table(self, ident):
        """
        Return a dict mapping reference link ID to corresponding URLs for
        the documentation of `ident`.
        """
        obj = self.doc["index"][self.idx_by_ident[ident]]
        return {
            content: self.get_rustdoc_url_by_idx(idx)
            for content, idx in obj["links"].items()
        }

    def get_rustdoc_url(self, ident):
        """
        Return a URL for the documentation for an identifier within this crate,
        relative to a rustdoc installation at RUSTDOC_ROOT.
        """
        idx = self.idx_by_ident[ident]
        return self.get_rustdoc_url_by_idx(idx)

    def get_rustdoc_url_by_idx(self, idx):
        """
        Return a URL according to a rustdoc "index".

        `idx` may be an int or a string; it is looked up (as a string)
        in the "paths" table.
        """
        obj = self.doc["paths"][str(idx)]
        kind = obj["kind"]
        path = list(obj["path"])
        if kind == "module":
            # Modules are directories in rustdoc's HTML output.
            path.append("index.html")
        else:
            prefix = self._URL_KIND_PREFIX.get(kind, kind)
            path[-1] = f"{prefix}.{path[-1]}.html"
        return RUSTDOC_ROOT + "/".join(path)

    def type_has_no_fields(self, ident):
        """Return true if `ident` is a type that definitely has no fields."""
        obj = self.doc["index"][self.idx_by_ident[ident]]
        try:
            kind = obj["inner"]["struct"]["kind"]
        except (KeyError, TypeError):
            # Not a struct, or not laid out the way we expect.
            return False

        # A unit struct is represented by the bare string "unit", so we
        # must not try to index into it.
        if kind == "unit":
            return True
        if not isinstance(kind, dict):
            return False
        if "plain" in kind:
            plain = kind["plain"]
            fields = plain.get("fields")
            if fields is None:
                return False
            # If rustdoc hid some fields from us, we can't be sure.
            return len(fields) == 0 and not plain.get("has_stripped_fields")
        if "tuple" in kind:
            return len(kind["tuple"]) == 0
        return False


def build_rustdoc_json(crate):
    """
    Use `cargo +nightly rustdoc` to build the json rustdoc for a single crate.

    Raises subprocess.CalledProcessError if cargo exits with failure.
    """
    args = [
        "cargo",
        "+nightly",
        "rustdoc",
        "--quiet",
        "--output-format",
        "json",
        "--all-features",
        "-Zunstable-options",
        "-p",
        crate,
        "--",
        "--document-private-items",
    ]
    subprocess.run(args, check=True)


def run(output, fmt="md"):
    """Process input from arti and rustdoc, and write a combined document
    to `output`.

    Write markdown if `fmt` is "md", and HTML if `fmt` is "html".
    Any other `fmt` raises ValueError before any work is done.
    """
    if fmt not in ("md", "html"):
        raise ValueError(f"Unrecognized output format {fmt!r}")

    print(
        "== STEP 1: Asking Arti RPC for a list of types and methods.", file=sys.stderr
    )

    methods = load_methods_via_rpc()
    universe = RpcUniverse(methods)

    n_methods = len(methods["methods"])
    n_types = len(universe.all_types())
    print(
        f"Found {n_methods} methods and {n_types} relevant Rust types.", file=sys.stderr
    )

    print("== STEP 2: Extracting rustdoc as json", file=sys.stderr)

    universe.build_and_load_rustdoc()

    print("== STEP 3: Emitting markdown", file=sys.stderr)
    # For HTML we first collect the markdown in memory, then convert it.
    md_output = output if fmt == "md" else io.StringIO()

    universe.emit_docs(md_output)

    if fmt == "html":
        print("== STEP 4: Converting to HTML", file=sys.stderr)
        md_html = marko.Markdown()
        parsed = md_html.parse(md_output.getvalue())
        output.write(HTML_HEADER)
        output.write(md_html.render(parsed))
        output.write(HTML_FOOTER)


def main(args):
    """Invoke rpc-docs-tool using the command-line arguments in args.

    (Make sure to omit sys.argv[0], or you will overwrite "rpc-docs-tool")

    If `--format` is not given, the format is taken from the output file's
    extension (".md" or ".html"), defaulting to markdown.
    """
    import argparse

    parser = argparse.ArgumentParser(
        prog="rpc-docs-tool", description="Generate RPC method docs"
    )
    # The document contains non-ASCII text (e.g. "—"), so don't depend on
    # the locale's default encoding.
    parser.add_argument("output", type=argparse.FileType("w", encoding="UTF-8"))
    parser.add_argument("--format", default=None, choices=["md", "html"], dest="fmt")
    args = parser.parse_args(args)

    # Decide what file format to use.
    if args.fmt is None:
        extension = os.path.splitext(args.output.name)[1]
        if extension in (".md", ".html"):
            fmt = extension[1:]
        else:
            fmt = "md"  # default
    else:
        fmt = args.fmt

    run(args.output, fmt)


if __name__ == "__main__":
    main(sys.argv[1:])