# convert-notebooks.py
"""
Converts all .ipynb files from the docs/ folder into .mdx files.

This script uses nbconvert to do the processing.
"""

import multiprocessing
import re
from pathlib import Path

import nbformat
import yaml
from nbconvert.exporters import MarkdownExporter
from nbconvert.preprocessors import Preprocessor

# Directory that is searched recursively for notebooks (relative to the
# current working directory).
SOURCE_DIR = Path("docs/")
# URL prefixes; the notebook path, exactly as discovered under SOURCE_DIR,
# is appended to each of them.
NOTEBOOK_BASE_EDIT_URL = "https://github.com/mlflow/mlflow/edit/master/docs/"
NOTEBOOK_BASE_DOWNLOAD_URL = "https://raw.githubusercontent.com/mlflow/mlflow/master/docs/"


def _escape_backticks(text: str) -> str:
    """Return *text* with every backtick prefixed by a backslash."""
    return text.replace("`", r"\`")


class EscapeBackticksPreprocessor(Preprocessor):
    """nbconvert preprocessor that makes cell content safe for the mdx template."""

    def preprocess_cell(self, cell, resources, cell_index):
        """Escape backticks in code cells and normalise ``<br>`` in raw cells.

        The cell is modified in place. Cells of any other type are left
        untouched.

        Args:
            cell: The notebook cell being processed.
            resources: Conversion resources; passed through unchanged.
            cell_index: Position of the cell in the notebook (unused).

        Returns:
            The ``(cell, resources)`` tuple.
        """
        if cell.cell_type == "code":
            # escape backticks, as code blocks will be rendered
            # inside a custom react component like:
            # <NotebookCellOutput>`{{ content }}`</NotebookCellOutput>
            # and having the backticks causes issues
            cell.source = _escape_backticks(cell.source)

            if "outputs" in cell:
                for output in cell["outputs"]:
                    if "text" in output:
                        output["text"] = _escape_backticks(output["text"])
                    elif "data" in output:
                        data = output["data"]
                        # Only string payloads are escaped; any other value
                        # type is left as-is.
                        for key, value in data.items():
                            if isinstance(value, str):
                                data[key] = _escape_backticks(value)
        elif cell.cell_type == "raw":
            # Rewrite <br> in its self-closing form.
            cell.source = cell.source.replace("<br>", "<br />")

        return cell, resources


# Shared exporter: Markdown output rendered through the "mdx" template found
# under ./scripts/nbconvert_templates, with the preprocessor above applied.
exporter = MarkdownExporter(
    preprocessors=[EscapeBackticksPreprocessor],
    template_name="mdx",
    extra_template_basedirs=["./scripts/nbconvert_templates"],
)


def add_frontmatter(body: str, nb_path: Path) -> str:
    """Prepend a YAML frontmatter block to *body*.

    The frontmatter carries ``custom_edit_url`` (edit base URL plus the
    notebook path) and ``slug`` (the notebook file name without extension).

    Args:
        body: The converted document.
        nb_path: Path of the source notebook.

    Returns:
        *body* preceded by the ``---`` delimited frontmatter.
    """
    frontmatter = {
        # as_posix() keeps forward slashes in the URL regardless of the OS.
        "custom_edit_url": NOTEBOOK_BASE_EDIT_URL + nb_path.as_posix(),
        "slug": nb_path.stem,
    }
    formatted_frontmatter = yaml.dump(frontmatter)

    return f"""---
{formatted_frontmatter}
---

{body}"""


def add_download_button(body: str, nb_path: Path) -> str:
    """Insert a download button below the first H1 header of *body*.

    Args:
        body: The converted document.
        nb_path: Path of the source notebook; appended to the download base URL.

    Returns:
        *body* with the button inserted, or *body* unchanged when it contains
        no line matching the H1 pattern.
    """
    download_url = NOTEBOOK_BASE_DOWNLOAD_URL + nb_path.as_posix()
    download_button = (
        f'<NotebookDownloadButton href="{download_url}">'
        "Download this notebook</NotebookDownloadButton>"
    )

    # Insert the button underneath the first H1 header (assumed to be the title)
    pattern = r"(^#\s+.+$)"

    # A callable replacement is used so that the button markup is inserted
    # verbatim; a replacement *string* would have its backslashes interpreted
    # as regex escapes.
    def _append_button(match: re.Match) -> str:
        return f"{match.group(1)}\n\n{download_button}"

    return re.sub(pattern, _append_button, body, count=1, flags=re.MULTILINE)


# add the imports for our custom cell output components
def add_custom_component_imports(body: str) -> str:
    """Prepend the MDX import lines for the notebook components to *body*.

    Args:
        body: The converted document.

    Returns:
        The import lines, a blank line, *body*, and a trailing newline.
    """
    return f"""import {{ NotebookCodeCell }} from "@site/src/components/NotebookCodeCell"
import {{ NotebookCellOutput }} from "@site/src/components/NotebookCellOutput"
import {{ NotebookHTMLOutput }} from "@site/src/components/NotebookHTMLOutput"
import {{ NotebookDownloadButton }} from "@site/src/components/NotebookDownloadButton"

{body}
"""


def convert_path(nb_path: Path) -> Path:
    """Convert one notebook to an ``.mdx`` file written next to it.

    The output file is named ``<notebook stem>-ipynb.mdx``.

    Args:
        nb_path: Path of the ``.ipynb`` file to convert.

    Returns:
        Path of the written ``.mdx`` file.
    """
    mdx_path = nb_path.with_stem(nb_path.stem + "-ipynb").with_suffix(".mdx")

    # Explicit UTF-8: the platform default encoding is locale-dependent and
    # may not be able to decode/encode non-ASCII notebook content.
    with open(nb_path, encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)

    body, _ = exporter.from_notebook_node(nb)
    # Order matters: the frontmatter has to end up above the component
    # imports, and the button is inserted into the fully assembled document.
    body = add_custom_component_imports(body)
    body = add_frontmatter(body, nb_path)
    body = add_download_button(body, nb_path)

    with open(mdx_path, "w", encoding="utf-8") as f:
        f.write(body)

    return mdx_path


def main() -> None:
    """Convert every notebook found under SOURCE_DIR using a process pool."""
    nb_paths = list(SOURCE_DIR.rglob("*.ipynb"))

    with multiprocessing.Pool() as pool:
        pool.map(convert_path, nb_paths)


if __name__ == "__main__":
    main()