analyze_link_failures.jl
#!/usr/bin/env julia

"""
Link Failure Analysis Script

Runs the documentation link validator over `docs/src`, sorts every failed link
into a failure category, and prints a truncated report per category followed by
the most frequently referenced failing targets. It was written to triage a batch
of 502 link failures.
"""

using Pkg
Pkg.activate("docs")

# Include the test modules
include("docs/test/LinkValidator.jl")
using .LinkValidator

"""
    strip_fragment(link)

Return `link` without its trailing `#anchor` part (the whole link when it has none).
"""
strip_fragment(link) = String(first(split(link, '#'; limit=2)))

"""
    resolve_link_target(source, link)

Return the normalized filesystem path that `link`, found in file `source`, refers to.

The `#anchor` fragment is ignored. A link consisting only of an anchor refers to
`source` itself. Every other link is resolved against the directory of `source`;
`joinpath` leaves absolute links untouched. For links that do not start with `../`,
the path relative to the current working directory is used as a fallback when the
source-relative path does not exist but the working-directory one does.
"""
function resolve_link_target(source, link)
    path = strip_fragment(link)
    isempty(path) && return normpath(source)

    from_source = normpath(joinpath(dirname(source), path))
    from_cwd = normpath(path)
    if !startswith(path, "../") && !isfile(from_source) && isfile(from_cwd)
        return from_cwd
    end
    return from_source
end

"""
    classify_failure(source, link)

Return `(category, entry)` for a failed `link` found in file `source`.

`category` is one of `:at_ref`, `:malformed`, `:external`, `:self_reference`,
`:path_issue` or `:missing_file`; `entry` is the line to print in the report.
"""
function classify_failure(source, link)
    entry = "$(source): $(link)"

    if startswith(link, "@ref")
        return :at_ref, entry
    elseif contains(link, "[") && contains(link, "](")
        # Malformed markdown link
        return :malformed, entry
    elseif startswith(link, "http")
        return :external, entry
    end

    # Self-reference: a "../" link whose file name occurs in the source's file name.
    # The name must be non-empty, because `contains(x, "")` is true for every `x`
    # and would put every link ending in "/" into this bucket.
    link_name = basename(strip_fragment(link))
    if contains(link, "../") && !isempty(link_name) &&
       contains(basename(source), link_name)
        return :self_reference, entry
    end

    # Check if the target file actually exists
    target_path = resolve_link_target(source, link)
    if isfile(target_path)
        return :path_issue,
        "$(entry) -> $(target_path) (EXISTS but validator says missing)"
    end
    return :missing_file, "$(entry) -> $(target_path)"
end

"""
    print_category(title, issues, limit)

Print `title` with the number of `issues`, the first `limit` issues, a note on how
many were left out (if any), and a trailing blank line.
"""
function print_category(title, issues, limit)
    println("$(title) ($(length(issues))):")
    for issue in Iterators.take(issues, limit)
        println(" $issue")
    end
    if length(issues) > limit
        println(" ... and $(length(issues) - limit) more")
    end
    println()
    return nothing
end

# Run link validation and capture results
println("Running link validation...")
results = validate_all_links("docs/src")
failures = [r for r in results if !r.valid]

# Categorize failures
missing_files = String[]
malformed_links = String[]
self_references = String[]
at_ref_issues = String[]
external_issues = String[]
path_issues = String[]

buckets = Dict(
    :at_ref => at_ref_issues,
    :malformed => malformed_links,
    :external => external_issues,
    :self_reference => self_references,
    :path_issue => path_issues,
    :missing_file => missing_files,
)

for failure in failures
    category, entry = classify_failure(failure.source_file, failure.url)
    push!(buckets[category], entry)
end

# Print analysis
println("\n=== LINK FAILURE ANALYSIS ===")
println("Total failures: $(length(failures))")
println()

print_category("1. @ref Issues", at_ref_issues, 10)
print_category("2. Malformed Links", malformed_links, 10)
print_category("3. Self-References", self_references, 5)
print_category("4. Path Resolution Issues", path_issues, 10)
print_category("5. Actually Missing Files", missing_files, 10)
print_category("6. External Link Issues", external_issues, 5)

# Find most frequently referenced missing files
missing_targets = Dict{String, Int}()
for failure in failures
    target = failure.url
    if !startswith(target, "http") && !startswith(target, "@ref")
        missing_targets[target] = get(missing_targets, target, 0) + 1
    end
end

println("Most frequently referenced targets:")
sorted_targets = sort(collect(missing_targets), by=last, rev=true)
for (target, hits) in Iterators.take(sorted_targets, 15)
    println(" $hits times: $target")
end