/ docs / scripts / analyze_link_failures.jl
analyze_link_failures.jl
  1  #!/usr/bin/env julia
  2  
  3  """
  4  Link Failure Analysis Script
  5  
  6  This script analyzes the 502 link failures and categorizes them by type.
  7  """
  8  
  9  using Pkg
 10  Pkg.activate("docs")
 11  
 12  # Include the test modules
 13  include("docs/test/LinkValidator.jl")
 14  using .LinkValidator
 15  
 16  # Run link validation and capture results
 17  println("Running link validation...")
 18  results = validate_all_links("docs/src")
 19  
 20  # Categorize failures
 21  missing_files = String[]
 22  malformed_links = String[]
 23  self_references = String[]
 24  at_ref_issues = String[]
 25  external_issues = String[]
 26  path_issues = String[]
 27  
 28  for result in results
 29      if !result.valid
 30          link = result.url
 31          source = result.source_file
 32          
 33          # Categorize the failure
 34          if startswith(link, "@ref")
 35              push!(at_ref_issues, "$(source): $(link)")
 36          elseif contains(link, "[") && contains(link, "](")
 37              # Malformed markdown link
 38              push!(malformed_links, "$(source): $(link)")
 39          elseif startswith(link, "http")
 40              push!(external_issues, "$(source): $(link)")
 41          elseif contains(link, "../") && contains(basename(source), basename(link))
 42              # Self-reference
 43              push!(self_references, "$(source): $(link)")
 44          else
 45              # Check if target file actually exists
 46              target_path = if startswith(link, "../")
 47                  # Relative path from source file
 48                  source_dir = dirname(source)
 49                  joinpath(source_dir, link)
 50              else
 51                  link
 52              end
 53              
 54              # Normalize path
 55              target_path = normpath(target_path)
 56              
 57              if isfile(target_path)
 58                  push!(path_issues, "$(source): $(link) -> $(target_path) (EXISTS but validator says missing)")
 59              else
 60                  push!(missing_files, "$(source): $(link) -> $(target_path)")
 61              end
 62          end
 63      end
 64  end
 65  
 66  # Print analysis
 67  println("\n=== LINK FAILURE ANALYSIS ===")
 68  println("Total failures: $(length([r for r in results if !r.valid]))")
 69  println()
 70  
 71  println("1. @ref Issues ($(length(at_ref_issues))):")
 72  for issue in at_ref_issues[1:min(10, end)]
 73      println("   $issue")
 74  end
 75  if length(at_ref_issues) > 10
 76      println("   ... and $(length(at_ref_issues) - 10) more")
 77  end
 78  println()
 79  
 80  println("2. Malformed Links ($(length(malformed_links))):")
 81  for issue in malformed_links[1:min(10, end)]
 82      println("   $issue")
 83  end
 84  if length(malformed_links) > 10
 85      println("   ... and $(length(malformed_links) - 10) more")
 86  end
 87  println()
 88  
 89  println("3. Self-References ($(length(self_references))):")
 90  for issue in self_references[1:min(5, end)]
 91      println("   $issue")
 92  end
 93  if length(self_references) > 5
 94      println("   ... and $(length(self_references) - 5) more")
 95  end
 96  println()
 97  
 98  println("4. Path Resolution Issues ($(length(path_issues))):")
 99  for issue in path_issues[1:min(10, end)]
100      println("   $issue")
101  end
102  if length(path_issues) > 10
103      println("   ... and $(length(path_issues) - 10) more")
104  end
105  println()
106  
107  println("5. Actually Missing Files ($(length(missing_files))):")
108  for issue in missing_files[1:min(10, end)]
109      println("   $issue")
110  end
111  if length(missing_files) > 10
112      println("   ... and $(length(missing_files) - 10) more")
113  end
114  println()
115  
116  println("6. External Link Issues ($(length(external_issues))):")
117  for issue in external_issues[1:min(5, end)]
118      println("   $issue")
119  end
120  if length(external_issues) > 5
121      println("   ... and $(length(external_issues) - 5) more")
122  end
123  println()
124  
# Tally how often each failing target is referenced, leaving out external
# ("http...") links and "@ref" links.
missing_targets = Dict{String, Int}()
for entry in results
    entry.valid && continue
    url = entry.url
    (startswith(url, "http") || startswith(url, "@ref")) && continue
    missing_targets[url] = get(missing_targets, url, 0) + 1
end

println("Most frequently referenced targets:")
# Highest reference count first; only the top 15 entries are shown.
sorted_targets = sort(collect(missing_targets); by=last, rev=true)
for (url, hits) in Iterators.take(sorted_targets, 15)
    println("   $hits times: $url")
end