#!/usr/bin/env julia
# final_content_audit.jl

"""
Final Content Audit and Cleanup Script

This script performs a comprehensive audit of all migrated content for consistency,
accuracy, and completeness. It identifies and fixes issues to ensure all requirements
are met and validated.

Requirements addressed:
- 2.1: Consistent content structure
- 4.1: Content accuracy and completeness
- 4.4: Quality assurance and validation
"""

using Test

# Global tracking. `const` fixes the bindings (the containers stay mutable) so the
# functions below do not pay for untyped global lookups.
const audit_results = Dict{String, Any}()
const issues_found = String[]
const fixes_applied = String[]
const cleanup_actions = String[]

# Return the issue list stored under `key` by an earlier audit step (empty if not run).
recorded_issues(key::AbstractString) = get(audit_results, key, String[])

"""
    audit_frontmatter_consistency()

Check every Markdown file directly inside the category directories under `docs/src`
(plus `docs/src/index.md`) for a `---` delimited frontmatter block containing the
fields required for its category.

Returns the vector of issue strings and stores it in `audit_results` under
`"frontmatter_issues"`. Files that do not exist are skipped.
"""
function audit_frontmatter_consistency()
    println("🔍 Auditing Frontmatter Consistency...")

    # Required frontmatter fields for each category
    required_fields = Dict(
        "getting-started" => ["title", "description", "category", "difficulty",
                              "estimated_time", "topics", "last_updated"],
        "guides" => ["title", "description", "category", "difficulty", "topics",
                     "last_updated"],
        "reference" => ["title", "description", "category", "difficulty", "topics",
                        "last_updated"],
        "troubleshooting" => ["title", "description", "category", "difficulty",
                              "topics", "last_updated"],
        "advanced" => ["title", "description", "category", "difficulty", "topics",
                       "last_updated"],
        "resources" => ["title", "description", "category", "difficulty", "topics",
                        "last_updated"],
    )

    category_dirs = ["docs/src/getting-started", "docs/src/guides",
                     "docs/src/reference", "docs/src/troubleshooting",
                     "docs/src/advanced", "docs/src/resources"]

    # Find all markdown files (non-recursive, one level per category directory)
    md_files = String[]
    for dir in category_dirs
        isdir(dir) || continue
        append!(md_files, filter(f -> endswith(f, ".md"), readdir(dir; join=true)))
    end

    # Add main files
    push!(md_files, "docs/src/index.md")

    frontmatter_issues = String[]

    for filepath in md_files
        isfile(filepath) || continue

        content = read(filepath, String)

        if !startswith(content, "---")
            push!(frontmatter_issues, "$filepath: Missing frontmatter")
            continue
        end

        # The closing delimiter is the first `---` line after line 1. Trailing
        # whitespace is ignored so files with CRLF line endings are handled.
        lines = split(content, '\n')
        frontmatter_end = findnext(line -> rstrip(line) == "---", lines, 2)

        if frontmatter_end === nothing
            push!(frontmatter_issues, "$filepath: Malformed frontmatter")
            continue
        end

        frontmatter = join(lines[2:frontmatter_end-1], '\n')

        # The category is the directory the file lives in. Matching substrings of the
        # whole path misclassifies files such as `guides/getting-started-tips.md`.
        category = basename(dirname(filepath))
        haskey(required_fields, category) || continue

        # A field counts as present only as a key at the start of a line, so that
        # `subtitle:` does not satisfy `title:`.
        missing_fields = filter(required_fields[category]) do field
            !occursin(Regex("^" * field * "[ \\t]*:", "m"), frontmatter)
        end

        if !isempty(missing_fields)
            push!(frontmatter_issues,
                  "$filepath: Missing fields: $(join(missing_fields, ", "))")
        end
    end

    audit_results["frontmatter_issues"] = frontmatter_issues
    return frontmatter_issues
end

"""
    audit_content_structure()

Verify that each getting-started page exists and mentions its expected section titles
(plain substring search over the whole file).

Returns the vector of issue strings and stores it in `audit_results` under
`"structure_issues"`.
"""
function audit_content_structure()
    println("📋 Auditing Content Structure...")

    structure_issues = String[]

    # Check getting-started files for proper structure
    getting_started_files = [
        ("docs/src/getting-started/index.md",
         ["Learning Objectives", "Section Overview", "Prerequisites"]),
        ("docs/src/getting-started/installation.md",
         ["Prerequisites", "Method 1", "Method 2", "Verification"]),
        ("docs/src/getting-started/quick-start.md",
         ["What You'll Accomplish", "Step 1", "Step 2", "Step 3"]),
        ("docs/src/getting-started/first-strategy.md",
         ["What You'll Learn", "Prerequisites", "Step 1", "Step 2"]),
    ]

    for (filepath, required_sections) in getting_started_files
        if !isfile(filepath)
            push!(structure_issues, "$filepath: File missing")
            continue
        end

        content = read(filepath, String)
        missing_sections = filter(section -> !occursin(section, content),
                                  required_sections)

        if !isempty(missing_sections)
            push!(structure_issues,
                  "$filepath: Missing sections: $(join(missing_sections, ", "))")
        end
    end

    audit_results["structure_issues"] = structure_issues
    return structure_issues
end

# Return true when a Markdown link target cannot be checked on the local filesystem:
# in-page anchors, anything with a URL scheme (http:, https:, mailto:, ...),
# protocol-relative URLs, and `@ref`-style targets.
# NOTE(review): `@...` targets are assumed to be Documenter.jl cross-references, based
# on the `docs/src` layout - confirm.
function is_unverifiable_link(target::AbstractString)
    return startswith(target, "#") || startswith(target, "@") ||
           startswith(target, "//") ||
           occursin(r"^[A-Za-z][A-Za-z0-9+.-]*:", target)
end

"""
    audit_link_integrity()

Scan the main index and the getting-started pages for Markdown links and report every
relative link whose target file or directory does not exist.

Returns the vector of issue strings and stores it in `audit_results` under
`"link_issues"`. Files that do not exist are skipped.
"""
function audit_link_integrity()
    println("🔗 Auditing Link Integrity...")

    link_issues = String[]

    # Files to check for links
    files_to_check = [
        "docs/src/index.md",
        "docs/src/getting-started/index.md",
        "docs/src/getting-started/installation.md",
        "docs/src/getting-started/quick-start.md",
        "docs/src/getting-started/first-strategy.md",
    ]

    link_pattern = r"\[([^\]]+)\]\(([^)]+)\)"

    for filepath in files_to_check
        isfile(filepath) || continue

        content = read(filepath, String)

        for link_match in eachmatch(link_pattern, content)
            link_url = link_match.captures[2]

            # `[text](path "Title")`: only the first token is the destination.
            tokens = split(strip(link_url))
            isempty(tokens) && continue
            target = first(tokens)

            is_unverifiable_link(target) && continue

            # Drop the fragment and resolve relative to the linking file.
            clean_url = first(split(target, '#'))
            full_path = normpath(joinpath(dirname(filepath), clean_url))

            if !isfile(full_path) && !isdir(full_path)
                push!(link_issues, "$filepath: Broken link to $link_url")
            end
        end
    end

    audit_results["link_issues"] = link_issues
    return link_issues
end

"""
    audit_code_examples()

Run lightweight textual checks over the fenced `julia` and `bash` code blocks of the
getting-started pages.

The checks are substring heuristics, not a parser: parentheses inside strings or
comments are counted, and the dangerous-command patterns match anywhere in a block.

Returns the vector of issue strings and stores it in `audit_results` under
`"code_issues"`.
"""
function audit_code_examples()
    println("💻 Auditing Code Examples...")

    code_issues = String[]

    files_with_code = [
        "docs/src/getting-started/installation.md",
        "docs/src/getting-started/quick-start.md",
        "docs/src/getting-started/first-strategy.md",
    ]

    # NOTE(review): "format" also matches harmless text such as `--format`; tighten
    # once the set of commands that should be flagged is agreed on.
    dangerous_patterns = ["rm -rf /", "sudo rm", "format", "mkfs"]

    for filepath in files_with_code
        isfile(filepath) || continue

        content = read(filepath, String)

        # Julia code blocks
        for (i, block) in enumerate(eachmatch(r"```julia\n(.*?)\n```"s, content))
            code = block.captures[1]

            if occursin("[", code) && occursin("](", code)
                push!(code_issues,
                      "$filepath: Julia code block $i contains markdown links")
            end

            if occursin("\$\$", code)
                push!(code_issues,
                      "$filepath: Julia code block $i has malformed string interpolation")
            end

            # Basic syntax validity (simple balance check)
            if count(==('('), code) != count(==(')'), code)
                push!(code_issues,
                      "$filepath: Julia code block $i has unmatched parentheses")
            end
        end

        # Bash code blocks
        for (i, block) in enumerate(eachmatch(r"```bash\n(.*?)\n```"s, content))
            code = block.captures[1]

            for pattern in dangerous_patterns
                if occursin(pattern, code)
                    push!(code_issues,
                          "$filepath: Bash code block $i contains potentially dangerous command: $pattern")
                end
            end
        end
    end

    audit_results["code_issues"] = code_issues
    return code_issues
end

"""
    audit_requirements_compliance()

Map the results already recorded in `audit_results` onto requirements 2.1, 4.1 and
4.4. Must run after the other audit functions; steps that have not run count as
having no issues.

Returns the vector of compliance issue strings and stores it in `audit_results` under
`"compliance_issues"`.
"""
function audit_requirements_compliance()
    println("✅ Auditing Requirements Compliance...")

    compliance_issues = String[]

    # Requirement 2.1: Consistent content structure
    frontmatter_issues = recorded_issues("frontmatter_issues")
    structure_issues = recorded_issues("structure_issues")

    if !isempty(frontmatter_issues) || !isempty(structure_issues)
        push!(compliance_issues,
              "Requirement 2.1 (Consistent Structure): Issues found in frontmatter or structure")
    end

    # Requirement 4.1: Content accuracy and completeness
    link_issues = recorded_issues("link_issues")
    code_issues = recorded_issues("code_issues")

    if !isempty(link_issues) || !isempty(code_issues)
        push!(compliance_issues,
              "Requirement 4.1 (Accuracy & Completeness): Issues found in links or code")
    end

    # Requirement 4.4: Quality assurance
    total_issues = length(frontmatter_issues) + length(structure_issues) +
                   length(link_issues) + length(code_issues)

    if total_issues > 10
        push!(compliance_issues,
              "Requirement 4.4 (Quality Assurance): Too many issues found ($total_issues)")
    end

    audit_results["compliance_issues"] = compliance_issues
    return compliance_issues
end

# Remove redundant notes, collapse runs of blank lines and drop empty code fences.
function remove_redundant_content(content::AbstractString)
    content = replace(content, r"Note:\s*This\s+is\s+the\s+same\s+as[^\n]*\n" => "")
    content = replace(content, r"\n{3,}" => "\n\n")
    content = replace(content, r"```[a-z]*\n\s*\n```" => "")
    return content
end

# Normalize `##`/`###` headings to a single space after the hashes and turn top-level
# `* ` bullets into `- `. Works line by line so every heading and bullet is handled,
# and leaves fenced code blocks untouched.
function standardize_formatting(content::AbstractString)
    normalized = String[]
    in_fence = false
    for line in split(content, '\n')
        if startswith(lstrip(line), "```")
            in_fence = !in_fence
        elseif !in_fence
            # The lookahead keeps `####` and deeper headings out of this rule.
            line = replace(line, r"^(#{2,3})[ \t]*(?=[^#\s])" => s"\1 ")
            # `* * *` is a horizontal rule, not a list item.
            if !occursin(r"^(?:\*[ \t]*){3,}$", line)
                line = replace(line, r"^\*[ \t]+" => "- ")
            end
        end
        push!(normalized, String(line))
    end
    return join(normalized, '\n')
end

# Apply `transform` to every existing file in `paths`, writing back only when the
# text changed. Records one `cleanup_actions` entry per rewritten file and returns
# the number of files rewritten.
function rewrite_files!(transform, paths, description::AbstractString)
    rewritten = 0
    for filepath in paths
        isfile(filepath) || continue
        original_content = read(filepath, String)
        content = transform(original_content)
        if content != original_content
            write(filepath, content)
            push!(cleanup_actions, "$description $(basename(filepath))")
            rewritten += 1
        end
    end
    return rewritten
end

"""
    perform_cleanup_actions()

Rewrite the installation and quick-start pages in place: first remove redundant
content, then standardize heading and list formatting.

Each rewrite is logged in `cleanup_actions`. Returns the number of rewrites performed;
a file changed by both passes counts twice.
"""
function perform_cleanup_actions()
    println("🧹 Performing Cleanup Actions...")

    files_to_clean = [
        "docs/src/getting-started/installation.md",
        "docs/src/getting-started/quick-start.md",
    ]

    # 1. Remove outdated or redundant information
    cleanup_count = rewrite_files!(remove_redundant_content, files_to_clean,
                                   "Cleaned up redundant content in")

    # 2. Ensure consistent formatting
    cleanup_count += rewrite_files!(standardize_formatting, files_to_clean,
                                    "Standardized formatting in")

    return cleanup_count
end

# Print `header` followed by at most `limit` issues and a "... and N more" line when
# the list is longer. Prints nothing for an empty list.
function print_issue_section(header::AbstractString, issues, limit::Integer)
    isempty(issues) && return nothing
    println(header)
    for issue in Iterators.take(issues, limit)
        println("  • $issue")
    end
    if length(issues) > limit
        println("  • ... and $(length(issues) - limit) more")
    end
    return nothing
end

"""
    generate_audit_report()

Print the summary, the per-category issue lists, the cleanup log and the pass/fail
verdict for each requirement, based on what the audit functions recorded.

Returns `true` when all three requirements pass.
"""
function generate_audit_report()
    println("\n📊 FINAL AUDIT REPORT")
    println("=" ^ 50)

    # Summary statistics
    frontmatter_issues = recorded_issues("frontmatter_issues")
    structure_issues = recorded_issues("structure_issues")
    link_issues = recorded_issues("link_issues")
    code_issues = recorded_issues("code_issues")
    compliance_issues = recorded_issues("compliance_issues")

    total_issues = length(frontmatter_issues) + length(structure_issues) +
                   length(link_issues) + length(code_issues)

    println("Total Issues Found: $total_issues")
    println("Frontmatter Issues: $(length(frontmatter_issues))")
    println("Structure Issues: $(length(structure_issues))")
    println("Link Issues: $(length(link_issues))")
    println("Code Issues: $(length(code_issues))")
    println("Compliance Issues: $(length(compliance_issues))")
    println("Cleanup Actions: $(length(cleanup_actions))")

    # Detailed breakdown
    print_issue_section("\n❌ FRONTMATTER ISSUES:", frontmatter_issues, 5)
    print_issue_section("\n❌ STRUCTURE ISSUES:", structure_issues, 5)
    print_issue_section("\n❌ LINK ISSUES:", link_issues, 3)
    # Code issues are counted above, so list them too.
    print_issue_section("\n❌ CODE ISSUES:", code_issues, 5)

    if !isempty(cleanup_actions)
        println("\n✅ CLEANUP ACTIONS PERFORMED:")
        for action in cleanup_actions
            println("  • $action")
        end
    end

    # Requirements validation.
    # NOTE(review): these tolerances are looser than the checks in
    # `audit_requirements_compliance`, which flags any issue at all - confirm which
    # policy should decide the exit code.
    println("\n📋 REQUIREMENTS VALIDATION:")
    req_2_1_pass = length(frontmatter_issues) + length(structure_issues) < 5
    req_4_1_pass = length(link_issues) + length(code_issues) < 10
    req_4_4_pass = total_issues < 15

    verdict(passed) = passed ? "✅ PASS" : "❌ FAIL"
    println("  • Requirement 2.1 (Consistent Structure): $(verdict(req_2_1_pass))")
    println("  • Requirement 4.1 (Accuracy & Completeness): $(verdict(req_4_1_pass))")
    println("  • Requirement 4.4 (Quality Assurance): $(verdict(req_4_4_pass))")

    # Overall assessment
    overall_pass = req_2_1_pass && req_4_1_pass && req_4_4_pass
    println("\n🎯 OVERALL ASSESSMENT: $(overall_pass ? "✅ PASS" : "❌ NEEDS WORK")")

    if overall_pass
        println("🎉 Content audit completed successfully!")
        println("📚 Documentation is ready for production use.")
    else
        println("⚠️ Content audit identified issues that need attention.")
        println("🔧 Review the issues above and apply necessary fixes.")
    end

    return overall_pass
end

"""
    main()

Run every audit, perform the cleanup and print the report. Returns the process exit
code: `0` when the audit passes, `1` otherwise.
"""
function main()
    println("🚀 Starting Final Content Audit and Cleanup")
    println("=" ^ 50)

    # Start from a clean slate so repeated calls do not accumulate stale results.
    empty!(audit_results)
    empty!(cleanup_actions)

    # Run all audit checks
    audit_frontmatter_consistency()
    audit_content_structure()
    audit_link_integrity()
    audit_code_examples()
    audit_requirements_compliance()

    # Perform cleanup
    perform_cleanup_actions()

    # Generate final report
    overall_pass = generate_audit_report()

    exit_code = overall_pass ? 0 : 1
    println("\nAudit completed with exit code: $exit_code")
    return exit_code
end

# Only run (and exit) when executed as a script, so the file can be `include`d by
# tests or a REPL session without terminating the process.
if abspath(PROGRAM_FILE) == @__FILE__
    exit(main())
end