/ docs / test / final_content_audit.jl
final_content_audit.jl
  1  #!/usr/bin/env julia
  2  
  3  """
  4  Final Content Audit and Cleanup Script
  5  
  6  This script performs a comprehensive audit of all migrated content for consistency,
  7  accuracy, and completeness. It identifies and fixes issues to ensure all requirements
  8  are met and validated.
  9  
 10  Requirements addressed:
 11  - 2.1: Consistent content structure
 12  - 4.1: Content accuracy and completeness  
 13  - 4.4: Quality assurance and validation
 14  """
 15  
 16  using Test
 17  
 # Shared mutable state for one audit run. `audit_results` maps a check name
 # (e.g. "link_issues") to that check's findings; `cleanup_actions` logs every
 # file modification. `issues_found` and `fixes_applied` are declared here but
 # are not used by the functions visible in this file.
 audit_results = Dict{String,Any}()
 issues_found = Vector{String}()
 fixes_applied = Vector{String}()
 cleanup_actions = Vector{String}()
 23  
 """
     audit_frontmatter_consistency()

 Check the frontmatter of every `.md` page found directly inside the
 documentation category directories under `docs/src`, plus `docs/src/index.md`.

 For each existing page exactly one of the following is reported:
 - `Missing frontmatter`: the file does not start with `---`;
 - `Malformed frontmatter`: no closing `---` line follows the opening one;
 - `Missing fields: ...`: required top-level keys absent for the page's
   category (the category is the name of the directory containing the page;
   pages whose directory has no field list, such as `docs/src/index.md`, only
   get the first two checks).

 The findings are stored in `audit_results["frontmatter_issues"]` and returned
 as a `Vector{String}`.
 """
 function audit_frontmatter_consistency()
     println("๐Ÿ“ Auditing Frontmatter Consistency...")

     categories = [
         "getting-started", "guides", "reference", "troubleshooting", "advanced", "resources",
     ]
     base_fields = ["title", "description", "category", "difficulty", "topics", "last_updated"]

     # Every category shares the base list; getting-started additionally
     # requires an estimated reading time.
     required_fields = Dict{String,Vector{String}}(name => base_fields for name in categories)
     required_fields["getting-started"] = [
         "title", "description", "category", "difficulty", "estimated_time", "topics",
         "last_updated",
     ]

     # Collect the markdown pages of every category directory that exists.
     md_files = String[]
     for name in categories
         root = "docs/src/$name"
         isdir(root) || continue
         append!(md_files, filter(endswith(".md"), readdir(root; join=true)))
     end
     push!(md_files, "docs/src/index.md")

     frontmatter_issues = String[]

     for filepath in md_files
         isfile(filepath) || continue

         content = read(filepath, String)

         if !startswith(content, "---")
             push!(frontmatter_issues, "$filepath: Missing frontmatter")
             continue
         end

         # rstrip so that CRLF line endings or trailing blanks after the
         # delimiter do not make a well-formed block look unterminated.
         lines = split(content, '\n')
         closing = findnext(line -> rstrip(line) == "---", lines, 2)

         if closing === nothing
             push!(frontmatter_issues, "$filepath: Malformed frontmatter")
             continue
         end

         # Only keys that start a line count. A plain substring search would
         # accept `subtitle:` as `title:` or a key name quoted inside a value.
         present_keys = Set{String}()
         for line in lines[2:closing-1]
             m = match(r"^([A-Za-z0-9_\-]+)[ \t]*:", line)
             m === nothing || push!(present_keys, m.captures[1])
         end

         # Derive the category from the containing directory rather than from
         # a substring of the whole path, so a file name such as
         # `guides/advanced-usage.md` cannot be classified as "advanced".
         category = basename(dirname(filepath))
         required = get(required_fields, category, nothing)
         required === nothing && continue

         missing_fields = [field for field in required if !(field in present_keys)]
         if !isempty(missing_fields)
             push!(frontmatter_issues, "$filepath: Missing fields: $(join(missing_fields, ", "))")
         end
     end

     audit_results["frontmatter_issues"] = frontmatter_issues
     return frontmatter_issues
 end
113  
"""
    audit_content_structure()

Verify that each getting-started page exists and mentions the section titles
expected of it. A title counts as present when its text occurs anywhere in the
page. Findings are stored in `audit_results["structure_issues"]` and returned.
"""
function audit_content_structure()
    println("๐Ÿ“‹ Auditing Content Structure...")

    # page path => section titles that must occur somewhere in the page text
    expected_sections = [
        "docs/src/getting-started/index.md" =>
            ["Learning Objectives", "Section Overview", "Prerequisites"],
        "docs/src/getting-started/installation.md" =>
            ["Prerequisites", "Method 1", "Method 2", "Verification"],
        "docs/src/getting-started/quick-start.md" =>
            ["What You'll Accomplish", "Step 1", "Step 2", "Step 3"],
        "docs/src/getting-started/first-strategy.md" =>
            ["What You'll Learn", "Prerequisites", "Step 1", "Step 2"],
    ]

    problems = String[]

    for (page, wanted) in expected_sections
        if isfile(page)
            text = read(page, String)
            absent = filter(title -> !occursin(title, text), wanted)
            isempty(absent) ||
                push!(problems, "$page: Missing sections: $(join(absent, ", "))")
        else
            push!(problems, "$page: File missing")
        end
    end

    audit_results["structure_issues"] = problems
    return problems
end
150  
"""
    audit_link_integrity()

Scan the main documentation pages for inline markdown links and report every
link whose target is a local path that does not exist on disk.

Links that do not name a local file are skipped: in-page anchors (`#...`),
anything with a URL scheme (`https:`, `mailto:`, ...) and Documenter-style
`@ref` / `@id` targets. Before the existence check, an optional link title and
any `#fragment` or `?query` suffix are removed and the path is resolved
relative to the directory of the page containing the link.

Findings are stored in `audit_results["link_issues"]` and returned.
"""
function audit_link_integrity()
    println("๐Ÿ”— Auditing Link Integrity...")

    link_issues = String[]

    # Files to check for links
    files_to_check = [
        "docs/src/index.md",
        "docs/src/getting-started/index.md",
        "docs/src/getting-started/installation.md",
        "docs/src/getting-started/quick-start.md",
        "docs/src/getting-started/first-strategy.md"
    ]

    link_pattern = r"\[([^\]]+)\]\(([^)]+)\)"
    # RFC 3986 scheme prefix. Testing for a real scheme (instead of the literal
    # prefix "http") skips `mailto:`/`ftp:` links and no longer skips local
    # files whose name merely starts with "http".
    scheme_pattern = r"^[A-Za-z][A-Za-z0-9+.\-]*:"

    for filepath in files_to_check
        isfile(filepath) || continue

        content = read(filepath, String)
        base_dir = dirname(filepath)

        for link_match in eachmatch(link_pattern, content)
            link_url = link_match.captures[2]

            # Keep only the destination; `[text](page.md "Title")` carries a
            # title after whitespace that is not part of the path.
            destination = match(r"^\s*(\S+)", link_url)
            if destination === nothing
                push!(link_issues, "$filepath: Broken link to $link_url")
                continue
            end
            target = destination.captures[1]

            # Anchors, external URLs and Documenter cross-references (`@ref`,
            # `@id`, ...) do not point at files in the source tree.
            if startswith(target, "#") || startswith(target, "@") ||
               occursin(scheme_pattern, target)
                continue
            end

            # Drop the fragment/query and resolve relative to the linking page.
            clean_url = first(split(target, r"[#?]"))
            full_path = normpath(joinpath(base_dir, clean_url))

            if !isfile(full_path) && !isdir(full_path)
                push!(link_issues, "$filepath: Broken link to $link_url")
            end
        end
    end

    audit_results["link_issues"] = link_issues
    return link_issues
end
202  
"""
    audit_code_examples()

Run lightweight lint checks over the fenced code blocks of the getting-started
pages.

Julia blocks are flagged when they appear to contain a markdown link, contain
`\$\$`, or have a different number of `(` and `)` characters (a character count,
not a parse, so parentheses inside strings or comments are counted too).
Bash blocks are flagged when they contain a potentially dangerous command.

Findings are stored in `audit_results["code_issues"]` and returned.
"""
function audit_code_examples()
    println("๐Ÿ’ป Auditing Code Examples...")

    code_issues = String[]

    files_with_code = [
        "docs/src/getting-started/installation.md",
        "docs/src/getting-started/quick-start.md",
        "docs/src/getting-started/first-strategy.md"
    ]

    # `\r?` lets the fences match in files with CRLF line endings as well.
    julia_fence = r"```julia\r?\n(.*?)\r?\n```"s
    bash_fence = r"```bash\r?\n(.*?)\r?\n```"s

    # reported label => matcher. `format` must stand alone as a command word:
    # a bare substring test also fires on "information", "--format=..." or
    # "clang-format", none of which are the disk-formatting command.
    dangerous_patterns = [
        "rm -rf /" => "rm -rf /",
        "sudo rm" => "sudo rm",
        "format" => r"(?<![\w.\-])format(?=\s|\z)",
        "mkfs" => "mkfs",
    ]

    for filepath in files_with_code
        isfile(filepath) || continue

        content = read(filepath, String)

        for (i, block) in enumerate(eachmatch(julia_fence, content))
            code = block.captures[1]

            if occursin("[", code) && occursin("](", code)
                push!(code_issues, "$filepath: Julia code block $i contains markdown links")
            end

            if occursin("\$\$", code)
                push!(code_issues, "$filepath: Julia code block $i has malformed string interpolation")
            end

            # Check for basic syntax validity (simple check)
            if count(==('('), code) != count(==(')'), code)
                push!(code_issues, "$filepath: Julia code block $i has unmatched parentheses")
            end
        end

        for (i, block) in enumerate(eachmatch(bash_fence, content))
            code = block.captures[1]

            for (pattern, matcher) in dangerous_patterns
                if occursin(matcher, code)
                    push!(code_issues, "$filepath: Bash code block $i contains potentially dangerous command: $pattern")
                end
            end
        end
    end

    audit_results["code_issues"] = code_issues
    return code_issues
end
261  
"""
    audit_requirements_compliance()

Derive requirement-level findings from the results that the earlier audits
stored in `audit_results` (a check that has not run counts as having no
issues). The findings are stored in `audit_results["compliance_issues"]` and
returned.
"""
function audit_requirements_compliance()
    println("โœ… Auditing Requirements Compliance...")

    recorded(key) = get(audit_results, key, String[])

    frontmatter_issues = recorded("frontmatter_issues")
    structure_issues = recorded("structure_issues")
    link_issues = recorded("link_issues")
    code_issues = recorded("code_issues")

    compliance_issues = String[]

    # Requirement 2.1: any frontmatter or structure finding is a violation.
    structure_clean = isempty(frontmatter_issues) && isempty(structure_issues)
    if !structure_clean
        push!(compliance_issues, "Requirement 2.1 (Consistent Structure): Issues found in frontmatter or structure")
    end

    # Requirement 4.1: any link or code finding is a violation.
    accuracy_clean = isempty(link_issues) && isempty(code_issues)
    if !accuracy_clean
        push!(compliance_issues, "Requirement 4.1 (Accuracy & Completeness): Issues found in links or code")
    end

    # Requirement 4.4: tolerate up to 10 findings overall.
    # NOTE(review): generate_audit_report applies different thresholds to the
    # same requirements — confirm which set is intended.
    total_issues = sum(length, (frontmatter_issues, structure_issues, link_issues, code_issues))
    if total_issues > 10
        push!(compliance_issues, "Requirement 4.4 (Quality Assurance): Too many issues found ($total_issues)")
    end

    audit_results["compliance_issues"] = compliance_issues
    return compliance_issues
end
294  
"""
    perform_cleanup_actions()

Rewrite the installation and quick-start pages in place in two passes:

1. remove redundant "Note: This is the same as ..." lines, empty fenced code
   blocks and runs of blank lines;
2. normalise the spacing of `##`/`###` headings and turn `*` bullets into `-`
   bullets (text inside fenced code blocks is left untouched).

A file is only written when a pass actually changed it. Each write appends a
message to `cleanup_actions`. Returns the number of writes performed.
"""
function perform_cleanup_actions()
    println("๐Ÿงน Performing Cleanup Actions...")

    cleanup_count = 0

    files_to_clean = [
        "docs/src/getting-started/installation.md",
        "docs/src/getting-started/quick-start.md"
    ]

    # Each pass runs over all files before the next one starts, and is logged
    # separately.
    passes = [
        (_remove_redundant_content, "Cleaned up redundant content in "),
        (_standardize_markdown_formatting, "Standardized formatting in "),
    ]

    for (transform, label) in passes, filepath in files_to_clean
        isfile(filepath) || continue

        original_content = read(filepath, String)
        content = transform(original_content)

        if content != original_content
            write(filepath, content)
            push!(cleanup_actions, label * basename(filepath))
            cleanup_count += 1
        end
    end

    return cleanup_count
end

# Strip redundant notes, empty code blocks and excess blank lines from `text`.
function _remove_redundant_content(text::AbstractString)
    text = replace(text, r"Note:\s*This\s+is\s+the\s+same\s+as[^\n]*\n" => "")
    text = _drop_empty_code_blocks(text)
    # Collapse last so the gaps left by removed blocks are tidied as well.
    return replace(text, r"\n{3,}" => "\n\n")
end

# Remove fenced code blocks whose body is empty or whitespace-only.
#
# Fences are paired line by line. A single regex over the whole text cannot
# tell an opening fence from a closing one, so it also matches "closing fence,
# blank line, next opening fence" and thereby merges two adjacent code blocks.
function _drop_empty_code_blocks(text::AbstractString)
    lines = split(text, '\n')
    is_fence(line) = startswith(lstrip(line), "```")

    kept = String[]
    n = length(lines)
    i = 1
    while i <= n
        if !is_fence(lines[i])
            push!(kept, lines[i])
            i += 1
            continue
        end

        # lines[i] opens a block; look for the fence that closes it.
        j = i + 1
        while j <= n && !is_fence(lines[j])
            j += 1
        end

        if j <= n && all(k -> isempty(strip(lines[k])), (i + 1):(j - 1))
            i = j + 1                      # empty block: drop both fences
        else
            stop = min(j, n)               # unterminated block: keep the rest
            append!(kept, lines[i:stop])
            i = stop + 1
        end
    end

    return join(kept, '\n')
end

# Normalise heading spacing and bullet markers outside fenced code blocks.
#
# Works per line because `^` in a regex without the `m` flag only matches at
# the very start of the text, so whole-file patterns never reach the headings
# and bullets further down.
function _standardize_markdown_formatting(text::AbstractString)
    formatted = String[]
    in_fence = false

    for line in split(text, '\n')
        if startswith(lstrip(line), "```")
            in_fence = !in_fence
        elseif !in_fence
            # Exactly two or three '#' followed by text; the lookahead keeps
            # deeper headings (`####`) and bare `##` lines untouched.
            line = replace(line, r"^(#{2,3})[ \t]*(?=[^#\s])" => s"\1 ")
            # `* item` -> `- item`, but leave `* * *` horizontal rules alone.
            if !occursin(r"^(\*[ \t]*){3,}\s*$", line)
                line = replace(line, r"^\*[ \t]+" => "- ")
            end
        end
        push!(formatted, line)
    end

    return join(formatted, '\n')
end
355  
"""
    generate_audit_report()

Print the final report from the findings stored in `audit_results` and the
actions logged in `cleanup_actions`: summary counts, a truncated listing per
issue category, the cleanup log, and a pass/fail line per requirement.

Returns `true` when all three requirement thresholds are met:
- 2.1: fewer than 5 frontmatter + structure issues;
- 4.1: fewer than 10 link + code issues;
- 4.4: fewer than 15 issues in total.
"""
function generate_audit_report()
    println("\n๐Ÿ“Š FINAL AUDIT REPORT")
    println("=" ^ 50)

    # Summary statistics
    frontmatter_issues = get(audit_results, "frontmatter_issues", String[])
    structure_issues = get(audit_results, "structure_issues", String[])
    link_issues = get(audit_results, "link_issues", String[])
    code_issues = get(audit_results, "code_issues", String[])
    compliance_issues = get(audit_results, "compliance_issues", String[])

    total_issues = length(frontmatter_issues) + length(structure_issues) +
                   length(link_issues) + length(code_issues)

    println("Total Issues Found: $total_issues")
    println("Frontmatter Issues: $(length(frontmatter_issues))")
    println("Structure Issues: $(length(structure_issues))")
    println("Link Issues: $(length(link_issues))")
    println("Code Issues: $(length(code_issues))")
    println("Compliance Issues: $(length(compliance_issues))")
    println("Cleanup Actions: $(length(cleanup_actions))")

    # Detailed breakdown. Code issues are listed too: they count towards the
    # pass/fail decision below, so the reader must be able to see them.
    _print_issue_section("\nโŒ FRONTMATTER ISSUES:", frontmatter_issues, 5)
    _print_issue_section("\nโŒ STRUCTURE ISSUES:", structure_issues, 5)
    _print_issue_section("\nโŒ LINK ISSUES:", link_issues, 3)
    _print_issue_section("\nโŒ CODE ISSUES:", code_issues, 5)

    if !isempty(cleanup_actions)
        println("\nโœ… CLEANUP ACTIONS PERFORMED:")
        for action in cleanup_actions
            println("  โ€ข $action")
        end
    end

    # Requirements validation
    println("\n๐Ÿ“‹ REQUIREMENTS VALIDATION:")
    req_2_1_pass = length(frontmatter_issues) + length(structure_issues) < 5
    req_4_1_pass = length(link_issues) + length(code_issues) < 10
    req_4_4_pass = total_issues < 15

    println("  โ€ข Requirement 2.1 (Consistent Structure): $(req_2_1_pass ? "โœ… PASS" : "โŒ FAIL")")
    println("  โ€ข Requirement 4.1 (Accuracy & Completeness): $(req_4_1_pass ? "โœ… PASS" : "โŒ FAIL")")
    println("  โ€ข Requirement 4.4 (Quality Assurance): $(req_4_4_pass ? "โœ… PASS" : "โŒ FAIL")")

    # Overall assessment
    overall_pass = req_2_1_pass && req_4_1_pass && req_4_4_pass
    println("\n๐ŸŽฏ OVERALL ASSESSMENT: $(overall_pass ? "โœ… PASS" : "โŒ NEEDS WORK")")

    if overall_pass
        println("๐ŸŽ‰ Content audit completed successfully!")
        println("๐Ÿ“š Documentation is ready for production use.")
    else
        println("โš ๏ธ  Content audit identified issues that need attention.")
        println("๐Ÿ”ง Review the issues above and apply necessary fixes.")
    end

    return overall_pass
end

# Print `header` followed by at most `limit` entries of `issues` and a note
# with the number of entries left out. Prints nothing when `issues` is empty.
function _print_issue_section(header::AbstractString, issues, limit::Integer)
    isempty(issues) && return nothing

    println(header)
    for issue in Iterators.take(issues, limit)
        println("  โ€ข $issue")
    end

    hidden = length(issues) - limit
    hidden > 0 && println("  โ€ข ... and $hidden more")
    return nothing
end
440  
# Script entry point: run every audit, clean up, report, and exit.
println("๐Ÿš€ Starting Final Content Audit and Cleanup")
println(repeat("=", 50))

# The individual audits go first; the compliance check then reads what they
# stored in `audit_results`.
frontmatter_issues = audit_frontmatter_consistency()
structure_issues = audit_content_structure()
link_issues = audit_link_integrity()
code_issues = audit_code_examples()
compliance_issues = audit_requirements_compliance()

# Cleanup runs before the report so the report can list the actions taken.
cleanup_count = perform_cleanup_actions()

overall_pass = generate_audit_report()

# Process exit status: 0 when the audit passed, 1 otherwise.
exit_code = if overall_pass
    0
else
    1
end
println("\nAudit completed with exit code: ", exit_code)
exit(exit_code)