analyze_test_results.py
"""
Test Results Analyzer for CursorWrap Monitor Layout Tests

Analyzes test_report.json and provides detailed explanations of failures,
patterns, and recommendations.
"""

import argparse
import json
import sys
from collections import Counter, defaultdict
from typing import Any, Dict, Iterable, List, Optional, Tuple

# Width of the "=" / "-" rules that frame every report section.
_RULE_WIDTH = 80

# Test names that get a dedicated explanation / prompt section.
_WRAP_TEST = 'wrap_calculation'
_SINGLE_EDGE_TEST = 'single_monitor_edges'
_TOUCHING_TEST = 'touching_monitors'

# Explanation printed by explain_common_issues() for wrap failures.
_WRAP_EXPLANATION = (
    "Issue: Cursor is on an outer edge but wrapping is not occurring.",
    "\nLikely Causes:",
    " 1. Partial Overlap Problem:",
    " • When monitors have different sizes (e.g., 4K + 1080p)",
    " • Only part of an edge is actually adjacent to another monitor",
    " • Current code marks the ENTIRE edge as non-outer if ANY part is adjacent",
    " • This prevents wrapping even in regions where it should occur",
    "\n 2. Edge Detection Logic:",
    " • Check IdentifyOuterEdges() in MonitorTopology.cpp",
    " • Consider segmenting edges based on actual overlap regions",
    "\n 3. Test Point Selection:",
    " • Failures may be at corners or quarter points",
    " • Indicates edge behavior varies along its length",
)

# Explanation printed by explain_common_issues() for single-monitor failures.
_SINGLE_EDGE_EXPLANATION = (
    "Issue: Single monitor should have exactly 4 outer edges.",
    "\nThis indicates a fundamental problem in edge detection baseline.",
)

# Explanation printed by explain_common_issues() for touching-monitor failures.
_TOUCHING_EXPLANATION = (
    "Issue: Adjacent monitors not detected correctly.",
    "\nCheck EdgesAreAdjacent() logic and 50px tolerance settings.",
)

# Static tail of the Copilot prompt: proposed code changes, validation steps
# and the list of files to modify. Emitted verbatim, one entry per line.
_PROMPT_FIX_GUIDANCE = (
    "## Required Code Changes",
    "",
    "### File: `MonitorTopology.cpp`",
    "",
    "**Change 1**: Modify `IdentifyOuterEdges()` to support partial edge adjacency",
    "",
    "Instead of marking entire edges as outer/non-outer, the code needs to:",
    "",
    "1. **Segment edges** based on actual overlap regions with adjacent monitors",
    "2. Create **sub-edges** for portions of an edge that have different outer status",
    "3. Update `IsOnOuterEdge()` to check if the **cursor's specific position** is on an outer portion",
    "",
    "**Proposed Approach**:",
    "",
    "```cpp",
    "// Instead of: edge.isOuter = true/false for entire edge",
    "// Use: Store list of outer ranges for each edge",
    "",
    "struct MonitorEdge {",
    " // ... existing fields ...",
    " std::vector<std::pair<int, int>> outerRanges; // Ranges where edge is outer",
    "};",
    "",
    "// In IdentifyOuterEdges():",
    "// For each edge, find ALL adjacent opposite edges",
    "// Calculate which portions of the edge have NO adjacent opposite",
    "// Store these as outer ranges",
    "",
    "// In IsOnOuterEdge():",
    "// Check if cursor position falls within any outer range",
    "if (edge.type == EdgeType::Left || edge.type == EdgeType::Right) {",
    " // Check if cursorPos.y is in any outer range",
    "} else {",
    " // Check if cursorPos.x is in any outer range",
    "}",
    "```",
    "",
    "**Change 2**: Update `EdgesAreAdjacent()` validation",
    "",
    "The 50px tolerance logic is correct but needs to return overlap range info:",
    "",
    "```cpp",
    "struct AdjacencyResult {",
    " bool isAdjacent;",
    " int overlapStart; // Where the adjacency begins",
    " int overlapEnd; // Where the adjacency ends",
    "};",
    "",
    "AdjacencyResult CheckEdgeAdjacency(const MonitorEdge& edge1, ",
    " const MonitorEdge& edge2, ",
    " int tolerance);",
    "```",
    "",
    "## Test Validation",
    "",
    "After implementing changes, run:",
    "```bash",
    "python monitor_layout_tests.py --max-monitors 10",
    "```",
    "",
    "Expected results:",
    "- All 21+ configurations should pass",
    "- Specifically, the 4K+1080p configuration should pass all 5 test points per edge",
    "- Wrap calculation should work correctly at partial overlap boundaries",
    "",
    "## Files to Modify",
    "",
    "1. `MonitorTopology.h` - Update MonitorEdge structure",
    "2. `MonitorTopology.cpp` - Implement segmented edge detection",
    " - `IdentifyOuterEdges()` - Main logic change",
    " - `IsOnOuterEdge()` - Check position against ranges",
    " - `EdgesAreAdjacent()` - Optionally return range info",
    "",
    "```",
)


def _emit(lines: Iterable[str]) -> None:
    """Print each entry of *lines* on its own line."""
    for line in lines:
        print(line)


def _banner(title: str, leading_blank: bool = True) -> None:
    """Print *title* framed by two "=" rules, optionally preceded by a blank line."""
    rule = "=" * _RULE_WIDTH
    print("\n" + rule if leading_blank else rule)
    print(title)
    print(rule)


def _details_of(failure: Dict[str, Any]) -> Dict[str, Any]:
    """Return the failure's 'details' mapping, or {} when absent or not a dict."""
    details = failure.get('details')
    return details if isinstance(details, dict) else {}


def _edge_of(details: Dict[str, Any]) -> Dict[str, Any]:
    """Return the 'edge' mapping inside *details*, or {} when absent or not a dict."""
    edge = details.get('edge')
    return edge if isinstance(edge, dict) else {}


def _partial_overlap_diagram() -> List[str]:
    """Build the ASCII picture of a 4K monitor with a 1080p monitor at its bottom-right.

    All padding is derived from the border strings so the box walls always
    line up with their corners.
    """
    big_top = "┌────────────────────────────────────────┐"
    big_bottom = "└────────────────────────────────────────┘"
    small_top = "┌──────────┐"
    small_bottom = "└──────────┘"

    big_row = "│" + " " * (len(big_top) - 2) + "│"
    gutter = " " * len(big_top)  # blank area left of the small box, below the big one
    note_pad = " " * len(" <- ")  # lines continuation notes up under the text after "<- "

    def small_row(text: str) -> str:
        return "│" + text.center(len(small_top) - 2) + "│"

    return [
        "4K Monitor (3840x2160 at 0,0)",
        big_top,
        big_row + " <- Y: 0-1080 NO adjacent monitor",
        big_row + note_pad + "RIGHT EDGE SHOULD BE OUTER",
        big_row,
        big_row + small_top,
        big_row + small_row("1080p") + " <- Y: 1080-2160 HAS adjacent",
        big_bottom + small_row("at") + note_pad + "RIGHT EDGE NOT OUTER",
        gutter + small_row("(3840,"),
        gutter + small_row("1080)"),
        gutter + small_bottom,
    ]


class TestResultAnalyzer:
    """Analyzes test results and provides insights.

    Attributes:
        report: The decoded JSON report (a dict).
        failures: List stored under the report's 'failures' key ([] if missing).
        summary: Dict stored under the report's 'summary' key ({} if missing).
        recommendations: List stored under 'recommendations' ([] if missing).
    """

    def __init__(self, report_path: str = "test_report.json"):
        """Load the JSON report at *report_path*.

        Raises:
            FileNotFoundError: if the file does not exist.
            json.JSONDecodeError: if the file is not valid JSON.
            ValueError: if the JSON root is not an object.
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(report_path, 'r', encoding='utf-8') as f:
            report = json.load(f)

        if not isinstance(report, dict):
            raise ValueError(
                f"report root must be a JSON object, got {type(report).__name__}"
            )

        self.report = report
        # `or` also covers keys that are present but null.
        self.failures = report.get('failures') or []
        self.summary = report.get('summary') or {}
        self.recommendations = report.get('recommendations') or []

    @staticmethod
    def _parse_test_point(raw: Any) -> Optional[Tuple[float, float]]:
        """Return *raw* as an (x, y) pair of numbers, or None if it is not one."""
        try:
            x, y = raw
        except (TypeError, ValueError):
            return None
        if isinstance(x, (int, float)) and isinstance(y, (int, float)):
            return x, y
        return None

    def _group_by_test_name(self) -> Dict[str, List[Dict[str, Any]]]:
        """Group self.failures by their 'test_name', preserving report order."""
        grouped = defaultdict(list)
        for failure in self.failures:
            grouped[failure['test_name']].append(failure)
        return grouped

    def print_overview(self) -> None:
        """Print test overview (totals, pass rate, and a pass/issue verdict)."""
        summary = self.summary
        total = summary.get('total_configs', 0)
        passed = summary.get('passed', 0)
        issues = summary.get('total_issues', 0)

        _banner("CURSORWRAP TEST RESULTS ANALYSIS", leading_blank=False)
        print(f"\nTotal Configurations Tested: {total}")
        print(f"Passed: {passed} ({summary.get('pass_rate', 'N/A')})")
        print(f"Failed: {summary.get('failed', 0)}")
        print(f"Total Issues: {issues}")

        # Never claim success while failures are listed: with a missing summary
        # both counters default to 0 and would otherwise compare equal.
        if passed == total and not self.failures:
            print("\n✓ ALL TESTS PASSED! Edge detection logic is working correctly.")
            return

        print(f"\n⚠ {issues} issues detected\n")

    def analyze_failure_patterns(self):
        """Analyze and categorize failure patterns.

        Prints failure counts per test type and per monitor configuration,
        most failures first.

        Returns:
            (by_test_type, by_config): two dicts mapping a test name /
            monitor configuration to the list of its failure records.
        """
        _banner("FAILURE PATTERN ANALYSIS", leading_blank=False)

        by_test_type = defaultdict(list)
        by_config = defaultdict(list)
        for failure in self.failures:
            by_test_type[failure['test_name']].append(failure)
            by_config[failure['monitor_config']].append(failure)

        def most_failures_first(grouped):
            # sorted() is stable, so ties keep report order.
            return sorted(grouped.items(), key=lambda item: len(item[1]), reverse=True)

        print("\n1. Failures by Test Type:")
        for test_type, failures in most_failures_first(by_test_type):
            print(f" • {test_type}: {len(failures)} failures")

        print("\n2. Configurations with Failures:")
        for config, failures in most_failures_first(by_config):
            print(f" • {config}")
            print(f" {len(failures)} issues")

        return by_test_type, by_config

    def analyze_wrap_calculation_failures(self, failures: List[Dict[str, Any]]) -> None:
        """Detailed analysis of wrap calculation failures.

        Prints the affected configurations and a rough distribution of the
        failing test points. Failures whose 'details' or 'test_point' are
        missing or malformed still count toward the configurations but are
        left out of the point distribution.
        """
        _banner("WRAP CALCULATION FAILURE ANALYSIS")

        configs = set()
        positions = []
        for failure in failures:
            configs.add(failure['monitor_config'])
            point = self._parse_test_point(_details_of(failure).get('test_point'))
            if point is not None:
                positions.append(point)

        print(f"\nAffected Configurations: {len(configs)}")
        for config in sorted(configs):
            print(f" • {config}")

        if not positions:
            return

        print(f"\nFailed Test Points: {len(positions)}")

        # Simplified edge detection: only proximity to the coordinate origin
        # is checked, so right/bottom edge points land in 'other'.
        distribution = Counter()
        for x, y in positions:
            if x <= 10:
                distribution['left edge'] += 1
            elif y <= 10:
                distribution['top edge'] += 1
            else:
                distribution['other'] += 1

        print("\nPosition Distribution:")
        for pos_type, count in distribution.items():
            print(f" • {pos_type}: {count}")

    def explain_common_issues(self) -> None:
        """Explain common issues found in results, one section per known test type."""
        _banner("COMMON ISSUE EXPLANATIONS")

        failed_tests = {failure['test_name'] for failure in self.failures}
        sections = (
            (_WRAP_TEST, "\n⚠ WRAP CALCULATION FAILURES", _WRAP_EXPLANATION),
            (_SINGLE_EDGE_TEST, "\n⚠ SINGLE MONITOR EDGE FAILURES", _SINGLE_EDGE_EXPLANATION),
            (_TOUCHING_TEST, "\n⚠ TOUCHING MONITORS FAILURES", _TOUCHING_EXPLANATION),
        )
        for test_name, heading, body in sections:
            if test_name in failed_tests:
                print(heading)
                print("-" * _RULE_WIDTH)
                _emit(body)

    def print_recommendations(self) -> None:
        """Print recommendations from the report; prints nothing when there are none."""
        if not self.recommendations:
            return

        _banner("RECOMMENDATIONS")
        for number, recommendation in enumerate(self.recommendations, 1):
            print(f"\n{number}. {recommendation}")

    def detailed_failure_dump(self) -> None:
        """Print all failure details, one rule-terminated entry per failure."""
        _banner("DETAILED FAILURE LISTING")

        for number, failure in enumerate(self.failures, 1):
            print(f"\n[{number}] {failure['test_name']}")
            print(f"Configuration: {failure['monitor_config']}")
            print(f"Expected: {failure['expected']}")
            print(f"Actual: {failure['actual']}")

            details = _details_of(failure)
            edge = details.get('edge')
            if isinstance(edge, dict):
                print(
                    f"Edge: {edge.get('edge_type', 'N/A')} "
                    f"at position {edge.get('position', 'N/A')}, "
                    f"range [{edge.get('range_start', 'N/A')}, {edge.get('range_end', 'N/A')}]"
                )
            if 'test_point' in details:
                print(f"Test Point: {details['test_point']}")
            print("-" * _RULE_WIDTH)

    def _print_wrap_prompt_section(self, number: int, failures: List[Dict[str, Any]]) -> None:
        """Print prompt section *number*: the wrap-calculation (partial overlap) bug."""
        configs = {failure['monitor_config'] for failure in failures}

        print(f"### {number}. Wrap Calculation Failures (PARTIAL OVERLAP BUG)")
        print()
        print(f"**Count**: {len(failures)} failures across {len(configs)} configuration(s)")
        print()
        print("**Affected Configurations**:")
        for config in sorted(configs):
            print(f"- {config}")
        print()

        _emit((
            "**Root Cause Analysis**:",
            "",
            "The current implementation in `MonitorTopology::IdentifyOuterEdges()` marks an",
            "ENTIRE edge as non-outer if ANY portion of that edge is adjacent to another monitor.",
            "",
            "**Problem Scenario**: 1080p monitor + 4K monitor at bottom-right",
            "```",
        ))
        _emit(_partial_overlap_diagram())
        _emit((
            "```",
            "",
            "**Current Behavior**: Right edge of 4K monitor is marked as NON-OUTER for entire",
            "range (Y: 0-2160) because it detects adjacency in the bottom portion (Y: 1080-2160).",
            "",
            "**Expected Behavior**: Right edge should be:",
            "- OUTER from Y: 0 to Y: 1080 (no adjacent monitor)",
            "- NON-OUTER from Y: 1080 to Y: 2160 (adjacent to 1080p monitor)",
            "",
            "**Failed Test Examples**:",
            "",
        ))

        shown = failures[:3]  # keep the prompt short: first three examples only
        for index, failure in enumerate(shown, 1):
            details = _details_of(failure)
            edge = _edge_of(details)
            print(f"{index}. **Configuration**: {failure['monitor_config']}")
            print(f" - Test Point: {details.get('test_point', 'N/A')}")
            # NOTE(review): the "X=..., Y range" wording assumes a vertical
            # (left/right) edge; confirm the report's edge_type values before
            # specialising it for horizontal edges.
            print(
                f" - Edge: {edge.get('edge_type', 'N/A')} at X={edge.get('position', 'N/A')}, "
                f"Y range=[{edge.get('range_start', 'N/A')}, {edge.get('range_end', 'N/A')}]"
            )
            print(" - Expected: Cursor wraps to opposite edge")
            print(" - Actual: No wrap occurred (edge incorrectly marked as non-outer)")
            print()

        remaining = len(failures) - len(shown)
        if remaining > 0:
            print(f" ... and {remaining} more similar failures")
            print()

    def generate_github_copilot_prompt(self) -> None:
        """Generate a prompt suitable for GitHub Copilot to fix the issues.

        Prints a Markdown bug report: the summary, one numbered section per
        failing test type that has a dedicated write-up, then fixed guidance
        on the proposed code changes. Sections are numbered sequentially among
        those actually present, so the list always starts at 1.
        """
        _banner("GITHUB COPILOT FIX PROMPT")
        # NOTE(review): the inner ``` fences close this outer ```markdown fence
        # under CommonMark; fine for copy-paste, but consider a longer outer
        # fence if the output is ever rendered.
        print("\n```markdown")
        print("# CursorWrap Edge Detection Bug Report")
        print()
        print("## Test Results Summary")
        print(f"- Total Configurations Tested: {self.summary.get('total_configs', 0)}")
        print(f"- Pass Rate: {self.summary.get('pass_rate', 'N/A')}")
        print(f"- Failed Tests: {self.summary.get('failed', 0)}")
        print(f"- Total Issues: {self.summary.get('total_issues', 0)}")
        print()

        by_test_type = self._group_by_test_name()

        print("## Critical Issues Found")
        print()

        section = 0

        if _WRAP_TEST in by_test_type:
            section += 1
            self._print_wrap_prompt_section(section, by_test_type[_WRAP_TEST])

        if _SINGLE_EDGE_TEST in by_test_type:
            section += 1
            print(f"### {section}. Single Monitor Edge Detection Failures")
            print()
            print(f"**Count**: {len(by_test_type[_SINGLE_EDGE_TEST])} failures")
            print()
            print("Single monitor configurations should have exactly 4 outer edges.")
            print("This indicates a fundamental problem in baseline edge detection.")
            print()

        if _TOUCHING_TEST in by_test_type:
            section += 1
            print(f"### {section}. Adjacent Monitor Detection Failures")
            print()
            print(f"**Count**: {len(by_test_type[_TOUCHING_TEST])} failures")
            print()
            print("Adjacent monitors not being detected correctly by EdgesAreAdjacent().")
            print()

        _emit(_PROMPT_FIX_GUIDANCE)

    def run_analysis(self, detailed: bool = False, copilot_mode: bool = False) -> None:
        """Run complete analysis.

        Args:
            detailed: also print the per-failure listing at the end.
            copilot_mode: print only the Copilot fix prompt and skip the
                regular analysis.
        """
        if copilot_mode:
            self.generate_github_copilot_prompt()
            return

        self.print_overview()

        if not self.failures:
            print("\n✓ No failures to analyze!")
            return

        by_test_type, _ = self.analyze_failure_patterns()

        # Specific analysis for wrap calculation failures
        if _WRAP_TEST in by_test_type:
            self.analyze_wrap_calculation_failures(by_test_type[_WRAP_TEST])

        self.explain_common_issues()
        self.print_recommendations()

        if detailed:
            self.detailed_failure_dump()


def main():
    """Main entry point.

    Parses the command line, runs the analysis and exits with status 0 when
    the report lists no failures, 1 when it does or when the report cannot be
    loaded or analyzed.
    """
    parser = argparse.ArgumentParser(
        description="Analyze CursorWrap test results"
    )
    parser.add_argument(
        "--report",
        default="test_report.json",
        help="Path to test report JSON file"
    )
    parser.add_argument(
        "--detailed",
        action="store_true",
        help="Show detailed failure listing"
    )
    parser.add_argument(
        "--copilot",
        action="store_true",
        help="Generate GitHub Copilot-friendly fix prompt"
    )

    args = parser.parse_args()

    try:
        analyzer = TestResultAnalyzer(args.report)
        analyzer.run_analysis(detailed=args.detailed, copilot_mode=args.copilot)
    except FileNotFoundError:
        print(f"Error: Could not find report file: {args.report}")
        print("\nRun monitor_layout_tests.py first to generate the report.")
        sys.exit(1)
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON in report file: {args.report}")
        sys.exit(1)
    except Exception as e:  # top-level boundary: report the error and fail
        print(f"Error analyzing report: {e}")
        sys.exit(1)

    # Exit with error code if there were failures
    sys.exit(1 if analyzer.failures else 0)


if __name__ == "__main__":
    main()