Perception.swift
import ApplicationServices
import AppKit

/// Accessibility roles that `walkTree` (with `filter == "interactive"`) and
/// `annotateElements` treat as interactive.
let interactiveRoles: Set<String> = [
    "AXButton", "AXTextField", "AXTextArea", "AXCheckBox",
    "AXRadioButton", "AXPopUpButton", "AXComboBox", "AXSlider",
    "AXMenuItem", "AXLink", "AXRow", "AXMenuButton",
    "AXIncrementor", "AXColorWell", "AXDisclosureTriangle",
    "AXTabGroup", "AXTab", "AXToolbar", "AXMenuBar",
    "AXMenu", "AXSegmentedControl",
]

/// Layout containers that cost 0 semantic depth.
let layoutRoles: Set<String> = [
    "AXGroup", "AXGenericElement", "AXSection", "AXDiv",
    "AXList", "AXLandmarkMain", "AXLandmarkNavigation",
    "AXLandmarkBanner", "AXLandmarkContentInfo",
    "AXSplitGroup", "AXScrollArea", "AXLayoutArea",
]

/// Running counter used by `walkTree` to mint `e<N>` refs. Reset by `readTree`.
var refCounter = 0

/// Maps every ref minted by `walkTree` to the path and role of its element.
/// Reset by `readTree`. Shared, unsynchronized module-level state.
var refPaths: [String: RefEntry] = [:]

/// Returns `text` unless it is nil or empty, in which case returns nil.
private func nonEmpty(_ text: String?) -> String? {
    guard let text = text, !text.isEmpty else { return nil }
    return text
}

/// Visits the children of `parent` in order, handing each one its path.
///
/// A child's path is `"<parentPath>/<role>[<n>]"`, where `n` counts earlier
/// siblings that reported the same role. Children without a readable `AXRole`
/// are skipped and do not consume an index.
///
/// - Parameters:
///   - parent: Element whose children are enumerated via `axChildren`.
///   - parentPath: Path prefix for the generated child paths.
///   - visit: Called with each child and its path; return `false` to stop
///     early (no further children are queried).
private func visitChildren(
    of parent: AXUIElement,
    parentPath: String,
    _ visit: (_ child: AXUIElement, _ path: String) -> Bool
) {
    guard let kids = axChildren(parent) else { return }
    var nextIndexByRole: [String: Int] = [:]
    for kid in kids {
        guard let kidRole = axString(kid, "AXRole") else { continue }
        let idx = nextIndexByRole[kidRole, default: 0]
        nextIndexByRole[kidRole] = idx + 1
        guard visit(kid, "\(parentPath)/\(kidRole)[\(idx)]") else { return }
    }
}

/// Resolves the display name and first `AXWindows` entry of the app `pid`.
///
/// - Returns: nil when `pid` does not fit in an `Int32` or the application
///   exposes no window; otherwise the app name (`"Unknown"` when it cannot be
///   resolved), the first window, and that window's title (`""` when absent).
private func firstWindowContext(pid: Int) -> (appName: String, window: AXUIElement, windowTitle: String)? {
    // `Int32(pid)` would trap on an out-of-range value; fail softly instead.
    guard let processID = Int32(exactly: pid) else { return nil }

    let appRef = AXUIElementCreateApplication(processID)
    let appName = NSRunningApplication(processIdentifier: processID)?.localizedName ?? "Unknown"

    guard let windows = axValue(appRef, "AXWindows") as? [AXUIElement],
          let win = windows.first else {
        return nil
    }
    return (appName, win, axString(win, "AXTitle") ?? "")
}

/// Recursively converts `el` and its descendants into an `Element` tree.
///
/// Each element costs one unit of semantic depth, except layout containers
/// (`layoutRoles`) that carry no non-empty title, description or value, which
/// cost nothing. Subtrees whose depth would exceed `budget` are dropped.
/// Every returned element gets a fresh `e<N>` ref, which is also recorded in
/// the global `refPaths` (children are numbered before their parent).
///
/// - Parameters:
///   - el: Accessibility element to convert.
///   - semanticDepth: Depth already consumed by the ancestors of `el`.
///   - budget: Maximum semantic depth an element may have to be included.
///   - filter: When equal to `"interactive"`, elements that are neither in
///     `interactiveRoles` nor have any surviving child are omitted.
///   - path: Path of `el`, stored alongside its ref.
/// - Returns: The converted element, or nil when `el` has no readable role,
///   is over budget, or is removed by the filter.
func walkTree(_ el: AXUIElement, semanticDepth: Int, budget: Int, filter: String, path: String) -> Element? {
    guard let role = axString(el, "AXRole") else { return nil }

    // Empty strings are treated as absent throughout, so an empty title does
    // not make a layout container count as "having content".
    let subrole = nonEmpty(axString(el, "AXSubrole"))
    let title = nonEmpty(axString(el, "AXTitle"))
    let desc = nonEmpty(axString(el, "AXDescription"))
    var value: String? = nil
    if let raw = axValue(el, "AXValue") {
        let text = "\(raw)"
        if !text.isEmpty {
            // Cap long values at 200 characters to keep the output bounded.
            value = text.count > 200 ? String(text.prefix(200)) + "..." : text
        }
    }
    let enabled = axBool(el, "AXEnabled")
    let selected = axBool(el, "AXSelected")

    let hasContent = title != nil || desc != nil || value != nil
    let cost = (layoutRoles.contains(role) && !hasContent) ? 0 : 1
    let newDepth = semanticDepth + cost

    guard newDepth <= budget else { return nil }

    var children: [Element] = []
    visitChildren(of: el, parentPath: path) { kid, kidPath in
        if let child = walkTree(kid, semanticDepth: newDepth, budget: budget, filter: filter, path: kidPath) {
            children.append(child)
        }
        return true
    }

    // Children were already filtered, so any survivor justifies keeping the parent.
    if filter == "interactive" && !interactiveRoles.contains(role) && children.isEmpty {
        return nil
    }

    refCounter += 1
    let ref = "e\(refCounter)"
    refPaths[ref] = RefEntry(path: path, role: role)

    var elem = Element(ref: ref, role: role)
    if let subrole = subrole { elem.subrole = subrole }
    if let title = title { elem.title = title }
    if let desc = desc { elem.desc = desc }
    if let value = value { elem.value = value }
    // Only the non-default states are recorded: disabled and selected.
    if let isEnabled = enabled, !isEnabled { elem.enabled = isEnabled }
    if let isSelected = selected, isSelected { elem.selected = isSelected }
    elem.children = children.isEmpty ? nil : children

    return elem
}

/// Collects up to `maxLabels` interactive elements of the app's first window,
/// each with a numeric label, an `a<N>` ref and its frame.
///
/// Elements are visited depth-first in child order, starting from the window's
/// children. An element is labelled when its role is in `interactiveRoles`,
/// passes the optional role filter, and `elementFrame` yields a frame.
///
/// - Parameters:
///   - pid: Process identifier of the target application.
///   - roles: Optional allow-list of roles; nil or empty means no restriction.
///   - maxLabels: Maximum number of annotations to produce.
/// - Returns: nil when `pid` is out of the `Int32` range or the application
///   has no window; otherwise the annotations and their ref-to-path map.
func annotateElements(pid: Int, roles: [String]?, maxLabels: Int) -> AnnotateResult? {
    guard let context = firstWindowContext(pid: pid) else { return nil }

    let roleFilter: Set<String>? = roles.flatMap { $0.isEmpty ? nil : Set($0) }

    var annotations: [AnnotationEntry] = []
    var annotateRefPaths: [String: RefEntry] = [:]
    var labelCounter = 0

    func walkForAnnotation(_ el: AXUIElement, path: String) {
        guard labelCounter < maxLabels else { return }
        guard let role = axString(el, "AXRole") else { return }

        let matchesFilter = roleFilter?.contains(role) ?? true

        if interactiveRoles.contains(role) && matchesFilter, let frame = elementFrame(el) {
            labelCounter += 1
            let ref = "a\(labelCounter)"
            // Fall back to the description when the title is missing *or empty*.
            let title = nonEmpty(axString(el, "AXTitle")) ?? nonEmpty(axString(el, "AXDescription"))
            annotations.append(AnnotationEntry(
                label: labelCounter, ref: ref, role: role,
                title: title,
                x: frame.x, y: frame.y,
                width: frame.width, height: frame.height
            ))
            annotateRefPaths[ref] = RefEntry(path: path, role: role)
        }

        guard labelCounter < maxLabels else { return }
        visitChildren(of: el, parentPath: path) { kid, kidPath in
            walkForAnnotation(kid, path: kidPath)
            return labelCounter < maxLabels
        }
    }

    visitChildren(of: context.window, parentPath: "window[0]") { kid, kidPath in
        walkForAnnotation(kid, path: kidPath)
        return labelCounter < maxLabels
    }

    return AnnotateResult(
        app: context.appName,
        pid: pid,
        window: context.windowTitle,
        annotations: annotations,
        refPaths: annotateRefPaths
    )
}

/// Reads the element tree of the app's first window.
///
/// Resets the global `refCounter` and `refPaths` before walking, so refs from
/// a previous call are discarded. The globals are left untouched when the
/// function returns nil.
///
/// - Parameters:
///   - pid: Process identifier of the target application.
///   - budget: Maximum semantic depth, forwarded to `walkTree`.
///   - filter: Filter mode, forwarded to `walkTree`.
/// - Returns: nil when `pid` is out of the `Int32` range or the application
///   has no window; otherwise the top-level elements and the ref-to-path map.
func readTree(pid: Int, budget: Int, filter: String) -> ReadTreeResult? {
    guard let context = firstWindowContext(pid: pid) else { return nil }

    refCounter = 0
    refPaths = [:]

    var elements: [Element] = []
    visitChildren(of: context.window, parentPath: "window[0]") { kid, kidPath in
        if let elem = walkTree(kid, semanticDepth: 0, budget: budget, filter: filter, path: kidPath) {
            elements.append(elem)
        }
        return true
    }

    return ReadTreeResult(
        app: context.appName,
        pid: pid,
        window: context.windowTitle,
        elements: elements,
        refPaths: refPaths
    )
}