/ internal / tools / axserver / Sources / Perception.swift
Perception.swift
  1  import ApplicationServices
  2  import AppKit
  3  
  4  let interactiveRoles: Set<String> = [
  5      "AXButton", "AXTextField", "AXTextArea", "AXCheckBox",
  6      "AXRadioButton", "AXPopUpButton", "AXComboBox", "AXSlider",
  7      "AXMenuItem", "AXLink", "AXRow", "AXMenuButton",
  8      "AXIncrementor", "AXColorWell", "AXDisclosureTriangle",
  9      "AXTabGroup", "AXTab", "AXToolbar", "AXMenuBar",
 10      "AXMenu", "AXSegmentedControl",
 11  ]
 12  
 13  /// Layout containers that cost 0 semantic depth.
 14  let layoutRoles: Set<String> = [
 15      "AXGroup", "AXGenericElement", "AXSection", "AXDiv",
 16      "AXList", "AXLandmarkMain", "AXLandmarkNavigation",
 17      "AXLandmarkBanner", "AXLandmarkContentInfo",
 18      "AXSplitGroup", "AXScrollArea", "AXLayoutArea",
 19  ]
 20  
 21  var refCounter = 0
 22  var refPaths: [String: RefEntry] = [:]
 23  
 24  func walkTree(_ el: AXUIElement, semanticDepth: Int, budget: Int, filter: String, path: String) -> Element? {
 25      guard let role = axString(el, "AXRole") else { return nil }
 26  
 27      let subrole = axString(el, "AXSubrole")
 28      let title = axString(el, "AXTitle")
 29      let desc = axString(el, "AXDescription")
 30      var valStr: String? = nil
 31      if let v = axValue(el, "AXValue") {
 32          let s = "\(v)"
 33          valStr = s.count > 200 ? String(s.prefix(200)) + "..." : s
 34      }
 35      let enabled = axBool(el, "AXEnabled")
 36      let selected = axBool(el, "AXSelected")
 37  
 38      let hasContent = title != nil || desc != nil || valStr != nil
 39      let cost = (layoutRoles.contains(role) && !hasContent) ? 0 : 1
 40      let newDepth = semanticDepth + cost
 41  
 42      guard newDepth <= budget else { return nil }
 43  
 44      var childElements: [Element]? = nil
 45      if let kids = axChildren(el) {
 46          var childIndex: [String: Int] = [:]
 47          var results: [Element] = []
 48          for kid in kids {
 49              guard let kidRole = axString(kid, "AXRole") else { continue }
 50              let idx = childIndex[kidRole, default: 0]
 51              childIndex[kidRole] = idx + 1
 52              let childPath = "\(path)/\(kidRole)[\(idx)]"
 53              if let child = walkTree(kid, semanticDepth: newDepth, budget: budget, filter: filter, path: childPath) {
 54                  results.append(child)
 55              }
 56          }
 57          if !results.isEmpty { childElements = results }
 58      }
 59  
 60      if filter == "interactive" {
 61          let isInteractive = interactiveRoles.contains(role)
 62          let hasInteractiveChildren = childElements != nil && !childElements!.isEmpty
 63          if !isInteractive && !hasInteractiveChildren { return nil }
 64      }
 65  
 66      refCounter += 1
 67      let ref = "e\(refCounter)"
 68      refPaths[ref] = RefEntry(path: path, role: role)
 69  
 70      var elem = Element(ref: ref, role: role)
 71      if let s = subrole, !s.isEmpty { elem.subrole = s }
 72      if let t = title, !t.isEmpty { elem.title = t }
 73      if let d = desc, !d.isEmpty { elem.desc = d }
 74      if let v = valStr, !v.isEmpty { elem.value = v }
 75      if let e = enabled, !e { elem.enabled = e }
 76      if let s = selected, s { elem.selected = s }
 77      elem.children = childElements
 78  
 79      return elem
 80  }
 81  
 82  func annotateElements(pid: Int, roles: [String]?, maxLabels: Int) -> AnnotateResult? {
 83      let appRef = AXUIElementCreateApplication(Int32(pid))
 84      let appName: String
 85      if let app = NSRunningApplication(processIdentifier: Int32(pid)) {
 86          appName = app.localizedName ?? "Unknown"
 87      } else {
 88          appName = "Unknown"
 89      }
 90  
 91      guard let windows = axValue(appRef, "AXWindows") as? [AXUIElement],
 92            let win = windows.first else {
 93          return nil
 94      }
 95  
 96      let winTitle = axString(win, "AXTitle") ?? ""
 97      let roleFilter: Set<String>? = roles.flatMap { roles in
 98          roles.isEmpty ? nil : Set(roles)
 99      }
100  
101      var annotations: [AnnotationEntry] = []
102      var annotateRefPaths: [String: RefEntry] = [:]
103      var labelCounter = 0
104  
105      func walkForAnnotation(_ el: AXUIElement, path: String) {
106          guard labelCounter < maxLabels else { return }
107          guard let role = axString(el, "AXRole") else { return }
108  
109          let isInteractive = interactiveRoles.contains(role)
110          let matchesFilter = roleFilter == nil || roleFilter!.contains(role)
111  
112          if isInteractive && matchesFilter {
113              if let frame = elementFrame(el) {
114                  labelCounter += 1
115                  let ref = "a\(labelCounter)"
116                  let title = axString(el, "AXTitle") ?? axString(el, "AXDescription")
117                  annotations.append(AnnotationEntry(
118                      label: labelCounter, ref: ref, role: role,
119                      title: title,
120                      x: frame.x, y: frame.y,
121                      width: frame.width, height: frame.height
122                  ))
123                  annotateRefPaths[ref] = RefEntry(path: path, role: role)
124              }
125          }
126  
127          guard labelCounter < maxLabels else { return }
128          if let kids = axChildren(el) {
129              var childIndex: [String: Int] = [:]
130              for kid in kids {
131                  guard let kidRole = axString(kid, "AXRole") else { continue }
132                  let idx = childIndex[kidRole, default: 0]
133                  childIndex[kidRole] = idx + 1
134                  let childPath = "\(path)/\(kidRole)[\(idx)]"
135                  walkForAnnotation(kid, path: childPath)
136                  if labelCounter >= maxLabels { break }
137              }
138          }
139      }
140  
141      if let kids = axChildren(win) {
142          var childIndex: [String: Int] = [:]
143          for kid in kids {
144              guard let kidRole = axString(kid, "AXRole") else { continue }
145              let idx = childIndex[kidRole, default: 0]
146              childIndex[kidRole] = idx + 1
147              let path = "window[0]/\(kidRole)[\(idx)]"
148              walkForAnnotation(kid, path: path)
149              if labelCounter >= maxLabels { break }
150          }
151      }
152  
153      return AnnotateResult(
154          app: appName,
155          pid: pid,
156          window: winTitle,
157          annotations: annotations,
158          refPaths: annotateRefPaths
159      )
160  }
161  
/// Reads the accessibility tree of an application's first window.
///
/// Resets the global `refCounter` and `refPaths`, then converts every child of the
/// first entry of the app's `AXWindows` with `walkTree`, starting at semantic depth 0.
/// The window element itself is not part of the result; its children are the roots,
/// with paths of the form `window[0]/<role>[<index>]`.
///
/// - Parameters:
///   - pid: Process identifier of the target application.
///   - budget: Semantic-depth budget forwarded to `walkTree`.
///   - filter: Filter name forwarded to `walkTree`.
/// - Returns: The element tree plus the ref-to-path table built during the walk, or
///   `nil` when `pid` does not fit in 32 bits, or when `AXWindows` cannot be read as
///   a non-empty element list. The globals are left untouched in the `nil` case.
func readTree(pid: Int, budget: Int, filter: String) -> ReadTreeResult? {
    // `Int32(pid)` would trap on an out-of-range value; report failure instead.
    guard let processID = Int32(exactly: pid) else { return nil }

    let appRef = AXUIElementCreateApplication(processID)
    let appName = NSRunningApplication(processIdentifier: processID)?.localizedName ?? "Unknown"

    guard let windows = axValue(appRef, "AXWindows") as? [AXUIElement],
          let win = windows.first else {
        return nil
    }

    let winTitle = axString(win, "AXTitle") ?? ""

    // Start a fresh ref namespace for this read.
    refCounter = 0
    refPaths = [:]

    var elements: [Element] = []
    if let kids = axChildren(win) {
        // Per-role sibling counters used to build stable role/index paths.
        var childIndex: [String: Int] = [:]
        for kid in kids {
            guard let kidRole = axString(kid, "AXRole") else { continue }
            let idx = childIndex[kidRole, default: 0]
            childIndex[kidRole] = idx + 1
            let path = "window[0]/\(kidRole)[\(idx)]"
            if let elem = walkTree(kid, semanticDepth: 0, budget: budget, filter: filter, path: path) {
                elements.append(elem)
            }
        }
    }

    return ReadTreeResult(
        app: appName,
        pid: pid,
        window: winTitle,
        elements: elements,
        refPaths: refPaths
    )
}