/ specs / construct_language_cl_norm_2_spec.tex
construct_language_cl_norm_2_spec.tex
  1  \documentclass[11pt]{article}
  2  
  3  \usepackage[margin=1in]{geometry}
  4  \usepackage{microtype}
  5  \usepackage{booktabs}
  6  \usepackage{longtable}
  7  \usepackage[hidelinks]{hyperref}
  8  \usepackage{amsmath}
  9  
 10  \title{CL-NORM-2 --- Scope Inference and Ambiguity Resolution}
 11  \author{}
 12  \date{January 13, 2026}
 13  
 14  \begin{document}
 15  \maketitle
 16  
 17  \section{Purpose}
 18  
 19  CL-NORM-1 provides deterministic ordering of instances for stable serialization.
 20  CL-NORM-2 goes further: it attempts to remove a major source of ambiguity in human-authored Constructs by
 21  \textbf{inferring scope} from geometry.
 22  
 23  The goal is not to mimic natural-language grammar. The goal is exchange stability: the same diagram layout implies
 24  the same scope tree even when \texttt{scope} fields are missing or inconsistent.
 25  
 26  \section{Terminology}
 27  
 28  \begin{itemize}
 29    \item \textbf{Frame instance}: an instance whose glyph is typed as \texttt{frame}. Frames induce scope.
 30    \item \textbf{Scope}: a parent instance id (typically a frame) that contextualizes a set of instances.
 31    \item \textbf{Containment (heuristic)}: a geometric test determining whether one instance is inside a frame.
 32  \end{itemize}
 33  
 34  \section{Scope Inference Algorithm}
 35  
 36  Given normalized instances (coords/rot/ref/scale quantized):
 37  
 38  \subsection{Step 1: Identify Frames}
 39  
 40  Frames are identified by glyph ids (inventory-dependent). For the current inventory, frames include:
 41  
 42  \begin{itemize}
 43    \item seed frames: \texttt{G07}, \texttt{G08}, \texttt{G18}
 44    \item extended frames: \texttt{G30}--\texttt{G35}
 45  \end{itemize}
 46  
 47  \subsection{Step 2: Assign a Semantic Radius}
 48  
 49  Each instance is assigned an \emph{effective semantic radius}:
 50  
 51  \[
 52  r(i) = r_{\text{base}}(\text{type}(i)) \cdot s(i)
 53  \]
 54  
 55  where \(s(i)\) is the instance scale and \(r_{\text{base}}\) is type-dependent. Frames use a deliberately larger base
 56  radius than their drawn stroke so that ``frame-gesture'' glyphs can function as semantic containers.
 57  
 58  A practical base radius set:
 59  
 60  \begin{center}
 61  \begin{tabular}{@{}lr@{}}
 62  \toprule
 63  Type & \(r_{\text{base}}\) \\
 64  \midrule
 65  frame & 40 \\
 66  stance & 22 \\
 67  relation & 22 \\
 68  operator & 22 \\
 69  object & 18 \\
 70  \bottomrule
 71  \end{tabular}
 72  \end{center}
 73  
 74  \subsection{Step 3: Containment Test}
 75  
 76  A frame \(F\) contains an instance \(X\) if:
 77  
 78  \[
 79  \operatorname{dist}(F, X) \le r(F) - r(X) - m
 80  \]
 81  
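 82  with margin \(m = 2\), where \(\operatorname{dist}(F, X)\) is the Euclidean distance between the positions of \(F\) and \(X\), and \(r(\cdot)\) is the effective semantic radius from Step~2.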
 83  
 84  \subsection{Step 4: Choose the Deepest Scope}
 85  
 86  For each instance \(X\), compute the set of containing frames:
 87  
 88  \[
 89  C(X) = \{F \mid F \text{ is a frame and } F \text{ contains } X\}
 90  \]
 91  
 92  If \(C(X)\) is empty, \(X\) is in the root scope. Otherwise, choose the \textbf{smallest} container:
 93  
 94  \[
 95  \operatorname{scope}(X) = \arg\min_{F \in C(X)} r(F)
 96  \]
 97  
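 98  This produces the deepest (most specific) scope. If several containing frames share the smallest radius, the tie must be broken deterministically, e.g.\ by frame instance key (glyph id, then position, then id).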
 99  
100  \subsection{Step 5: Validate or Rewrite \texttt{scope}}
101  
102  \begin{itemize}
103    \item In non-strict mode, inferred scope \emph{replaces} any existing \texttt{scope} field.
104    \item In strict mode, a mismatch between declared and inferred scope is an error.
105  \end{itemize}
106  
107  \section{Canonical Scope Tree Order}
108  
109  Frames induce a scope tree. Canonical ordering of scopes is a depth-first traversal from root.
110  Children are ordered deterministically by their frame instance key (glyph id, then position, then id).
111  
112  Within each scope, instances are ordered deterministically (CL-NORM-1 key).
113  
114  \section{Why this is still ``non-linguistic''}
115  
116  This algorithm does not introduce parts of speech, tense, agreement, or word order.
117  It uses only:
118  
119  \begin{itemize}
120    \item geometry (distance + scale)
121    \item typed frame behavior (container vs non-container)
122    \item deterministic tie-breaking
123  \end{itemize}
124  
125  The result is a more stable construct object without becoming a human language.
126  
127  \section{Notes}
128  
129  CL-NORM-2 is heuristic by design. Future revisions can replace circular radius tests with
130  shape-specific bounds if glyph geometry is made available to the normalizer.
131  
132  \end{document}