/ skills / bundled / loremIpsum.ts
loremIpsum.ts
  1  import { registerBundledSkill } from '../bundledSkills.js'
  2  
  3  // Verified 1-token words (tested via API token counting)
  4  // All common English words confirmed to tokenize as single tokens
  5  const ONE_TOKEN_WORDS = [
  6    // Articles & pronouns
  7    'the',
  8    'a',
  9    'an',
 10    'I',
 11    'you',
 12    'he',
 13    'she',
 14    'it',
 15    'we',
 16    'they',
 17    'me',
 18    'him',
 19    'her',
 20    'us',
 21    'them',
 22    'my',
 23    'your',
 24    'his',
 25    'its',
 26    'our',
 27    'this',
 28    'that',
 29    'what',
 30    'who',
 31    // Common verbs
 32    'is',
 33    'are',
 34    'was',
 35    'were',
 36    'be',
 37    'been',
 38    'have',
 39    'has',
 40    'had',
 41    'do',
 42    'does',
 43    'did',
 44    'will',
 45    'would',
 46    'can',
 47    'could',
 48    'may',
 49    'might',
 50    'must',
 51    'shall',
 52    'should',
 53    'make',
 54    'made',
 55    'get',
 56    'got',
 57    'go',
 58    'went',
 59    'come',
 60    'came',
 61    'see',
 62    'saw',
 63    'know',
 64    'take',
 65    'think',
 66    'look',
 67    'want',
 68    'use',
 69    'find',
 70    'give',
 71    'tell',
 72    'work',
 73    'call',
 74    'try',
 75    'ask',
 76    'need',
 77    'feel',
 78    'seem',
 79    'leave',
 80    'put',
 81    // Common nouns & adjectives
 82    'time',
 83    'year',
 84    'day',
 85    'way',
 86    'man',
 87    'thing',
 88    'life',
 89    'hand',
 90    'part',
 91    'place',
 92    'case',
 93    'point',
 94    'fact',
 95    'good',
 96    'new',
 97    'first',
 98    'last',
 99    'long',
100    'great',
101    'little',
102    'own',
103    'other',
104    'old',
105    'right',
106    'big',
107    'high',
108    'small',
109    'large',
110    'next',
111    'early',
112    'young',
113    'few',
114    'public',
115    'bad',
116    'same',
117    'able',
118    // Prepositions & conjunctions
119    'in',
120    'on',
121    'at',
122    'to',
123    'for',
124    'of',
125    'with',
126    'from',
127    'by',
128    'about',
129    'like',
130    'through',
131    'over',
132    'before',
133    'between',
134    'under',
135    'since',
136    'without',
137    'and',
138    'or',
139    'but',
140    'if',
141    'than',
142    'because',
143    'as',
144    'until',
145    'while',
146    'so',
147    'though',
148    'both',
149    'each',
150    'when',
151    'where',
152    'why',
153    'how',
154    // Common adverbs
155    'not',
156    'now',
157    'just',
158    'more',
159    'also',
160    'here',
161    'there',
162    'then',
163    'only',
164    'very',
165    'well',
166    'back',
167    'still',
168    'even',
169    'much',
170    'too',
171    'such',
172    'never',
173    'again',
174    'most',
175    'once',
176    'off',
177    'away',
178    'down',
179    'out',
180    'up',
181    // Tech/common words
182    'test',
183    'code',
184    'data',
185    'file',
186    'line',
187    'text',
188    'word',
189    'number',
190    'system',
191    'program',
192    'set',
193    'run',
194    'value',
195    'name',
196    'type',
197    'state',
198    'end',
199    'start',
200  ]
201  
/**
 * Builds filler text made of randomly chosen words from `ONE_TOKEN_WORDS`.
 *
 * Words are grouped into sentences of 10-20 words, each terminated by a
 * period. After every sentence except the last there is a 20% chance of a
 * blank-line paragraph break. Only words are counted towards the target;
 * punctuation and whitespace are not, so the real token count of the output
 * is approximate.
 *
 * @param targetTokens Number of words to emit. Non-positive, NaN, and
 *   non-finite values produce an empty string.
 * @param random Source of uniform numbers in [0, 1). Defaults to
 *   `Math.random`; pass a deterministic function for reproducible output.
 * @returns The generated text with surrounding whitespace trimmed.
 */
function generateLoremIpsum(
  targetTokens: number,
  random: () => number = Math.random,
): string {
  // Infinity would keep the loop below running forever, and NaN or
  // non-positive targets have nothing to generate.
  if (!Number.isFinite(targetTokens) || targetTokens <= 0) {
    return ''
  }

  let tokens = 0
  let result = ''

  while (tokens < targetTokens) {
    // Sentence: 10-20 words, cut short if the target is reached first
    const sentenceLength = 10 + Math.floor(random() * 11)
    let sentence = ''

    for (let i = 0; i < sentenceLength && tokens < targetTokens; i++) {
      const word = ONE_TOKEN_WORDS[Math.floor(random() * ONE_TOKEN_WORDS.length)]
      sentence += i === 0 ? word : ` ${word}`
      tokens++
    }

    result += `${sentence}. `

    // Paragraph break with a 20% chance after each sentence (on average one
    // break every five sentences); never after the final sentence.
    if (tokens < targetTokens && random() < 0.2) {
      result += '\n\n'
    }
  }

  return result.trim()
}
233  
/**
 * Registers the `lorem-ipsum` bundled skill, which emits filler text for
 * long-context testing.
 *
 * Registration is skipped unless `process.env.USER_TYPE` is `'ant'`.
 *
 * Argument handling for the command:
 * - no argument (or only whitespace): defaults to 10,000 tokens;
 * - a string of decimal digits with a value above zero: used as the count;
 * - anything else (e.g. `50k`, `1e5`, `-3`, `10.5`): an "Invalid token count"
 *   message is returned instead of text.
 *
 * Requests above 500,000 tokens are capped, and the generated text is then
 * prefixed with a notice stating the requested amount and the cap.
 */
export function registerLoremIpsumSkill(): void {
  if (process.env.USER_TYPE !== 'ant') {
    return
  }

  const defaultTokens = 10_000
  // Cap at 500k tokens for safety
  const maxTokens = 500_000

  registerBundledSkill({
    name: 'lorem-ipsum',
    description:
      'Generate filler text for long context testing. Specify token count as argument (e.g., /lorem-ipsum 50000). Outputs approximately the requested number of tokens. Ant-only.',
    argumentHint: '[token_count]',
    userInvocable: true,
    async getPromptForCommand(args) {
      const raw = args ? args.trim() : ''
      let targetTokens = defaultTokens

      if (raw !== '') {
        // parseInt on its own stops at the first non-digit, so "50k" would
        // become 50 and "1e5" would become 1. Require digits only so a
        // mistyped count is rejected rather than silently misread.
        const parsed = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN

        // Written as a negated comparison so NaN is rejected as well as 0.
        if (!(parsed > 0)) {
          return [
            {
              type: 'text',
              text: 'Invalid token count. Please provide a positive number (e.g., /lorem-ipsum 10000).',
            },
          ]
        }

        targetTokens = parsed
      }

      const cappedTokens = Math.min(targetTokens, maxTokens)
      const loremText = generateLoremIpsum(cappedTokens)

      // Tell the user when the request was reduced to the cap.
      const capNotice =
        cappedTokens < targetTokens
          ? `Requested ${targetTokens} tokens, but capped at ${maxTokens.toLocaleString('en-US')} for safety.\n\n`
          : ''

      // Just dump the lorem ipsum text into the conversation
      return [
        {
          type: 'text',
          text: capNotice + loremText,
        },
      ]
    },
  })
}