// src/lib/schedules/schedule-dedupe.ts
  1  import { CronExpressionParser } from 'cron-parser'
  2  import type { ScheduleType } from '@/types'
  3  import { dedup } from '@/lib/shared-utils'
  4  
/**
 * Loosely-typed view of a stored schedule as consumed by the dedupe helpers.
 * Every field is optional; values are normalized before they are compared.
 */
export type ScheduleLike = {
  /** Schedule id; `findEquivalentSchedules` skips entries whose id is missing or blank. */
  id?: string
  name?: string | null
  /** Agent the schedule runs for; only schedules with the same (trimmed) agent id can match. */
  agentId?: string | null
  /** Prompt text; compared after trimming, whitespace collapsing and lower-casing. */
  taskPrompt?: string | null
  /** Any value other than 'cron', 'once' or 'interval' is treated as 'interval'. */
  scheduleType?: ScheduleType | string | null
  /** Cron expression, relevant when the schedule type is 'cron'. */
  cron?: string | null
  /** Repeat period, relevant for 'interval' schedules; non-positive values count as missing. */
  intervalMs?: number | null
  /** Run time for 'once' schedules (presumably epoch milliseconds, it is compared against millisecond windows). */
  runAt?: number | null
  /** A missing or blank status is treated as 'active' when filtering. */
  status?: string | null
  /** Used to order matches newest first; falls back to `createdAt` when not a number. */
  updatedAt?: number | null
  createdAt?: number | null
  /** Creator agent id, checked against `creatorScope.agentId`. */
  createdByAgentId?: string | null
  /** Creator session id, checked against `creatorScope.sessionId`. */
  createdInSessionId?: string | null
}
 20  
/**
 * Description of a schedule that is about to be created or updated and should
 * be checked against the existing ones.
 */
export interface ScheduleDuplicateCandidate {
  /** Id of the candidate itself; used as the id to ignore when `ignoreId` is not supplied. */
  id?: string | null
  /** Required for matching: a blank agent id yields no matches. */
  agentId?: string | null
  /** Required for matching: a blank prompt yields no matches. */
  taskPrompt?: string | null
  /** Any value other than 'cron', 'once' or 'interval' is treated as 'interval'. */
  scheduleType?: ScheduleType | string | null
  cron?: string | null
  intervalMs?: number | null
  runAt?: number | null
  createdByAgentId?: string | null
  createdInSessionId?: string | null
}
 32  
/** Tuning knobs for `findDuplicateSchedule` / `findEquivalentSchedules`. */
export interface FindDuplicateScheduleOptions {
  /** Existing schedule id to exclude from the results; defaults to the candidate's own id. */
  ignoreId?: string | null
  /** Statuses that are eligible to match; defaults to 'active' and 'paused' when omitted or empty. */
  includeStatuses?: string[]
  /**
   * Restricts matches to schedules created by the given agent and/or session.
   * Schedules that carry no creator information are not excluded by the scope.
   * Fuzzy (non-exact) matching is only attempted when `sessionId` is non-blank.
   */
  creatorScope?: {
    agentId?: string | null
    sessionId?: string | null
  } | null
}
 41  
/** Normalized form of a schedule used for all comparisons in this module. */
interface ScheduleSignature {
  /** Trimmed id, '' when absent. */
  id: string
  /** Trimmed agent id, '' when absent. */
  agentId: string
  /** Trimmed, whitespace-collapsed, lower-cased prompt. */
  taskPrompt: string
  /** Stemmed, stop-word-filtered tokens of the prompt (duplicates possible). */
  promptTokens: string[]
  scheduleType: ScheduleType
  /** Cron expression with whitespace runs collapsed, '' when absent. */
  cron: string
  /** Positive integer or null. */
  intervalMs: number | null
  /** Positive integer or null. */
  runAt: number | null
}
 52  
/** How an existing schedule matched the candidate; 'exact' matches are ranked before 'fuzzy' ones. */
type ScheduleMatchKind = 'exact' | 'fuzzy'
 54  
 55  const PROMPT_STOPWORDS = new Set([
 56    'a',
 57    'an',
 58    'and',
 59    'any',
 60    'at',
 61    'back',
 62    'by',
 63    'check',
 64    'for',
 65    'from',
 66    'if',
 67    'in',
 68    'into',
 69    'me',
 70    'my',
 71    'of',
 72    'on',
 73    'once',
 74    'please',
 75    'remind',
 76    'report',
 77    'task',
 78    'the',
 79    'this',
 80    'to',
 81    'up',
 82    'update',
 83    'updates',
 84    'with',
 85  ])
 86  
 87  const ONCE_MATCH_WINDOW_MS = 15 * 60 * 1000
 88  
 89  function normalizeString(value: unknown): string {
 90    return typeof value === 'string' ? value.trim() : ''
 91  }
 92  
 93  function normalizePrompt(value: unknown): string {
 94    const text = normalizeString(value)
 95    if (!text) return ''
 96    return text.replace(/\s+/g, ' ').trim().toLowerCase()
 97  }
 98  
 99  function normalizePromptToken(token: string): string {
100    let normalized = token
101    if (normalized.length > 4 && normalized.endsWith('ies')) normalized = `${normalized.slice(0, -3)}y`
102    else if (normalized.length > 5 && normalized.endsWith('ing')) normalized = normalized.slice(0, -3)
103    else if (normalized.length > 4 && normalized.endsWith('ed')) normalized = normalized.slice(0, -2)
104    else if (normalized.length > 3 && normalized.endsWith('s') && !normalized.endsWith('ss')) normalized = normalized.slice(0, -1)
105    return normalized
106  }
107  
/** Two-letter tokens that are meaningful enough to survive the minimum-length filter. */
const SHORT_PROMPT_TOKENS = new Set(['ai', 'uk', 'us', 'eu'])

/**
 * Splits a prompt into comparable tokens for fuzzy matching.
 *
 * Steps: canonicalize the prompt, treat every run of characters that is not a
 * letter, combining mark or digit as a separator, stem each word, then drop
 * tokens of one or two characters (unless allow-listed) and stop words.
 *
 * Letters are recognized via Unicode properties rather than `a-z` only, so
 * accented words stay intact and non-Latin prompts still produce tokens.
 * Output for pure-ASCII prompts is unchanged.
 *
 * @param value - Raw prompt (any type; non-strings yield no tokens).
 * @returns Tokens in prompt order; duplicates are preserved.
 */
function tokenizePrompt(value: unknown): string[] {
  const words = normalizePrompt(value).replace(/[^\p{L}\p{M}\p{N}]+/gu, ' ').trim()
  if (!words) return []
  return words
    .split(' ')
    .map((word) => normalizePromptToken(word))
    .filter((token) => token.length > 2 || SHORT_PROMPT_TOKENS.has(token))
    .filter((token) => !PROMPT_STOPWORDS.has(token))
}
118  
/**
 * Canonical form of a cron expression: trimmed, with every whitespace run
 * between fields reduced to a single space.
 *
 * @returns The canonical expression, or '' when the input is not a string or is blank.
 */
function normalizeCron(value: unknown): string {
  // The string is already trimmed, so splitting on whitespace runs yields only
  // the fields themselves (and [''] for a blank input, which joins back to '').
  return normalizeString(value).split(/\s+/).join(' ')
}
124  
/**
 * Coerces a value to a strictly positive integer.
 *
 * Numbers are truncated toward zero; strings are parsed as base-10 integers
 * (leading digits only, as `Number.parseInt` does).
 *
 * @returns The positive integer, or null for any other type, non-finite
 *          values, and results that are zero or negative.
 */
function normalizePositiveInt(value: unknown): number | null {
  let candidate: number
  switch (typeof value) {
    case 'number':
      candidate = value
      break
    case 'string':
      candidate = Number.parseInt(value, 10)
      break
    default:
      return null
  }
  if (!Number.isFinite(candidate)) return null
  const whole = Math.trunc(candidate)
  return whole > 0 ? whole : null
}
135  
/**
 * Maps an arbitrary value onto a known schedule type.
 *
 * @returns 'cron' or 'once' when given exactly that; 'interval' for
 *          everything else (including 'interval' itself).
 */
function normalizeScheduleType(value: unknown): ScheduleType {
  switch (value) {
    case 'cron':
      return 'cron'
    case 'once':
      return 'once'
    default:
      return 'interval'
  }
}
140  
/**
 * Builds the normalized signature used for every comparison in this module.
 *
 * @param raw - A stored schedule or a duplicate candidate.
 * @returns A signature in which every field has been run through its normalizer.
 */
function toSignature(raw: ScheduleLike | ScheduleDuplicateCandidate): ScheduleSignature {
  const { id, agentId, taskPrompt, scheduleType, cron, intervalMs, runAt } = raw
  const signature: ScheduleSignature = {
    id: normalizeString(id),
    agentId: normalizeString(agentId),
    taskPrompt: normalizePrompt(taskPrompt),
    promptTokens: tokenizePrompt(taskPrompt),
    scheduleType: normalizeScheduleType(scheduleType),
    cron: normalizeCron(cron),
    intervalMs: normalizePositiveInt(intervalMs),
    runAt: normalizePositiveInt(runAt),
  }
  return signature
}
153  
/**
 * Renders the timing part of a signature as `<type>:<value>`, where the value
 * is the cron expression, the interval in ms, or the run time. A missing
 * value renders as an empty string after the colon.
 */
function cadenceKey(signature: ScheduleSignature): string {
  switch (signature.scheduleType) {
    case 'cron':
      return `cron:${signature.cron || ''}`
    case 'interval':
      return `interval:${signature.intervalMs ?? ''}`
    case 'once':
      return `once:${signature.runAt ?? ''}`
    default:
      // Only reachable if the schedule type is outside the three handled kinds.
      return signature.scheduleType
  }
}
160  
/**
 * Produces a string key identifying a schedule by agent, prompt and cadence.
 *
 * @param input - A stored schedule or a duplicate candidate.
 * @returns `agentId::prompt::type::cadenceKey`, or '' when the agent id or
 *          prompt is blank or the timing value required by the schedule type
 *          is missing.
 */
export function getScheduleSignatureKey(input: ScheduleLike | ScheduleDuplicateCandidate): string {
  const sig = toSignature(input)
  // Comparing a signature with itself succeeds only when the timing field its
  // type requires is actually present.
  const usable = sig.agentId !== '' && sig.taskPrompt !== '' && sameCadence(sig, sig)
  if (!usable) return ''
  return [sig.agentId, sig.taskPrompt, sig.scheduleType, cadenceKey(sig)].join('::')
}
167  
/**
 * Strict timing comparison between two signatures.
 *
 * Both must have the same schedule type and a present timing value:
 * identical cron text, identical interval, or one-off run times no more
 * than one second (1000 ms) apart.
 */
function sameCadence(a: ScheduleSignature, b: ScheduleSignature): boolean {
  if (a.scheduleType !== b.scheduleType) return false
  switch (a.scheduleType) {
    case 'cron':
      return a.cron !== '' && a.cron === b.cron
    case 'interval':
      return a.intervalMs != null && a.intervalMs === b.intervalMs
    case 'once':
      return a.runAt != null && b.runAt != null && Math.abs(a.runAt - b.runAt) <= 1000
    default:
      return false
  }
}
178  
/**
 * Estimates how often a cron expression fires by measuring the gap between
 * its first two occurrences after a fixed reference instant
 * (2026-01-01T00:00:00Z), which keeps the result independent of the current time.
 *
 * @param cron - Cron expression; '' short-circuits to null.
 * @returns The gap in milliseconds, or null when the expression cannot be
 *          parsed or iterated, or the gap is not positive.
 */
function tryResolveCronIntervalMs(cron: string): number | null {
  if (!cron) return null
  try {
    const schedule = CronExpressionParser.parse(cron, {
      currentDate: new Date('2026-01-01T00:00:00.000Z'),
    })
    const firstRun = schedule.next().getTime()
    const gap = schedule.next().getTime() - firstRun
    return gap > 0 ? gap : null
  } catch {
    // Any parser or iteration failure simply means "interval unknown".
    return null
  }
}
193  
/**
 * Buckets a repeat period into a named cadence family so that schedules with
 * nearly the same period compare equal.
 *
 * @param intervalMs - Period in milliseconds, or null.
 * @returns '' for a missing or non-positive period; one of '15m', '30m',
 *          'hourly', '6h', '12h', 'daily', 'weekly' when the period lies
 *          within that family's tolerance; otherwise `interval:<n>m` with the
 *          period rounded to whole minutes.
 */
function cadenceFamilyFromMs(intervalMs: number | null): string {
  if (intervalMs == null || intervalMs <= 0) return ''

  const MINUTE = 60 * 1000
  const HOUR = 60 * MINUTE
  const DAY = 24 * HOUR

  // [label, nominal period, accepted deviation on either side]
  const buckets: ReadonlyArray<readonly [string, number, number]> = [
    ['15m', 15 * MINUTE, MINUTE],
    ['30m', 30 * MINUTE, 2 * MINUTE],
    ['hourly', HOUR, 5 * MINUTE],
    ['6h', 6 * HOUR, 15 * MINUTE],
    ['12h', 12 * HOUR, 30 * MINUTE],
    ['daily', DAY, HOUR],
    ['weekly', 7 * DAY, 2 * HOUR],
  ]

  for (const [label, period, slack] of buckets) {
    if (Math.abs(intervalMs - period) <= slack) return label
  }

  return `interval:${Math.round(intervalMs / 60_000)}m`
}
213  
/**
 * Coarse cadence label for a signature.
 *
 * @returns 'once' for a one-off with a run time, the bucketed period for
 *          interval and cron schedules, and '' when no cadence can be determined.
 */
function cadenceFamily(signature: ScheduleSignature): string {
  switch (signature.scheduleType) {
    case 'once':
      return signature.runAt != null ? 'once' : ''
    case 'interval':
      return cadenceFamilyFromMs(signature.intervalMs)
    case 'cron':
      return cadenceFamilyFromMs(tryResolveCronIntervalMs(signature.cron))
    default:
      return ''
  }
}
220  
/**
 * Loose timing comparison used for fuzzy matches.
 *
 * True when the cadences are strictly equal, when both are one-offs whose run
 * times lie within `ONCE_MATCH_WINDOW_MS` of each other, or when both are
 * recurring and fall into the same non-empty cadence family. A one-off never
 * matches a recurring schedule.
 */
function sameCadenceFamily(a: ScheduleSignature, b: ScheduleSignature): boolean {
  if (sameCadence(a, b)) return true

  const aIsOnce = a.scheduleType === 'once'
  const bIsOnce = b.scheduleType === 'once'
  if (aIsOnce || bIsOnce) {
    if (!(aIsOnce && bIsOnce)) return false
    return a.runAt != null && b.runAt != null && Math.abs(a.runAt - b.runAt) <= ONCE_MATCH_WINDOW_MS
  }

  const family = cadenceFamily(a)
  return family !== '' && family === cadenceFamily(b)
}
232  
/**
 * Counts the distinct tokens that occur in both lists.
 *
 * @returns The size of the intersection; 0 when either list is empty.
 */
function countTokenOverlap(a: string[], b: string[]): number {
  if (a.length === 0 || b.length === 0) return 0
  // Iterate the shorter list and probe a set built from the longer one.
  const [shorter, longer] = a.length <= b.length ? [a, b] : [b, a]
  const lookup = new Set(longer)
  return [...new Set(shorter)].filter((token) => lookup.has(token)).length
}
243  
/**
 * Decides whether two prompts are "about the same thing" based on their tokens.
 *
 * With at most two distinct tokens on the smaller side, every one of them must
 * also appear on the other side. Otherwise at least two tokens must be shared,
 * they must cover at least 67% of the smaller side, and either the Jaccard
 * similarity is at least 0.5 or the overlap reaches `max(2, largerSize - 1)`.
 *
 * `dedup` is imported from the shared utils; presumably it returns the list
 * without duplicates — verify against its definition.
 */
function hasFuzzyPromptMatch(a: ScheduleSignature, b: ScheduleSignature): boolean {
  if (a.promptTokens.length === 0 || b.promptTokens.length === 0) return false

  const left = dedup(a.promptTokens)
  const right = dedup(b.promptTokens)
  const shared = countTokenOverlap(left, right)
  if (shared === 0) return false

  const minSize = Math.min(left.length, right.length)
  if (minSize <= 2) return shared === minSize

  if (shared < 2 || shared / minSize < 0.67) return false

  const unionSize = new Set([...left, ...right]).size
  const maxSize = Math.max(left.length, right.length)
  return shared / unionSize >= 0.5 || shared >= Math.max(2, maxSize - 1)
}
257  
/**
 * Checks a schedule status against the set of statuses allowed to match.
 * The status is trimmed and lower-cased first; a missing or blank status
 * counts as 'active'.
 */
function isEligibleStatus(status: unknown, includeStatuses: Set<string>): boolean {
  const cleaned = normalizeString(status).toLowerCase()
  return includeStatuses.has(cleaned === '' ? 'active' : cleaned)
}
262  
/**
 * Tests whether an existing schedule falls inside the requested creator scope.
 *
 * A dimension (agent or session) only excludes the schedule when both the
 * scope and the schedule carry a non-blank value and the two differ. A
 * missing scope, a blank scope, and schedules without creator information
 * all pass.
 */
function matchesCreatorScope(
  schedule: ScheduleLike,
  scope: FindDuplicateScheduleOptions['creatorScope'],
): boolean {
  if (!scope) return true

  const wantedAgent = normalizeString(scope.agentId)
  const wantedSession = normalizeString(scope.sessionId)
  if (wantedAgent === '' && wantedSession === '') return true

  const conflicts = (wanted: string, actual: string): boolean =>
    wanted !== '' && actual !== '' && wanted !== actual

  return !conflicts(wantedAgent, normalizeString(schedule.createdByAgentId))
    && !conflicts(wantedSession, normalizeString(schedule.createdInSessionId))
}
279  
/**
 * Sort comparator that orders schedules newest first.
 *
 * Each schedule's timestamp is `updatedAt`, falling back to `createdAt`, then
 * to 0. Only finite numbers are accepted: a `NaN` or infinite timestamp would
 * otherwise make the subtraction return `NaN`, which is not a valid comparator
 * result and leaves the sort order unspecified.
 *
 * @returns Negative when `a` is newer than `b`, positive when older, 0 when equal.
 */
function compareUpdatedDesc(a: ScheduleLike, b: ScheduleLike): number {
  const stamp = (schedule: ScheduleLike): number => {
    const { updatedAt, createdAt } = schedule
    if (typeof updatedAt === 'number' && Number.isFinite(updatedAt)) return updatedAt
    if (typeof createdAt === 'number' && Number.isFinite(createdAt)) return createdAt
    return 0
  }
  return stamp(b) - stamp(a)
}
285  
/**
 * Returns the single best existing schedule that duplicates the candidate.
 *
 * @param schedules - Existing schedules (the record's values are scanned).
 * @param candidateRaw - The schedule being created or updated.
 * @param opts - Filtering options; see `FindDuplicateScheduleOptions`.
 * @returns The top-ranked result of `findEquivalentSchedules`, or null when there is none.
 */
export function findDuplicateSchedule(
  schedules: Record<string, ScheduleLike>,
  candidateRaw: ScheduleDuplicateCandidate,
  opts: FindDuplicateScheduleOptions = {},
): ScheduleLike | null {
  const [best] = findEquivalentSchedules(schedules, candidateRaw, opts)
  return best || null
}
293  
/**
 * Finds every existing schedule that is equivalent to the candidate.
 *
 * An existing schedule is considered only if it has an id, is not the ignored
 * id, has an eligible status, lies inside the creator scope and belongs to the
 * same agent. It then matches either
 *  - exactly: identical normalized prompt and strictly equal cadence, or
 *  - fuzzily: similar prompt tokens and the same cadence family. Fuzzy matching
 *    is attempted only when `opts.creatorScope.sessionId` is non-blank.
 *
 * Entries of `includeStatuses` are trimmed and lower-cased, the same
 * normalization applied to each schedule's own status, so padded or
 * mixed-case entries still match and non-string entries are ignored instead
 * of throwing.
 *
 * @param schedules - Existing schedules (the record's values are scanned).
 * @param candidateRaw - The schedule being created or updated.
 * @param opts - Filtering options; see `FindDuplicateScheduleOptions`.
 * @returns Matches with exact ones first, each group ordered newest first.
 *          Empty when the candidate has no agent id or no prompt.
 */
export function findEquivalentSchedules(
  schedules: Record<string, ScheduleLike>,
  candidateRaw: ScheduleDuplicateCandidate,
  opts: FindDuplicateScheduleOptions = {},
): ScheduleLike[] {
  const candidate = toSignature(candidateRaw)
  if (!candidate.agentId || !candidate.taskPrompt) return []

  const ignoreId = normalizeString(opts.ignoreId || candidate.id)
  const requestedStatuses = opts.includeStatuses?.length ? opts.includeStatuses : ['active', 'paused']
  const statuses = new Set(requestedStatuses.map((status) => normalizeString(status).toLowerCase()))
  const creatorScope = opts.creatorScope || null
  const allowFuzzy = Boolean(normalizeString(creatorScope?.sessionId))

  const matches: Array<{ existing: ScheduleLike; kind: ScheduleMatchKind }> = []
  for (const existing of Object.values(schedules)) {
    // The record may contain holes or junk at runtime despite its declared type.
    if (!existing || typeof existing !== 'object') continue

    const signature = toSignature(existing)
    if (!signature.id) continue
    if (ignoreId && signature.id === ignoreId) continue
    if (!isEligibleStatus(existing.status, statuses)) continue
    if (!matchesCreatorScope(existing, creatorScope)) continue
    if (signature.agentId !== candidate.agentId) continue

    if (signature.taskPrompt === candidate.taskPrompt && sameCadence(signature, candidate)) {
      matches.push({ existing, kind: 'exact' })
      continue
    }

    if (allowFuzzy && hasFuzzyPromptMatch(signature, candidate) && sameCadenceFamily(signature, candidate)) {
      matches.push({ existing, kind: 'fuzzy' })
    }
  }

  // Exact matches outrank fuzzy ones; within a kind, the most recently touched wins.
  matches.sort((a, b) => {
    if (a.kind !== b.kind) return a.kind === 'exact' ? -1 : 1
    return compareUpdatedDesc(a.existing, b.existing)
  })

  return matches.map((entry) => entry.existing)
}